pulse-downloader 0.1.34 → 0.1.38
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/pulse/downloader/client.rb +3 -1
- data/lib/pulse/downloader/version.rb +1 -1
- data/lib/pulse/downloader/web_page_parser.rb +24 -3
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 15b3ba4650d2aa0c01ba6f5456888889bfd888405177aa956f092ba2c0691065
|
4
|
+
data.tar.gz: d28d3d28516d5cbdeaa255923e79e55526a364f70389a3d716ce9e8dec90af08
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff848ed64e1adefe773378d9f8a6ccb2d9b7f533a58a10e008b8f7d1b1d678c16978426c0c4a3de8aaa2797c1647304b7dd996fcb979a4cc7a831ce229163fc1
|
7
|
+
data.tar.gz: d9f01143d5d1a27ba38516b0ebed0a4ab49078e796f513d8351a90bbc8506077aa375b8b1a2ee8576b81836957f90ae31b5db31e8fdcc4b18a3dc30170f9b182
|
@@ -22,7 +22,8 @@ module Pulse
|
|
22
22
|
:end_time,
|
23
23
|
:progress_bar,
|
24
24
|
:base_url,
|
25
|
-
:file_paths
|
25
|
+
:file_paths,
|
26
|
+
:folder_urls
|
26
27
|
|
27
28
|
# Does not continue downloads-
|
28
29
|
# Will only save once the file has been downloaded in memory
|
@@ -60,6 +61,7 @@ module Pulse
|
|
60
61
|
@progress_bar = progress_bar
|
61
62
|
|
62
63
|
@base_url = get_base_url
|
64
|
+
@folder_urls = []
|
63
65
|
end
|
64
66
|
|
65
67
|
def call!
|
@@ -3,7 +3,7 @@ module Pulse
|
|
3
3
|
module WebPageParser
|
4
4
|
def fetch_file_paths(custom_path_root=nil)
|
5
5
|
if traverse_folders
|
6
|
-
fetch_folders(url).each do |folder_url|
|
6
|
+
fetch_folders(url, custom_path_root).each do |folder_url|
|
7
7
|
fetch_and_parse_response(folder_url, custom_path_root)
|
8
8
|
end
|
9
9
|
else
|
@@ -13,11 +13,24 @@ module Pulse
|
|
13
13
|
|
14
14
|
private
|
15
15
|
|
16
|
-
def fetch_folders(
|
16
|
+
def fetch_folders(folder_url, custom_path_root)
|
17
|
+
current_paths = extract_hrefs(get_response(folder_url), custom_path_root)
|
18
|
+
return unless current_paths.compact.size > 0
|
17
19
|
|
20
|
+
@folder_urls = folder_urls.union(current_paths).uniq.compact
|
21
|
+
|
22
|
+
current_paths.each do |path|
|
23
|
+
fetch_folders(path, custom_path_root)
|
24
|
+
end
|
25
|
+
|
26
|
+
folder_urls
|
18
27
|
end
|
19
28
|
|
20
29
|
def fetch_and_parse_response(folder_url, custom_path_root)
|
30
|
+
parse_response(get_response(folder_url), custom_path_root, file_type)
|
31
|
+
end
|
32
|
+
|
33
|
+
def get_response(folder_url)
|
21
34
|
@start_time = get_micro_second_time
|
22
35
|
|
23
36
|
response = HTTParty.get(folder_url, verify: verify_ssl, headers: headers)
|
@@ -28,7 +41,7 @@ module Pulse
|
|
28
41
|
print_time
|
29
42
|
end
|
30
43
|
|
31
|
-
|
44
|
+
response
|
32
45
|
end
|
33
46
|
|
34
47
|
def parse_response(response, custom_path_root, file_type)
|
@@ -51,6 +64,14 @@ module Pulse
|
|
51
64
|
).uniq
|
52
65
|
end
|
53
66
|
|
67
|
+
def extract_hrefs(response, custom_path_root)
|
68
|
+
parse_html(response.body)
|
69
|
+
.css('a')
|
70
|
+
.map { |link| "/#{link['href']}" }
|
71
|
+
.reject { |link| link == "../" || link == "/../" }
|
72
|
+
.map { |link| add_base_url(link, custom_path_root) }
|
73
|
+
end
|
74
|
+
|
54
75
|
def extract_all_urls(response, custom_path_root, type)
|
55
76
|
parse_html(response.body)
|
56
77
|
.to_s
|