pulse-downloader 0.1.39 → 0.1.43
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/pulse/downloader/version.rb +1 -1
- data/lib/pulse/downloader/web_page_parser.rb +11 -6
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6b7469ba84a0555c2bf67f9807fa2ddfcc62253dcfce0496e7e6450f7aba9e58
|
4
|
+
data.tar.gz: 71f982b1951d9f4bc49a491edf190fae11f9ce25eee223e60a021ff97afce612
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a204eded78564f83f3e9daf8a86e8c9c2dc7ec80e218e8eeccb47cfdce86b0acbdbbc2452aa482ec4b17fe78a35365a1b012ccc27a832e59f89420c241ecadf
|
7
|
+
data.tar.gz: 2b1c0f7eca7b7be38f22d8c633302226e713e6ef20044963187676df4a69c11ad56084ca8fd081450a8937217a004d9ee842a12e0d3e7b1c7a19d0add8a198a6
|
data/lib/pulse/downloader/web_page_parser.rb
CHANGED
@@ -3,7 +3,7 @@ module Pulse
|
|
3
3
|
module WebPageParser
|
4
4
|
def fetch_file_paths(custom_path_root=nil)
|
5
5
|
if traverse_folders
|
6
|
-
fetch_folders(url, custom_path_root).each do |folder_url|
|
6
|
+
fetch_folders(url, custom_path_root, nil).each do |folder_url|
|
7
7
|
fetch_and_parse_response(folder_url, custom_path_root)
|
8
8
|
end
|
9
9
|
else
|
@@ -13,14 +13,13 @@ module Pulse
|
|
13
13
|
|
14
14
|
private
|
15
15
|
|
16
|
-
def fetch_folders(folder_url, custom_path_root)
|
17
|
-
current_paths = extract_hrefs(get_response(folder_url), custom_path_root)
|
18
|
-
return unless current_paths.compact.size > 0
|
16
|
+
def fetch_folders(folder_url, custom_path_root, inner_string)
|
17
|
+
current_paths = extract_hrefs(get_response(folder_url), custom_path_root, inner_string)
|
19
18
|
|
20
19
|
@folder_urls = folder_urls.union(current_paths).uniq.compact
|
21
20
|
|
22
21
|
current_paths.each do |path|
|
23
|
-
fetch_folders(path,
|
22
|
+
fetch_folders(path, nil, inner_string)
|
24
23
|
end
|
25
24
|
|
26
25
|
folder_urls
|
@@ -64,15 +63,21 @@ module Pulse
|
|
64
63
|
).uniq
|
65
64
|
end
|
66
65
|
|
67
|
-
def extract_hrefs(response, custom_path_root)
|
66
|
+
def extract_hrefs(response, custom_path_root, inner_string)
|
68
67
|
parse_html(response.body)
|
69
68
|
.css('a')
|
70
69
|
.map { |link| "/#{link['href']}" }
|
71
70
|
.reject { |link| link == "../" || link == "/../" }
|
72
71
|
.reject { |link| link.include?('.') } # Remove files
|
72
|
+
.map { |link| append_two_paths(inner_string, link) }
|
73
73
|
.map { |link| add_base_url(link, custom_path_root) }
|
74
74
|
end
|
75
75
|
|
76
|
+
def append_two_paths(inner_string, link)
|
77
|
+
return link if inner_string.nil?
|
78
|
+
"#{inner_string}/#{link}"
|
79
|
+
end
|
80
|
+
|
76
81
|
def extract_all_urls(response, custom_path_root, type)
|
77
82
|
parse_html(response.body)
|
78
83
|
.to_s
|