pulse-downloader 0.1.31 → 0.1.35
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/pulse/downloader/client.rb +12 -5
- data/lib/pulse/downloader/version.rb +1 -1
- data/lib/pulse/downloader/web_page_parser.rb +42 -3
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: df940062ab2c0dcdc96b256b76e8198683776d7bdb834198c0f75da85d5e63fb
|
4
|
+
data.tar.gz: 158f5746ceb0d820934e126cae7ca53dfca0a841ae8a588b2b955c85de5abc7f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2494b72940a92e4b13fc44c8c9c81ff63e48a35940e9bcc6437235afdd4c1ca2fdc43098b71f976ce42d5fa65b53979aa3b055b511c91c1ca6b0ff64a0dd472c
|
7
|
+
data.tar.gz: 1c40920e57fdbb4184034cc91953f82c5babf1c09146027b9b887b1bac5d8b45f4e386b21133bd44625f8adb8843daac8b4652e35e94ebf25188738d02d8a36c
|
data/README.md
CHANGED
@@ -29,6 +29,7 @@ client = Pulse::Downloader::Client.new(
|
|
29
29
|
save_data: true,
|
30
30
|
save_path: '',
|
31
31
|
read_from_save_path: false,
|
32
|
+
traverse_folders: false,
|
32
33
|
verify_ssl: true,
|
33
34
|
drop_exitsing_files_in_path: false,
|
34
35
|
save_and_dont_return: true,
|
@@ -38,6 +39,8 @@ client = Pulse::Downloader::Client.new(
|
|
38
39
|
)
|
39
40
|
|
40
41
|
client.call!
|
42
|
+
|
43
|
+
client.file_paths # Will give you the list of file paths
|
41
44
|
```
|
42
45
|
|
43
46
|
## Development
|
data/lib/pulse/downloader/client.rb
CHANGED
@@ -12,6 +12,7 @@ module Pulse
|
|
12
12
|
:save_data,
|
13
13
|
:save_path,
|
14
14
|
:read_from_save_path,
|
15
|
+
:traverse_folders,
|
15
16
|
:verify_ssl,
|
16
17
|
:headers,
|
17
18
|
:drop_exitsing_files_in_path,
|
@@ -20,7 +21,9 @@ module Pulse
|
|
20
21
|
:start_time,
|
21
22
|
:end_time,
|
22
23
|
:progress_bar,
|
23
|
-
:base_url
|
24
|
+
:base_url,
|
25
|
+
:file_paths,
|
26
|
+
:folder_urls
|
24
27
|
|
25
28
|
# Does not continue downloads-
|
26
29
|
# Will only save once the file has been downloaded in memory
|
@@ -28,7 +31,6 @@ module Pulse
|
|
28
31
|
# TODO: Validation
|
29
32
|
# TODO: Retry
|
30
33
|
# TODO: DNS
|
31
|
-
# TODO: Multiple filetypes
|
32
34
|
# TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
|
33
35
|
def initialize(url:,
|
34
36
|
file_type:,
|
@@ -36,6 +38,7 @@ module Pulse
|
|
36
38
|
save_data: false,
|
37
39
|
save_path: '',
|
38
40
|
read_from_save_path: false,
|
41
|
+
traverse_folders: false,
|
39
42
|
verify_ssl: true,
|
40
43
|
headers: nil,
|
41
44
|
drop_exitsing_files_in_path: false,
|
@@ -49,6 +52,7 @@ module Pulse
|
|
49
52
|
@save_data = save_data
|
50
53
|
@save_path = save_path
|
51
54
|
@read_from_save_path = read_from_save_path
|
55
|
+
@traverse_folders = traverse_folders
|
52
56
|
@verify_ssl = verify_ssl
|
53
57
|
@headers = headers
|
54
58
|
@drop_exitsing_files_in_path = drop_exitsing_files_in_path
|
@@ -57,6 +61,7 @@ module Pulse
|
|
57
61
|
@progress_bar = progress_bar
|
58
62
|
|
59
63
|
@base_url = get_base_url
|
64
|
+
@folder_urls = []
|
60
65
|
end
|
61
66
|
|
62
67
|
def call!
|
@@ -66,12 +71,14 @@ module Pulse
|
|
66
71
|
def call
|
67
72
|
return false unless valid?
|
68
73
|
|
74
|
+
@file_paths = fetch_file_paths
|
75
|
+
|
69
76
|
if @progress_bar
|
70
|
-
@progress_bar = ::ProgressBar.new(
|
77
|
+
@progress_bar = ::ProgressBar.new(file_paths.size)
|
71
78
|
end
|
72
79
|
|
73
|
-
|
74
|
-
download(file_path, @progress_bar)
|
80
|
+
file_paths.map do |file_path|
|
81
|
+
download(file_path, @progress_bar) if save_data
|
75
82
|
@progress_bar.increment!
|
76
83
|
end
|
77
84
|
end
|
data/lib/pulse/downloader/web_page_parser.rb
CHANGED
@@ -2,9 +2,38 @@ module Pulse
|
|
2
2
|
module Downloader
|
3
3
|
module WebPageParser
|
4
4
|
def fetch_file_paths(custom_path_root=nil)
|
5
|
+
if traverse_folders
|
6
|
+
fetch_folders(url).each do |folder_url|
|
7
|
+
fetch_and_parse_response(folder_url, custom_path_root)
|
8
|
+
end
|
9
|
+
else
|
10
|
+
fetch_and_parse_response(url, custom_path_root)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def fetch_folders(base_url)
|
17
|
+
current_paths = extract_hrefs(get_response(folder_url), custom_path_root)
|
18
|
+
return unless current_paths.compact.size > 0
|
19
|
+
|
20
|
+
@folder_urls = folder_urls.union(current_paths).uniq.compact
|
21
|
+
|
22
|
+
current_paths.each do |path|
|
23
|
+
fetch_folders(path)
|
24
|
+
end
|
25
|
+
|
26
|
+
folder_urls
|
27
|
+
end
|
28
|
+
|
29
|
+
def fetch_and_parse_response(folder_url, custom_path_root)
|
30
|
+
parse_response(get_response(folder_url), custom_path_root, file_type)
|
31
|
+
end
|
32
|
+
|
33
|
+
def get_response(folder_url)
|
5
34
|
@start_time = get_micro_second_time
|
6
35
|
|
7
|
-
response = HTTParty.get(
|
36
|
+
response = HTTParty.get(folder_url, verify: verify_ssl, headers: headers)
|
8
37
|
|
9
38
|
@end_time = get_micro_second_time
|
10
39
|
|
@@ -12,6 +41,10 @@ module Pulse
|
|
12
41
|
print_time
|
13
42
|
end
|
14
43
|
|
44
|
+
response
|
45
|
+
end
|
46
|
+
|
47
|
+
def parse_response(response, custom_path_root, file_type)
|
15
48
|
if file_type.is_a?(Array)
|
16
49
|
file_type.flat_map do |type|
|
17
50
|
extract_file_urls(response, custom_path_root, type)
|
@@ -21,8 +54,6 @@ module Pulse
|
|
21
54
|
end
|
22
55
|
end
|
23
56
|
|
24
|
-
private
|
25
|
-
|
26
57
|
def extract_file_urls(response, custom_path_root, type)
|
27
58
|
return [] if response.body.nil? || response.body.empty?
|
28
59
|
|
@@ -33,6 +64,14 @@ module Pulse
|
|
33
64
|
).uniq
|
34
65
|
end
|
35
66
|
|
67
|
+
def extract_hrefs(response, custom_path_root)
|
68
|
+
parse_html(response.body)
|
69
|
+
.css('a')
|
70
|
+
.map { |link| link['href'] }
|
71
|
+
.reject { |link| link == "../" }
|
72
|
+
.map { |link| add_base_url(link, custom_path_root) }
|
73
|
+
end
|
74
|
+
|
36
75
|
def extract_all_urls(response, custom_path_root, type)
|
37
76
|
parse_html(response.body)
|
38
77
|
.to_s
|