pulse-downloader 0.1.30 → 0.1.34
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +0 -3
- data/Gemfile.lock +17 -17
- data/README.md +3 -0
- data/lib/pulse/downloader/client.rb +10 -5
- data/lib/pulse/downloader/version.rb +1 -1
- data/lib/pulse/downloader/web_page_parser.rb +21 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ee4cd8b3e10c09d8a52ddf222d3d423ffb188a0b7eb4993febbcc97b74644764
|
4
|
+
data.tar.gz: 2126a1a1cecc91c2b0aa275ca17322deac30779682acbd6c08d4dad9f48d412e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b38229caa3120433d2434acc5ca8122c86fef5957db323895ea781473ef1cf40a9ab202f581c48de5ab27f438bfd0d046248867e5ea924ff9d473828d84763f
|
7
|
+
data.tar.gz: fd5908c8b30f58f07fa6496db39b88ba24a9f1dde4a6d87df5e65ba133b00241e6e836aeb4d66badbe832b6c4eb8621f98c1cbca03d3e94cfc2c7cbc18bef2d6
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
pulse-downloader (0.1.
|
4
|
+
pulse-downloader (0.1.31)
|
5
5
|
active_attr (~> 0.15)
|
6
6
|
httparty (~> 0.18)
|
7
7
|
nokogiri (~> 1.11)
|
@@ -10,15 +10,15 @@ PATH
|
|
10
10
|
GEM
|
11
11
|
remote: https://rubygems.org/
|
12
12
|
specs:
|
13
|
-
actionpack (6.1.
|
14
|
-
actionview (= 6.1.
|
15
|
-
activesupport (= 6.1.
|
13
|
+
actionpack (6.1.4.1)
|
14
|
+
actionview (= 6.1.4.1)
|
15
|
+
activesupport (= 6.1.4.1)
|
16
16
|
rack (~> 2.0, >= 2.0.9)
|
17
17
|
rack-test (>= 0.6.3)
|
18
18
|
rails-dom-testing (~> 2.0)
|
19
19
|
rails-html-sanitizer (~> 1.0, >= 1.2.0)
|
20
|
-
actionview (6.1.
|
21
|
-
activesupport (= 6.1.
|
20
|
+
actionview (6.1.4.1)
|
21
|
+
activesupport (= 6.1.4.1)
|
22
22
|
builder (~> 3.1)
|
23
23
|
erubi (~> 1.4)
|
24
24
|
rails-dom-testing (~> 2.0)
|
@@ -27,20 +27,20 @@ GEM
|
|
27
27
|
actionpack (>= 3.0.2, < 7.0)
|
28
28
|
activemodel (>= 3.0.2, < 7.0)
|
29
29
|
activesupport (>= 3.0.2, < 7.0)
|
30
|
-
activemodel (6.1.
|
31
|
-
activesupport (= 6.1.
|
32
|
-
activesupport (6.1.
|
30
|
+
activemodel (6.1.4.1)
|
31
|
+
activesupport (= 6.1.4.1)
|
32
|
+
activesupport (6.1.4.1)
|
33
33
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
34
34
|
i18n (>= 1.6, < 2)
|
35
35
|
minitest (>= 5.1)
|
36
36
|
tzinfo (~> 2.0)
|
37
37
|
zeitwerk (~> 2.3)
|
38
|
-
addressable (2.
|
38
|
+
addressable (2.8.0)
|
39
39
|
public_suffix (>= 2.0.2, < 5.0)
|
40
40
|
ansi (1.5.0)
|
41
41
|
builder (3.2.4)
|
42
42
|
coderay (1.1.3)
|
43
|
-
concurrent-ruby (1.1.
|
43
|
+
concurrent-ruby (1.1.9)
|
44
44
|
crack (0.4.5)
|
45
45
|
rexml
|
46
46
|
crass (1.0.6)
|
@@ -52,13 +52,13 @@ GEM
|
|
52
52
|
multi_xml (>= 0.5.2)
|
53
53
|
i18n (1.8.10)
|
54
54
|
concurrent-ruby (~> 1.0)
|
55
|
-
loofah (2.
|
55
|
+
loofah (2.12.0)
|
56
56
|
crass (~> 1.0.2)
|
57
57
|
nokogiri (>= 1.5.9)
|
58
58
|
method_source (1.0.0)
|
59
59
|
mime-types (3.3.1)
|
60
60
|
mime-types-data (~> 3.2015)
|
61
|
-
mime-types-data (3.2021.
|
61
|
+
mime-types-data (3.2021.0901)
|
62
62
|
minitest (5.14.4)
|
63
63
|
minitest-focus (1.1.2)
|
64
64
|
minitest (>= 4, < 6)
|
@@ -69,7 +69,7 @@ GEM
|
|
69
69
|
ruby-progressbar
|
70
70
|
mocha (1.11.2)
|
71
71
|
multi_xml (0.6.0)
|
72
|
-
nokogiri (1.
|
72
|
+
nokogiri (1.12.4-x86_64-linux)
|
73
73
|
racc (~> 1.4)
|
74
74
|
options (2.3.2)
|
75
75
|
progress_bar (1.3.3)
|
@@ -86,9 +86,9 @@ GEM
|
|
86
86
|
rails-dom-testing (2.0.3)
|
87
87
|
activesupport (>= 4.2.0)
|
88
88
|
nokogiri (>= 1.6)
|
89
|
-
rails-html-sanitizer (1.
|
89
|
+
rails-html-sanitizer (1.4.2)
|
90
90
|
loofah (~> 2.3)
|
91
|
-
rake (
|
91
|
+
rake (13.0.6)
|
92
92
|
rexml (3.2.5)
|
93
93
|
ruby-progressbar (1.11.0)
|
94
94
|
timecop (0.9.4)
|
@@ -111,7 +111,7 @@ DEPENDENCIES
|
|
111
111
|
mocha (~> 1.11.2)
|
112
112
|
pry (~> 0.13)
|
113
113
|
pulse-downloader!
|
114
|
-
rake (~>
|
114
|
+
rake (~> 13.0)
|
115
115
|
timecop (~> 0.9.1)
|
116
116
|
webmock (~> 3.8.3)
|
117
117
|
|
data/README.md
CHANGED
@@ -29,6 +29,7 @@ client = Pulse::Downloader::Client.new(
|
|
29
29
|
save_data: true,
|
30
30
|
save_path: '',
|
31
31
|
read_from_save_path: false,
|
32
|
+
traverse_folders: false,
|
32
33
|
verify_ssl: true,
|
33
34
|
drop_exitsing_files_in_path: false,
|
34
35
|
save_and_dont_return: true,
|
@@ -38,6 +39,8 @@ client = Pulse::Downloader::Client.new(
|
|
38
39
|
)
|
39
40
|
|
40
41
|
client.call!
|
42
|
+
|
43
|
+
client.file_paths # Will give you the list of file paths
|
41
44
|
```
|
42
45
|
|
43
46
|
## Development
|
@@ -12,6 +12,7 @@ module Pulse
|
|
12
12
|
:save_data,
|
13
13
|
:save_path,
|
14
14
|
:read_from_save_path,
|
15
|
+
:traverse_folders,
|
15
16
|
:verify_ssl,
|
16
17
|
:headers,
|
17
18
|
:drop_exitsing_files_in_path,
|
@@ -20,7 +21,8 @@ module Pulse
|
|
20
21
|
:start_time,
|
21
22
|
:end_time,
|
22
23
|
:progress_bar,
|
23
|
-
:base_url
|
24
|
+
:base_url,
|
25
|
+
:file_paths
|
24
26
|
|
25
27
|
# Does not continue downloads-
|
26
28
|
# Will only save once the file has been downloaded in memory
|
@@ -28,7 +30,6 @@ module Pulse
|
|
28
30
|
# TODO: Validation
|
29
31
|
# TODO: Retry
|
30
32
|
# TODO: DNS
|
31
|
-
# TODO: Multiple filetypes
|
32
33
|
# TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
|
33
34
|
def initialize(url:,
|
34
35
|
file_type:,
|
@@ -36,6 +37,7 @@ module Pulse
|
|
36
37
|
save_data: false,
|
37
38
|
save_path: '',
|
38
39
|
read_from_save_path: false,
|
40
|
+
traverse_folders: false,
|
39
41
|
verify_ssl: true,
|
40
42
|
headers: nil,
|
41
43
|
drop_exitsing_files_in_path: false,
|
@@ -49,6 +51,7 @@ module Pulse
|
|
49
51
|
@save_data = save_data
|
50
52
|
@save_path = save_path
|
51
53
|
@read_from_save_path = read_from_save_path
|
54
|
+
@traverse_folders = traverse_folders
|
52
55
|
@verify_ssl = verify_ssl
|
53
56
|
@headers = headers
|
54
57
|
@drop_exitsing_files_in_path = drop_exitsing_files_in_path
|
@@ -66,12 +69,14 @@ module Pulse
|
|
66
69
|
def call
|
67
70
|
return false unless valid?
|
68
71
|
|
72
|
+
@file_paths = fetch_file_paths
|
73
|
+
|
69
74
|
if @progress_bar
|
70
|
-
@progress_bar = ::ProgressBar.new(
|
75
|
+
@progress_bar = ::ProgressBar.new(file_paths.size)
|
71
76
|
end
|
72
77
|
|
73
|
-
|
74
|
-
download(file_path, @progress_bar)
|
78
|
+
file_paths.map do |file_path|
|
79
|
+
download(file_path, @progress_bar) if save_data
|
75
80
|
@progress_bar.increment!
|
76
81
|
end
|
77
82
|
end
|
@@ -2,9 +2,25 @@ module Pulse
|
|
2
2
|
module Downloader
|
3
3
|
module WebPageParser
|
4
4
|
def fetch_file_paths(custom_path_root=nil)
|
5
|
+
if traverse_folders
|
6
|
+
fetch_folders(url).each do |folder_url|
|
7
|
+
fetch_and_parse_response(folder_url, custom_path_root)
|
8
|
+
end
|
9
|
+
else
|
10
|
+
fetch_and_parse_response(url, custom_path_root)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def fetch_folders(base_url)
|
17
|
+
|
18
|
+
end
|
19
|
+
|
20
|
+
def fetch_and_parse_response(folder_url, custom_path_root)
|
5
21
|
@start_time = get_micro_second_time
|
6
22
|
|
7
|
-
response = HTTParty.get(
|
23
|
+
response = HTTParty.get(folder_url, verify: verify_ssl, headers: headers)
|
8
24
|
|
9
25
|
@end_time = get_micro_second_time
|
10
26
|
|
@@ -12,6 +28,10 @@ module Pulse
|
|
12
28
|
print_time
|
13
29
|
end
|
14
30
|
|
31
|
+
parse_response(response, custom_path_root, file_type)
|
32
|
+
end
|
33
|
+
|
34
|
+
def parse_response(response, custom_path_root, file_type)
|
15
35
|
if file_type.is_a?(Array)
|
16
36
|
file_type.flat_map do |type|
|
17
37
|
extract_file_urls(response, custom_path_root, type)
|
@@ -21,8 +41,6 @@ module Pulse
|
|
21
41
|
end
|
22
42
|
end
|
23
43
|
|
24
|
-
private
|
25
|
-
|
26
44
|
def extract_file_urls(response, custom_path_root, type)
|
27
45
|
return [] if response.body.nil? || response.body.empty?
|
28
46
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pulse-downloader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.34
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- trex22
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|