pulse-downloader 0.1.0 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +16 -5
- data/lib/pulse/downloader.rb +1 -0
- data/lib/pulse/downloader/client.rb +35 -6
- data/lib/pulse/downloader/file_checker.rb +27 -0
- data/lib/pulse/downloader/file_downloader.rb +48 -6
- data/lib/pulse/downloader/version.rb +1 -1
- data/lib/pulse/downloader/web_page_parser.rb +14 -8
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 143241844753713ded3ebdb7fccf727a20ab2055ad9c40e9936b88f9ecd4a0a5
|
4
|
+
data.tar.gz: 471f4432385de2dc96223ed014e5d3c88adccfda7c98a591b1cd2c915dbe3256
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f26e86e5e59e24be532fc542e2de26c33d69dd2e4bb264ef6fe1f28f46d68782d5d0838a80db769912fc202598dff4e40aad10a856e4659e01105c73997b7ed3
|
7
|
+
data.tar.gz: a29a979395ce6b6d7311e7150e22f1ed11a62cd317e19aa42a16241674e7abe33397e8d210882aeb4fa6f7835e3a32e321ccdadf13ecc20219c7a9065a38f9f5
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# Pulse::Downloader
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
3
|
+
This is a library for downloading a specific group of files linked from an HTML page.
|
6
4
|
|
7
5
|
## Installation
|
8
6
|
|
@@ -22,7 +20,21 @@ Or install it yourself as:
|
|
22
20
|
|
23
21
|
## Usage
|
24
22
|
|
25
|
-
|
23
|
+
```ruby
|
24
|
+
require 'pulse/downloader'
|
25
|
+
|
26
|
+
client = Pulse::Downloader::Client.new(
|
27
|
+
url: '',
|
28
|
+
file_type: 'zip',
|
29
|
+
save_data: true,
|
30
|
+
save_path: '',
|
31
|
+
read_from_save_path: false,
|
32
|
+
verify_ssl: true,
|
33
|
+
drop_exitsing_files_in_path: false,
|
34
|
+
save_and_dont_return: true,
|
35
|
+
report_time: false
|
36
|
+
)
|
37
|
+
```
|
26
38
|
|
27
39
|
## Development
|
28
40
|
|
@@ -34,7 +46,6 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
34
46
|
|
35
47
|
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/pulse-downloader. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/pulse-downloader/blob/master/CODE_OF_CONDUCT.md).
|
36
48
|
|
37
|
-
|
38
49
|
## License
|
39
50
|
|
40
51
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/lib/pulse/downloader.rb
CHANGED
data/lib/pulse/downloader/client.rb
CHANGED
@@ -2,16 +2,45 @@ module Pulse
|
|
2
2
|
module Downloader
|
3
3
|
class Client
|
4
4
|
include ::Pulse::Downloader::WebPageParser
|
5
|
+
include ::Pulse::Downloader::FileChecker
|
5
6
|
include ::Pulse::Downloader::FileDownloader
|
6
7
|
|
7
|
-
attr_reader :
|
8
|
+
attr_reader :url,
|
9
|
+
:file_type,
|
10
|
+
:save_data,
|
11
|
+
:save_path,
|
12
|
+
:read_from_save_path,
|
13
|
+
:verify_ssl,
|
14
|
+
:drop_exitsing_files_in_path,
|
15
|
+
:report_time,
|
16
|
+
:start_time,
|
17
|
+
:end_time
|
8
18
|
|
9
|
-
|
10
|
-
|
19
|
+
# Does not continue downloads-
|
20
|
+
# Will only save once the file has been downloaded in memory
|
21
|
+
|
22
|
+
# TODO: Add in progress bar
|
23
|
+
# TODO: Validation
|
24
|
+
# TODO: Retry
|
25
|
+
# TODO: DNS
|
26
|
+
def initialize(url:,
|
27
|
+
file_type:,
|
28
|
+
save_data: false,
|
29
|
+
save_path: '',
|
30
|
+
read_from_save_path: false,
|
31
|
+
verify_ssl: true,
|
32
|
+
drop_exitsing_files_in_path: false,
|
33
|
+
save_and_dont_return: true,
|
34
|
+
report_time: false)
|
35
|
+
|
36
|
+
@url = url
|
11
37
|
@file_type = file_type
|
12
38
|
@save_data = save_data
|
13
39
|
@save_path = save_path
|
14
40
|
@read_from_save_path = read_from_save_path
|
41
|
+
@verify_ssl = verify_ssl
|
42
|
+
@drop_exitsing_files_in_path = drop_exitsing_files_in_path
|
43
|
+
@report_time = report_time
|
15
44
|
end
|
16
45
|
|
17
46
|
def call!
|
@@ -33,11 +62,11 @@ module Pulse
|
|
33
62
|
private
|
34
63
|
|
35
64
|
def get_micro_second_time
|
36
|
-
(Time.now.to_f *
|
65
|
+
(Time.now.to_f * 1000).to_i
|
37
66
|
end
|
38
67
|
|
39
|
-
def
|
40
|
-
|
68
|
+
def print_time
|
69
|
+
puts "Request time: #{end_time - start_time} ms."
|
41
70
|
end
|
42
71
|
end
|
43
72
|
end
|
data/lib/pulse/downloader/file_checker.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
module Pulse
|
2
|
+
module Downloader
|
3
|
+
module FileChecker
|
4
|
+
def file_path_in_file_list?(file_path)
|
5
|
+
return false unless drop_exitsing_files_in_path && save_data
|
6
|
+
|
7
|
+
list_files_in(save_path).include?(compute_save_path(file_path))
|
8
|
+
end
|
9
|
+
|
10
|
+
private
|
11
|
+
|
12
|
+
def compute_save_path(url)
|
13
|
+
"#{save_path}/#{compute_filename(url)}".gsub('//', '/')
|
14
|
+
end
|
15
|
+
|
16
|
+
def compute_filename(file_path)
|
17
|
+
file_path.scan(/[\/]\S+/).last
|
18
|
+
end
|
19
|
+
|
20
|
+
def list_files_in(path)
|
21
|
+
`ls #{path}`.split("\n").map do |filename|
|
22
|
+
"#{path}/#{filename}".gsub('//', '/')
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/lib/pulse/downloader/file_downloader.rb
CHANGED
@@ -1,29 +1,71 @@
|
|
1
1
|
module Pulse
|
2
2
|
module Downloader
|
3
3
|
module FileDownloader
|
4
|
-
# save_path are defined in client.rb
|
4
|
+
# save_path and verify_ssl are defined in client.rb
|
5
5
|
def download(file_path)
|
6
6
|
raise "save_path is undefined" if save_data && save_path == ''
|
7
|
+
return if file_path_in_file_list?(file_path) # skip downloading the file
|
7
8
|
|
8
|
-
start_time = get_micro_second_time
|
9
|
+
@start_time = get_micro_second_time
|
9
10
|
|
10
|
-
file_data = HTTParty.get(file_path)
|
11
|
+
file_data = HTTParty.get(compute_file_link(file_path), verify: verify_ssl)
|
11
12
|
|
12
|
-
|
13
|
-
|
13
|
+
@end_time = get_micro_second_time
|
14
|
+
|
15
|
+
if report_time
|
16
|
+
print_time
|
17
|
+
end
|
14
18
|
|
15
19
|
if save_data
|
16
|
-
File.open(
|
20
|
+
File.open(compute_save_path(file_path), 'wb') do |file|
|
17
21
|
file.write(file_data.body)
|
18
22
|
end
|
23
|
+
|
24
|
+
return true if save_and_dont_return
|
19
25
|
end
|
20
26
|
|
21
27
|
file_data
|
22
28
|
end
|
23
29
|
|
30
|
+
def fetch_save_paths
|
31
|
+
fetch_file_paths.map do |file_path|
|
32
|
+
"#{save_path}/#{compute_filename(file_path)}"
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
24
36
|
def compute_hash_of(data)
|
25
37
|
{ data: data }.hash
|
26
38
|
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def compute_file_link(file_path)
|
43
|
+
if section?(file_path)
|
44
|
+
raise 'invalid download path'
|
45
|
+
elsif absolute?(file_path)
|
46
|
+
file_path
|
47
|
+
elsif relative?(file_path)
|
48
|
+
"#{url}/#{file_path}"
|
49
|
+
else
|
50
|
+
"#{url}/#{file_path}"
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def absolute?(file_path)
|
55
|
+
file_path.include?('http://') ||
|
56
|
+
file_path.include?('https://') ||
|
57
|
+
file_path.include?('ftp://') ||
|
58
|
+
file_path.include?('sftp://')||
|
59
|
+
file_path.include?('file://')
|
60
|
+
end
|
61
|
+
|
62
|
+
def relative?(file_path)
|
63
|
+
file_path[0] == '/'
|
64
|
+
end
|
65
|
+
|
66
|
+
def section?(file_path)
|
67
|
+
file_path[0] == '#'
|
68
|
+
end
|
27
69
|
end
|
28
70
|
end
|
29
71
|
end
|
data/lib/pulse/downloader/web_page_parser.rb
CHANGED
@@ -2,24 +2,30 @@ module Pulse
|
|
2
2
|
module Downloader
|
3
3
|
module WebPageParser
|
4
4
|
def fetch_file_paths
|
5
|
-
start_time = get_micro_second_time
|
5
|
+
@start_time = get_micro_second_time
|
6
6
|
|
7
|
-
response = HTTParty.get(
|
7
|
+
response = HTTParty.get(url, verify: verify_ssl)
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
@end_time = get_micro_second_time
|
10
|
+
|
11
|
+
if report_time
|
12
|
+
print_time
|
13
|
+
end
|
14
|
+
|
15
|
+
extract_file_urls(response)
|
12
16
|
end
|
13
17
|
|
14
18
|
private
|
15
19
|
|
16
|
-
def extract_file_urls(response
|
17
|
-
|
20
|
+
def extract_file_urls(response)
|
21
|
+
return [] if response.body.nil? || response.body.empty?
|
22
|
+
|
23
|
+
parse_html(response.body)
|
18
24
|
.css('a')
|
19
25
|
.to_a
|
20
26
|
.map { |link| link['href'] }
|
21
27
|
.compact
|
22
|
-
.select { |link| link.include?
|
28
|
+
.select { |link| link.include? file_type }
|
23
29
|
end
|
24
30
|
|
25
31
|
def parse_html(raw_html)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pulse-downloader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- trex22
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|
@@ -197,6 +197,7 @@ files:
|
|
197
197
|
- bin/setup
|
198
198
|
- lib/pulse/downloader.rb
|
199
199
|
- lib/pulse/downloader/client.rb
|
200
|
+
- lib/pulse/downloader/file_checker.rb
|
200
201
|
- lib/pulse/downloader/file_downloader.rb
|
201
202
|
- lib/pulse/downloader/version.rb
|
202
203
|
- lib/pulse/downloader/web_page_parser.rb
|