pulse-downloader 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 82910e4a2a00c0519958083ae3380241bb4c0cd3166d513d311c338227e4184f
4
- data.tar.gz: c7642a1d110693f56ce933a466312fe851647ede790cba74a18c8157137c2a7e
3
+ metadata.gz: 143241844753713ded3ebdb7fccf727a20ab2055ad9c40e9936b88f9ecd4a0a5
4
+ data.tar.gz: 471f4432385de2dc96223ed014e5d3c88adccfda7c98a591b1cd2c915dbe3256
5
5
  SHA512:
6
- metadata.gz: 1ec8ec9d18dabd67e9c7e01abf5718206512e9169b2cc38c4ec2654b1ae289d7955945995ea55fda637c7a6363f8f053baa681a6ba463c1b6f3a2519bf1b714c
7
- data.tar.gz: 37dc7aa8612c40f696c6902e3e791811ac801106a1c45243ba5597f73bbecd7198b077d8d37340a7e010c995252c092ea3f8a869498cac0844762d5d72412ab3
6
+ metadata.gz: f26e86e5e59e24be532fc542e2de26c33d69dd2e4bb264ef6fe1f28f46d68782d5d0838a80db769912fc202598dff4e40aad10a856e4659e01105c73997b7ed3
7
+ data.tar.gz: a29a979395ce6b6d7311e7150e22f1ed11a62cd317e19aa42a16241674e7abe33397e8d210882aeb4fa6f7835e3a32e321ccdadf13ecc20219c7a9065a38f9f5
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pulse-downloader (0.1.0)
4
+ pulse-downloader (0.1.3)
5
5
  active_attr (~> 0.15)
6
6
  httparty (~> 0.18)
7
7
  nokogiri (~> 1.10.9)
data/README.md CHANGED
@@ -1,8 +1,6 @@
1
1
  # Pulse::Downloader
2
2
 
3
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/pulse/downloader`. To experiment with that code, run `bin/console` for an interactive prompt.
4
-
5
- TODO: Delete this and the text above, and describe your gem
3
+ This is a library for downloading a specific group of files linked from an HTML page.
6
4
 
7
5
  ## Installation
8
6
 
@@ -22,7 +20,21 @@ Or install it yourself as:
22
20
 
23
21
  ## Usage
24
22
 
25
- TODO: Write usage instructions here
23
+ ```ruby
24
+ require 'pulse/downloader'
25
+
26
+ client = Pulse::Downloader::Client.new(
27
+ url: '',
28
+ file_type: 'zip',
29
+ save_data: true,
30
+ save_path: '',
31
+ read_from_save_path: false,
32
+ verify_ssl: true,
33
+ drop_exitsing_files_in_path: false,
34
+ save_and_dont_return: true,
35
+ report_time: false
36
+ )
37
+ ```
26
38
 
27
39
  ## Development
28
40
 
@@ -34,7 +46,6 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
34
46
 
35
47
  Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/pulse-downloader. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/pulse-downloader/blob/master/CODE_OF_CONDUCT.md).
36
48
 
37
-
38
49
  ## License
39
50
 
40
51
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -3,6 +3,7 @@ require 'nokogiri'
3
3
 
4
4
  require "pulse/downloader/version"
5
5
  require 'pulse/downloader/web_page_parser'
6
+ require 'pulse/downloader/file_checker'
6
7
  require 'pulse/downloader/file_downloader'
7
8
  require 'pulse/downloader/client'
8
9
 
@@ -2,16 +2,45 @@ module Pulse
2
2
  module Downloader
3
3
  class Client
4
4
  include ::Pulse::Downloader::WebPageParser
5
+ include ::Pulse::Downloader::FileChecker
5
6
  include ::Pulse::Downloader::FileDownloader
6
7
 
7
- attr_reader :path, :file_type, :save_data, :save_path, :read_from_save_path
8
+ attr_reader :url,
9
+ :file_type,
10
+ :save_data,
11
+ :save_path,
12
+ :read_from_save_path,
13
+ :verify_ssl,
14
+ :drop_exitsing_files_in_path,
15
+ :report_time,
16
+ :start_time,
17
+ :end_time
8
18
 
9
- def initialize(path:, file_type:, save_data: false, save_path: '', read_from_save_path: false)
10
- @path = path
19
+ # Does not resume interrupted downloads.
20
+ # Will only save once the file has been downloaded in memory
21
+
22
+ # TODO: Add in progress bar
23
+ # TODO: Validation
24
+ # TODO: Retry
25
+ # TODO: DNS
26
+ def initialize(url:,
27
+ file_type:,
28
+ save_data: false,
29
+ save_path: '',
30
+ read_from_save_path: false,
31
+ verify_ssl: true,
32
+ drop_exitsing_files_in_path: false,
33
+ save_and_dont_return: true,
34
+ report_time: false)
35
+
36
+ @url = url
11
37
  @file_type = file_type
12
38
  @save_data = save_data
13
39
  @save_path = save_path
14
40
  @read_from_save_path = read_from_save_path
41
+ @verify_ssl = verify_ssl
42
+ @drop_exitsing_files_in_path = drop_exitsing_files_in_path
43
+ @report_time = report_time
15
44
  end
16
45
 
17
46
  def call!
@@ -33,11 +62,11 @@ module Pulse
33
62
  private
34
63
 
35
64
  def get_micro_second_time
36
- (Time.now.to_f * 1000000).to_i
65
+ (Time.now.to_f * 1000).to_i
37
66
  end
38
67
 
39
- def compute_filename(file_path)
40
- file_path.scan(/[\/]\S+/).last
68
+ def print_time
69
+ puts "Request time: #{end_time - start_time} ms."
41
70
  end
42
71
  end
43
72
  end
@@ -0,0 +1,27 @@
1
+ module Pulse
2
+ module Downloader
3
+ module FileChecker
4
+ def file_path_in_file_list?(file_path)
5
+ return false unless drop_exitsing_files_in_path && save_data
6
+
7
+ list_files_in(save_path).include?(compute_save_path(file_path))
8
+ end
9
+
10
+ private
11
+
12
+ def compute_save_path(url)
13
+ "#{save_path}/#{compute_filename(url)}".gsub('//', '/')
14
+ end
15
+
16
+ def compute_filename(file_path)
17
+ file_path.scan(/[\/]\S+/).last
18
+ end
19
+
20
+ def list_files_in(path)
21
+ `ls #{path}`.split("\n").map do |filename|
22
+ "#{path}/#{filename}".gsub('//', '/')
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -1,29 +1,71 @@
1
1
  module Pulse
2
2
  module Downloader
3
3
  module FileDownloader
4
- # save_path are defined in client.rb
4
+ # save_path and verify_ssl are defined in client.rb
5
5
  def download(file_path)
6
6
  raise "save_path is undefined" if save_data && save_path == ''
7
+ return if file_path_in_file_list?(file_path) # skip downloading the file
7
8
 
8
- start_time = get_micro_second_time
9
+ @start_time = get_micro_second_time
9
10
 
10
- file_data = HTTParty.get(file_path)
11
+ file_data = HTTParty.get(compute_file_link(file_path), verify: verify_ssl)
11
12
 
12
- # TODO: Use the time
13
- end_time = get_micro_second_time
13
+ @end_time = get_micro_second_time
14
+
15
+ if report_time
16
+ print_time
17
+ end
14
18
 
15
19
  if save_data
16
- File.open("#{save_path}/#{compute_filename(file_path)}", 'wb') do |file|
20
+ File.open(compute_save_path(file_path), 'wb') do |file|
17
21
  file.write(file_data.body)
18
22
  end
23
+
24
+ return true if save_and_dont_return
19
25
  end
20
26
 
21
27
  file_data
22
28
  end
23
29
 
30
+ def fetch_save_paths
31
+ fetch_file_paths.map do |file_path|
32
+ "#{save_path}/#{compute_filename(file_path)}"
33
+ end
34
+ end
35
+
24
36
  def compute_hash_of(data)
25
37
  { data: data }.hash
26
38
  end
39
+
40
+ private
41
+
42
+ def compute_file_link(file_path)
43
+ if section?(file_path)
44
+ raise 'invalid download path'
45
+ elsif absolute?(file_path)
46
+ file_path
47
+ elsif relative?(file_path)
48
+ "#{url}/#{file_path}"
49
+ else
50
+ "#{url}/#{file_path}"
51
+ end
52
+ end
53
+
54
+ def absolute?(file_path)
55
+ file_path.include?('http://') ||
56
+ file_path.include?('https://') ||
57
+ file_path.include?('ftp://') ||
58
+ file_path.include?('sftp://')||
59
+ file_path.include?('file://')
60
+ end
61
+
62
+ def relative?(file_path)
63
+ file_path[0] == '/'
64
+ end
65
+
66
+ def section?(file_path)
67
+ file_path[0] == '#'
68
+ end
27
69
  end
28
70
  end
29
71
  end
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.0"
3
+ VERSION = "0.1.3"
4
4
  end
5
5
  end
@@ -2,24 +2,30 @@ module Pulse
2
2
  module Downloader
3
3
  module WebPageParser
4
4
  def fetch_file_paths
5
- start_time = get_micro_second_time
5
+ @start_time = get_micro_second_time
6
6
 
7
- response = HTTParty.get(@path)
7
+ response = HTTParty.get(url, verify: verify_ssl)
8
8
 
9
- # TODO: Use the time
10
- end_time = get_micro_second_time
11
- extract_file_urls(response, start_time, end_time)
9
+ @end_time = get_micro_second_time
10
+
11
+ if report_time
12
+ print_time
13
+ end
14
+
15
+ extract_file_urls(response)
12
16
  end
13
17
 
14
18
  private
15
19
 
16
- def extract_file_urls(response, start_time, end_time)
17
- parse_html(response)
20
+ def extract_file_urls(response)
21
+ return [] if response.body.nil? || response.body.empty?
22
+
23
+ parse_html(response.body)
18
24
  .css('a')
19
25
  .to_a
20
26
  .map { |link| link['href'] }
21
27
  .compact
22
- .select { |link| link.include? @file_type }
28
+ .select { |link| link.include? file_type }
23
29
  end
24
30
 
25
31
  def parse_html(raw_html)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-15 00:00:00.000000000 Z
11
+ date: 2020-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty
@@ -197,6 +197,7 @@ files:
197
197
  - bin/setup
198
198
  - lib/pulse/downloader.rb
199
199
  - lib/pulse/downloader/client.rb
200
+ - lib/pulse/downloader/file_checker.rb
200
201
  - lib/pulse/downloader/file_downloader.rb
201
202
  - lib/pulse/downloader/version.rb
202
203
  - lib/pulse/downloader/web_page_parser.rb