pulse-downloader 0.1.0 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 82910e4a2a00c0519958083ae3380241bb4c0cd3166d513d311c338227e4184f
4
- data.tar.gz: c7642a1d110693f56ce933a466312fe851647ede790cba74a18c8157137c2a7e
3
+ metadata.gz: 143241844753713ded3ebdb7fccf727a20ab2055ad9c40e9936b88f9ecd4a0a5
4
+ data.tar.gz: 471f4432385de2dc96223ed014e5d3c88adccfda7c98a591b1cd2c915dbe3256
5
5
  SHA512:
6
- metadata.gz: 1ec8ec9d18dabd67e9c7e01abf5718206512e9169b2cc38c4ec2654b1ae289d7955945995ea55fda637c7a6363f8f053baa681a6ba463c1b6f3a2519bf1b714c
7
- data.tar.gz: 37dc7aa8612c40f696c6902e3e791811ac801106a1c45243ba5597f73bbecd7198b077d8d37340a7e010c995252c092ea3f8a869498cac0844762d5d72412ab3
6
+ metadata.gz: f26e86e5e59e24be532fc542e2de26c33d69dd2e4bb264ef6fe1f28f46d68782d5d0838a80db769912fc202598dff4e40aad10a856e4659e01105c73997b7ed3
7
+ data.tar.gz: a29a979395ce6b6d7311e7150e22f1ed11a62cd317e19aa42a16241674e7abe33397e8d210882aeb4fa6f7835e3a32e321ccdadf13ecc20219c7a9065a38f9f5
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pulse-downloader (0.1.0)
4
+ pulse-downloader (0.1.3)
5
5
  active_attr (~> 0.15)
6
6
  httparty (~> 0.18)
7
7
  nokogiri (~> 1.10.9)
data/README.md CHANGED
@@ -1,8 +1,6 @@
1
1
  # Pulse::Downloader
2
2
 
3
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/pulse/downloader`. To experiment with that code, run `bin/console` for an interactive prompt.
4
-
5
- TODO: Delete this and the text above, and describe your gem
3
+ This is a library for downloading a specific group of files linked from an HTML page.
6
4
 
7
5
  ## Installation
8
6
 
@@ -22,7 +20,21 @@ Or install it yourself as:
22
20
 
23
21
  ## Usage
24
22
 
25
- TODO: Write usage instructions here
23
+ ```ruby
24
+ require 'pulse/downloader'
25
+
26
+ client = Pulse::Downloader::Client.new(
27
+ url: '',
28
+ file_type: 'zip',
29
+ save_data: true,
30
+ save_path: '',
31
+ read_from_save_path: false,
32
+ verify_ssl: true,
33
+ drop_exitsing_files_in_path: false,
34
+ save_and_dont_return: true,
35
+ report_time: false
36
+ )
37
+ ```
26
38
 
27
39
  ## Development
28
40
 
@@ -34,7 +46,6 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
34
46
 
35
47
  Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/pulse-downloader. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/pulse-downloader/blob/master/CODE_OF_CONDUCT.md).
36
48
 
37
-
38
49
  ## License
39
50
 
40
51
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -3,6 +3,7 @@ require 'nokogiri'
3
3
 
4
4
  require "pulse/downloader/version"
5
5
  require 'pulse/downloader/web_page_parser'
6
+ require 'pulse/downloader/file_checker'
6
7
  require 'pulse/downloader/file_downloader'
7
8
  require 'pulse/downloader/client'
8
9
 
@@ -2,16 +2,45 @@ module Pulse
2
2
  module Downloader
3
3
  class Client
4
4
  include ::Pulse::Downloader::WebPageParser
5
+ include ::Pulse::Downloader::FileChecker
5
6
  include ::Pulse::Downloader::FileDownloader
6
7
 
7
- attr_reader :path, :file_type, :save_data, :save_path, :read_from_save_path
8
+ attr_reader :url,
9
+ :file_type,
10
+ :save_data,
11
+ :save_path,
12
+ :read_from_save_path,
13
+ :verify_ssl,
14
+ :drop_exitsing_files_in_path,
15
+ :report_time,
16
+ :start_time,
17
+ :end_time
8
18
 
9
- def initialize(path:, file_type:, save_data: false, save_path: '', read_from_save_path: false)
10
- @path = path
19
+ # Does not resume partially completed downloads.
20
+ # A file is only saved once it has been fully downloaded into memory.
21
+
22
+ # TODO: Add in progress bar
23
+ # TODO: Validation
24
+ # TODO: Retry
25
+ # TODO: DNS
26
+ def initialize(url:,
27
+ file_type:,
28
+ save_data: false,
29
+ save_path: '',
30
+ read_from_save_path: false,
31
+ verify_ssl: true,
32
+ drop_exitsing_files_in_path: false,
33
+ save_and_dont_return: true,
34
+ report_time: false)
35
+
36
+ @url = url
11
37
  @file_type = file_type
12
38
  @save_data = save_data
13
39
  @save_path = save_path
14
40
  @read_from_save_path = read_from_save_path
41
+ @verify_ssl = verify_ssl
42
+ @drop_exitsing_files_in_path = drop_exitsing_files_in_path
43
+ @report_time = report_time
15
44
  end
16
45
 
17
46
  def call!
@@ -33,11 +62,11 @@ module Pulse
33
62
  private
34
63
 
35
64
  def get_micro_second_time
36
- (Time.now.to_f * 1000000).to_i
65
+ (Time.now.to_f * 1000).to_i
37
66
  end
38
67
 
39
- def compute_filename(file_path)
40
- file_path.scan(/[\/]\S+/).last
68
+ def print_time
69
+ puts "Request time: #{end_time - start_time} ms."
41
70
  end
42
71
  end
43
72
  end
@@ -0,0 +1,27 @@
1
+ module Pulse
2
+ module Downloader
3
+ module FileChecker
4
+ def file_path_in_file_list?(file_path)
5
+ return false unless drop_exitsing_files_in_path && save_data
6
+
7
+ list_files_in(save_path).include?(compute_save_path(file_path))
8
+ end
9
+
10
+ private
11
+
12
+ def compute_save_path(url)
13
+ "#{save_path}/#{compute_filename(url)}".gsub('//', '/')
14
+ end
15
+
16
+ def compute_filename(file_path)
17
+ file_path.scan(/[\/]\S+/).last
18
+ end
19
+
20
+ def list_files_in(path)
21
+ `ls #{path}`.split("\n").map do |filename|
22
+ "#{path}/#{filename}".gsub('//', '/')
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -1,29 +1,71 @@
1
1
  module Pulse
2
2
  module Downloader
3
3
  module FileDownloader
4
- # save_path are defined in client.rb
4
+ # save_path and verify_ssl are defined in client.rb
5
5
  def download(file_path)
6
6
  raise "save_path is undefined" if save_data && save_path == ''
7
+ return if file_path_in_file_list?(file_path) # skip downloading the file
7
8
 
8
- start_time = get_micro_second_time
9
+ @start_time = get_micro_second_time
9
10
 
10
- file_data = HTTParty.get(file_path)
11
+ file_data = HTTParty.get(compute_file_link(file_path), verify: verify_ssl)
11
12
 
12
- # TODO: Use the time
13
- end_time = get_micro_second_time
13
+ @end_time = get_micro_second_time
14
+
15
+ if report_time
16
+ print_time
17
+ end
14
18
 
15
19
  if save_data
16
- File.open("#{save_path}/#{compute_filename(file_path)}", 'wb') do |file|
20
+ File.open(compute_save_path(file_path), 'wb') do |file|
17
21
  file.write(file_data.body)
18
22
  end
23
+
24
+ return true if save_and_dont_return
19
25
  end
20
26
 
21
27
  file_data
22
28
  end
23
29
 
30
+ def fetch_save_paths
31
+ fetch_file_paths.map do |file_path|
32
+ "#{save_path}/#{compute_filename(file_path)}"
33
+ end
34
+ end
35
+
24
36
  def compute_hash_of(data)
25
37
  { data: data }.hash
26
38
  end
39
+
40
+ private
41
+
42
+ def compute_file_link(file_path)
43
+ if section?(file_path)
44
+ raise 'invalid download path'
45
+ elsif absolute?(file_path)
46
+ file_path
47
+ elsif relative?(file_path)
48
+ "#{url}/#{file_path}"
49
+ else
50
+ "#{url}/#{file_path}"
51
+ end
52
+ end
53
+
54
+ def absolute?(file_path)
55
+ file_path.include?('http://') ||
56
+ file_path.include?('https://') ||
57
+ file_path.include?('ftp://') ||
58
+ file_path.include?('sftp://')||
59
+ file_path.include?('file://')
60
+ end
61
+
62
+ def relative?(file_path)
63
+ file_path[0] == '/'
64
+ end
65
+
66
+ def section?(file_path)
67
+ file_path[0] == '#'
68
+ end
27
69
  end
28
70
  end
29
71
  end
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.0"
3
+ VERSION = "0.1.3"
4
4
  end
5
5
  end
@@ -2,24 +2,30 @@ module Pulse
2
2
  module Downloader
3
3
  module WebPageParser
4
4
  def fetch_file_paths
5
- start_time = get_micro_second_time
5
+ @start_time = get_micro_second_time
6
6
 
7
- response = HTTParty.get(@path)
7
+ response = HTTParty.get(url, verify: verify_ssl)
8
8
 
9
- # TODO: Use the time
10
- end_time = get_micro_second_time
11
- extract_file_urls(response, start_time, end_time)
9
+ @end_time = get_micro_second_time
10
+
11
+ if report_time
12
+ print_time
13
+ end
14
+
15
+ extract_file_urls(response)
12
16
  end
13
17
 
14
18
  private
15
19
 
16
- def extract_file_urls(response, start_time, end_time)
17
- parse_html(response)
20
+ def extract_file_urls(response)
21
+ return [] if response.body.nil? || response.body.empty?
22
+
23
+ parse_html(response.body)
18
24
  .css('a')
19
25
  .to_a
20
26
  .map { |link| link['href'] }
21
27
  .compact
22
- .select { |link| link.include? @file_type }
28
+ .select { |link| link.include? file_type }
23
29
  end
24
30
 
25
31
  def parse_html(raw_html)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-15 00:00:00.000000000 Z
11
+ date: 2020-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty
@@ -197,6 +197,7 @@ files:
197
197
  - bin/setup
198
198
  - lib/pulse/downloader.rb
199
199
  - lib/pulse/downloader/client.rb
200
+ - lib/pulse/downloader/file_checker.rb
200
201
  - lib/pulse/downloader/file_downloader.rb
201
202
  - lib/pulse/downloader/version.rb
202
203
  - lib/pulse/downloader/web_page_parser.rb