pulse-downloader 0.1.0 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 82910e4a2a00c0519958083ae3380241bb4c0cd3166d513d311c338227e4184f
4
- data.tar.gz: c7642a1d110693f56ce933a466312fe851647ede790cba74a18c8157137c2a7e
3
+ metadata.gz: 528b545a62356f1b1d9c1786f975b059f0387c3fecd58d6fe211b62405922d56
4
+ data.tar.gz: a173671066bb527aaf9abdd8e68be48ab0e849a22d70cce4b4abaf0271ba425a
5
5
  SHA512:
6
- metadata.gz: 1ec8ec9d18dabd67e9c7e01abf5718206512e9169b2cc38c4ec2654b1ae289d7955945995ea55fda637c7a6363f8f053baa681a6ba463c1b6f3a2519bf1b714c
7
- data.tar.gz: 37dc7aa8612c40f696c6902e3e791811ac801106a1c45243ba5597f73bbecd7198b077d8d37340a7e010c995252c092ea3f8a869498cac0844762d5d72412ab3
6
+ metadata.gz: 8028861136a23be938d906811375d24dd4da26ad25c33b97ed8872316610daf34ca8e83a285a16e5b76445bab5fe8164592848442a51d7c3dd00696e9092f688
7
+ data.tar.gz: 50b64203c44b39b458fd8ea5dc7610ba6d1e22418a74762d6ebae78c06a14a31ea5c3e7606eec21cf1a60bf6c82f1d1be6549b3eb8a7f3945777f35d93b27714
@@ -0,0 +1,18 @@
1
+ on:
2
+ pull_request: {}
3
+ push:
4
+ branches:
5
+ - main
6
+ - master
7
+ name: Semgrep
8
+ jobs:
9
+ semgrep:
10
+ name: Scan
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ - uses: returntocorp/semgrep-action@v1
15
+ with:
16
+ auditOn: push
17
+ publishToken: ${{ secrets.SEMGREP_APP_TOKEN }}
18
+ publishDeployment: 607
data/.gitignore CHANGED
File without changes
data/CODE_OF_CONDUCT.md CHANGED
File without changes
data/Gemfile CHANGED
File without changes
data/Gemfile.lock CHANGED
@@ -1,79 +1,79 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pulse-downloader (0.1.0)
4
+ pulse-downloader (0.1.4)
5
5
  active_attr (~> 0.15)
6
6
  httparty (~> 0.18)
7
- nokogiri (~> 1.10.9)
7
+ nokogiri (~> 1.11)
8
8
 
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- actionpack (6.0.3.2)
13
- actionview (= 6.0.3.2)
14
- activesupport (= 6.0.3.2)
15
- rack (~> 2.0, >= 2.0.8)
12
+ actionpack (6.1.3.1)
13
+ actionview (= 6.1.3.1)
14
+ activesupport (= 6.1.3.1)
15
+ rack (~> 2.0, >= 2.0.9)
16
16
  rack-test (>= 0.6.3)
17
17
  rails-dom-testing (~> 2.0)
18
18
  rails-html-sanitizer (~> 1.0, >= 1.2.0)
19
- actionview (6.0.3.2)
20
- activesupport (= 6.0.3.2)
19
+ actionview (6.1.3.1)
20
+ activesupport (= 6.1.3.1)
21
21
  builder (~> 3.1)
22
22
  erubi (~> 1.4)
23
23
  rails-dom-testing (~> 2.0)
24
24
  rails-html-sanitizer (~> 1.1, >= 1.2.0)
25
- active_attr (0.15.0)
26
- actionpack (>= 3.0.2, < 6.1)
27
- activemodel (>= 3.0.2, < 6.1)
28
- activesupport (>= 3.0.2, < 6.1)
29
- activemodel (6.0.3.2)
30
- activesupport (= 6.0.3.2)
31
- activesupport (6.0.3.2)
25
+ active_attr (0.15.3)
26
+ actionpack (>= 3.0.2, < 7.0)
27
+ activemodel (>= 3.0.2, < 7.0)
28
+ activesupport (>= 3.0.2, < 7.0)
29
+ activemodel (6.1.3.1)
30
+ activesupport (= 6.1.3.1)
31
+ activesupport (6.1.3.1)
32
32
  concurrent-ruby (~> 1.0, >= 1.0.2)
33
- i18n (>= 0.7, < 2)
34
- minitest (~> 5.1)
35
- tzinfo (~> 1.1)
36
- zeitwerk (~> 2.2, >= 2.2.2)
33
+ i18n (>= 1.6, < 2)
34
+ minitest (>= 5.1)
35
+ tzinfo (~> 2.0)
36
+ zeitwerk (~> 2.3)
37
37
  addressable (2.7.0)
38
38
  public_suffix (>= 2.0.2, < 5.0)
39
39
  ansi (1.5.0)
40
40
  builder (3.2.4)
41
41
  coderay (1.1.3)
42
- concurrent-ruby (1.1.6)
43
- crack (0.4.3)
44
- safe_yaml (~> 1.0.0)
42
+ concurrent-ruby (1.1.8)
43
+ crack (0.4.5)
44
+ rexml
45
45
  crass (1.0.6)
46
- erubi (1.9.0)
46
+ erubi (1.10.0)
47
47
  hashdiff (1.0.1)
48
48
  httparty (0.18.1)
49
49
  mime-types (~> 3.0)
50
50
  multi_xml (>= 0.5.2)
51
- i18n (1.8.3)
51
+ i18n (1.8.10)
52
52
  concurrent-ruby (~> 1.0)
53
- loofah (2.6.0)
53
+ loofah (2.9.1)
54
54
  crass (~> 1.0.2)
55
55
  nokogiri (>= 1.5.9)
56
56
  method_source (1.0.0)
57
57
  mime-types (3.3.1)
58
58
  mime-types-data (~> 3.2015)
59
- mime-types-data (3.2020.0512)
60
- mini_portile2 (2.4.0)
61
- minitest (5.14.1)
59
+ mime-types-data (3.2021.0225)
60
+ minitest (5.14.4)
62
61
  minitest-focus (1.1.2)
63
62
  minitest (>= 4, < 6)
64
- minitest-reporters (1.4.2)
63
+ minitest-reporters (1.4.3)
65
64
  ansi
66
65
  builder
67
66
  minitest (>= 5.0)
68
67
  ruby-progressbar
69
68
  mocha (1.11.2)
70
69
  multi_xml (0.6.0)
71
- nokogiri (1.10.10)
72
- mini_portile2 (~> 2.4.0)
73
- pry (0.13.1)
70
+ nokogiri (1.11.3-x86_64-linux)
71
+ racc (~> 1.4)
72
+ pry (0.14.1)
74
73
  coderay (~> 1.1)
75
74
  method_source (~> 1.0)
76
- public_suffix (4.0.5)
75
+ public_suffix (4.0.6)
76
+ racc (1.5.2)
77
77
  rack (2.2.3)
78
78
  rack-test (1.1.0)
79
79
  rack (>= 1.0, < 3)
@@ -83,23 +83,22 @@ GEM
83
83
  rails-html-sanitizer (1.3.0)
84
84
  loofah (~> 2.3)
85
85
  rake (12.3.3)
86
- ruby-progressbar (1.10.1)
87
- safe_yaml (1.0.5)
88
- thread_safe (0.3.6)
89
- timecop (0.9.1)
90
- tzinfo (1.2.7)
91
- thread_safe (~> 0.1)
86
+ rexml (3.2.5)
87
+ ruby-progressbar (1.11.0)
88
+ timecop (0.9.4)
89
+ tzinfo (2.0.4)
90
+ concurrent-ruby (~> 1.0)
92
91
  webmock (3.8.3)
93
92
  addressable (>= 2.3.6)
94
93
  crack (>= 0.3.2)
95
94
  hashdiff (>= 0.4.0, < 2.0.0)
96
- zeitwerk (2.4.0)
95
+ zeitwerk (2.4.2)
97
96
 
98
97
  PLATFORMS
99
- ruby
98
+ x86_64-linux
100
99
 
101
100
  DEPENDENCIES
102
- bundler (~> 2.1.4)
101
+ bundler (~> 2.2.16)
103
102
  minitest (~> 5.0)
104
103
  minitest-focus (~> 1.1.2)
105
104
  minitest-reporters (~> 1.4.2)
@@ -111,4 +110,4 @@ DEPENDENCIES
111
110
  webmock (~> 3.8.3)
112
111
 
113
112
  BUNDLED WITH
114
- 2.1.4
113
+ 2.2.16
data/LICENSE CHANGED
File without changes
data/LICENSE.txt CHANGED
File without changes
data/README.md CHANGED
@@ -1,8 +1,6 @@
1
1
  # Pulse::Downloader
2
2
 
3
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/pulse/downloader`. To experiment with that code, run `bin/console` for an interactive prompt.
4
-
5
- TODO: Delete this and the text above, and describe your gem
3
+ This is a library for downloading a specific group of files that are linked from an HTML page.
6
4
 
7
5
  ## Installation
8
6
 
@@ -22,7 +20,24 @@ Or install it yourself as:
22
20
 
23
21
  ## Usage
24
22
 
25
- TODO: Write usage instructions here
23
+ ```ruby
24
+ require 'pulse/downloader'
25
+
26
+ client = Pulse::Downloader::Client.new(
27
+ url: '',
28
+ file_type: 'zip',
29
+ save_data: true,
30
+ save_path: '',
31
+ read_from_save_path: false,
32
+ verify_ssl: true,
33
+ drop_exitsing_files_in_path: false,
34
+ save_and_dont_return: true,
35
+ report_time: false,
36
+ progress_bar: true
37
+ )
38
+
39
+ client.call!
40
+ ```
26
41
 
27
42
  ## Development
28
43
 
@@ -34,7 +49,6 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
34
49
 
35
50
  Bug reports and pull requests are welcome on GitHub at https://github.com/TRex22/pulse-downloader. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/TRex22/pulse-downloader/blob/master/CODE_OF_CONDUCT.md).
36
51
 
37
-
38
52
  ## License
39
53
 
40
54
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile CHANGED
File without changes
@@ -3,6 +3,7 @@ require 'nokogiri'
3
3
 
4
4
  require "pulse/downloader/version"
5
5
  require 'pulse/downloader/web_page_parser'
6
+ require 'pulse/downloader/file_checker'
6
7
  require 'pulse/downloader/file_downloader'
7
8
  require 'pulse/downloader/client'
8
9
 
@@ -2,16 +2,52 @@ module Pulse
2
2
  module Downloader
3
3
  class Client
4
4
  include ::Pulse::Downloader::WebPageParser
5
+ include ::Pulse::Downloader::FileChecker
5
6
  include ::Pulse::Downloader::FileDownloader
6
7
 
7
- attr_reader :path, :file_type, :save_data, :save_path, :read_from_save_path
8
+ attr_reader :url,
9
+ :file_type,
10
+ :save_data,
11
+ :save_path,
12
+ :read_from_save_path,
13
+ :verify_ssl,
14
+ :drop_exitsing_files_in_path,
15
+ :save_and_dont_return,
16
+ :report_time,
17
+ :start_time,
18
+ :end_time,
19
+ :progress_bar
8
20
 
9
- def initialize(path:, file_type:, save_data: false, save_path: '', read_from_save_path: false)
10
- @path = path
21
+ # Does not resume interrupted downloads:
22
+ # a file is only saved once it has been fully downloaded into memory.
23
+
24
+ # TODO: Validation
25
+ # TODO: Retry
26
+ # TODO: DNS
27
+ def initialize(url:,
28
+ file_type:,
29
+ save_data: false,
30
+ save_path: '',
31
+ read_from_save_path: false,
32
+ verify_ssl: true,
33
+ drop_exitsing_files_in_path: false,
34
+ save_and_dont_return: true,
35
+ report_time: false,
36
+ progress_bar: false)
37
+
38
+ @url = url
11
39
  @file_type = file_type
12
40
  @save_data = save_data
13
41
  @save_path = save_path
14
42
  @read_from_save_path = read_from_save_path
43
+ @verify_ssl = verify_ssl
44
+ @drop_exitsing_files_in_path = drop_exitsing_files_in_path
45
+ @save_and_dont_return = save_and_dont_return
46
+ @report_time = report_time
47
+
48
+ if progress_bar
49
+ @progress_bar = ProgressBar.new
50
+ end
15
51
  end
16
52
 
17
53
  def call!
@@ -21,8 +57,13 @@ module Pulse
21
57
  def call
22
58
  return false unless valid?
23
59
 
60
+ if @progress_bar
61
+ @progress_bar = ProgressBar.new(fetch_file_paths.size)
62
+ end
63
+
24
64
  fetch_file_paths.map do |file_path|
25
- download(file_path)
65
+ download(file_path, @progress_bar)
66
+ @progress_bar.increment!
26
67
  end
27
68
  end
28
69
 
@@ -33,11 +74,17 @@ module Pulse
33
74
  private
34
75
 
35
76
  def get_micro_second_time
36
- (Time.now.to_f * 1000000).to_i
77
+ (Time.now.to_f * 1000).to_i
37
78
  end
38
79
 
39
- def compute_filename(file_path)
40
- file_path.scan(/[\/]\S+/).last
80
+ def print_time(progress_bar=nil)
81
+ output = "Request time: #{end_time - start_time} ms."
82
+
83
+ if progress_bar
84
+ progress_bar.puts output
85
+ else
86
+ puts output
87
+ end
41
88
  end
42
89
  end
43
90
  end
@@ -0,0 +1,27 @@
1
+ module Pulse
2
+ module Downloader
3
+ module FileChecker
4
+ def file_path_in_file_list?(file_path)
5
+ return false unless drop_exitsing_files_in_path && save_data
6
+
7
+ list_files_in(save_path).include?(compute_save_path(file_path))
8
+ end
9
+
10
+ private
11
+
12
+ def compute_save_path(url)
13
+ "#{save_path}/#{compute_filename(url)}".gsub('//', '/')
14
+ end
15
+
16
+ def compute_filename(file_path)
17
+ file_path.scan(/[\/]\S+/).last
18
+ end
19
+
20
+ def list_files_in(path)
21
+ `ls #{path}`.split("\n").map do |filename|
22
+ "#{path}/#{filename}".gsub('//', '/')
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -1,29 +1,71 @@
1
1
  module Pulse
2
2
  module Downloader
3
3
  module FileDownloader
4
- # save_path are defined in client.rb
5
- def download(file_path)
4
+ # save_path and verify_ssl are defined in client.rb
5
+ def download(file_path, progress_bar=nil)
6
6
  raise "save_path is undefined" if save_data && save_path == ''
7
+ return if file_path_in_file_list?(file_path) # skip downloading the file
7
8
 
8
- start_time = get_micro_second_time
9
+ @start_time = get_micro_second_time
9
10
 
10
- file_data = HTTParty.get(file_path)
11
+ file_data = HTTParty.get(compute_file_link(file_path), verify: verify_ssl)
11
12
 
12
- # TODO: Use the time
13
- end_time = get_micro_second_time
13
+ @end_time = get_micro_second_time
14
+
15
+ if report_time
16
+ print_time(progress_bar)
17
+ end
14
18
 
15
19
  if save_data
16
- File.open("#{save_path}/#{compute_filename(file_path)}", 'wb') do |file|
20
+ File.open(compute_save_path(file_path), 'wb') do |file|
17
21
  file.write(file_data.body)
18
22
  end
23
+
24
+ return true if save_and_dont_return
19
25
  end
20
26
 
21
27
  file_data
22
28
  end
23
29
 
30
+ def fetch_save_paths
31
+ fetch_file_paths.map do |file_path|
32
+ "#{save_path}/#{compute_filename(file_path)}"
33
+ end
34
+ end
35
+
24
36
  def compute_hash_of(data)
25
37
  { data: data }.hash
26
38
  end
39
+
40
+ private
41
+
42
+ def compute_file_link(file_path)
43
+ if section?(file_path)
44
+ raise 'invalid download path'
45
+ elsif absolute?(file_path)
46
+ file_path
47
+ elsif relative?(file_path)
48
+ "#{url}/#{file_path}"
49
+ else
50
+ "#{url}/#{file_path}"
51
+ end
52
+ end
53
+
54
+ def absolute?(file_path)
55
+ file_path.include?('http://') ||
56
+ file_path.include?('https://') ||
57
+ file_path.include?('ftp://') ||
58
+ file_path.include?('sftp://')||
59
+ file_path.include?('file://')
60
+ end
61
+
62
+ def relative?(file_path)
63
+ file_path[0] == '/'
64
+ end
65
+
66
+ def section?(file_path)
67
+ file_path[0] == '#'
68
+ end
27
69
  end
28
70
  end
29
71
  end
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.0"
3
+ VERSION = "0.1.5"
4
4
  end
5
5
  end
@@ -2,24 +2,30 @@ module Pulse
2
2
  module Downloader
3
3
  module WebPageParser
4
4
  def fetch_file_paths
5
- start_time = get_micro_second_time
5
+ @start_time = get_micro_second_time
6
6
 
7
- response = HTTParty.get(@path)
7
+ response = HTTParty.get(url, verify: verify_ssl)
8
8
 
9
- # TODO: Use the time
10
- end_time = get_micro_second_time
11
- extract_file_urls(response, start_time, end_time)
9
+ @end_time = get_micro_second_time
10
+
11
+ if report_time
12
+ print_time
13
+ end
14
+
15
+ extract_file_urls(response)
12
16
  end
13
17
 
14
18
  private
15
19
 
16
- def extract_file_urls(response, start_time, end_time)
17
- parse_html(response)
20
+ def extract_file_urls(response)
21
+ return [] if response.body.nil? || response.body.empty?
22
+
23
+ parse_html(response.body)
18
24
  .css('a')
19
25
  .to_a
20
26
  .map { |link| link['href'] }
21
27
  .compact
22
- .select { |link| link.include? @file_type }
28
+ .select { |link| link.include? file_type }
23
29
  end
24
30
 
25
31
  def parse_html(raw_html)
@@ -22,10 +22,11 @@ Gem::Specification.new do |spec|
22
22
 
23
23
  spec.add_dependency "httparty", "~> 0.18"
24
24
  spec.add_dependency "active_attr", "~> 0.15"
25
- spec.add_dependency "nokogiri", "~> 1.10.9"
25
+ spec.add_dependency "nokogiri", "~> 1.11"
26
+ spec.add_dependency "progress_bar", "~> 1.3.3"
26
27
 
27
28
  # Development dependencies
28
- spec.add_development_dependency "bundler", "~> 2.1.4"
29
+ spec.add_development_dependency "bundler", "~> 2.2.16"
29
30
  spec.add_development_dependency "rake", "~> 13.0"
30
31
  spec.add_development_dependency "minitest", "~> 5.0"
31
32
  spec.add_development_dependency "minitest-focus", "~> 1.1.2"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-15 00:00:00.000000000 Z
11
+ date: 2021-04-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty
@@ -44,28 +44,42 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 1.10.9
47
+ version: '1.11'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 1.10.9
54
+ version: '1.11'
55
+ - !ruby/object:Gem::Dependency
56
+ name: progress_bar
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 1.3.3
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 1.3.3
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: bundler
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: 2.1.4
75
+ version: 2.2.16
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: 2.1.4
82
+ version: 2.2.16
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rake
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -185,6 +199,7 @@ executables: []
185
199
  extensions: []
186
200
  extra_rdoc_files: []
187
201
  files:
202
+ - ".github/workflows/semgrep.yml"
188
203
  - ".gitignore"
189
204
  - CODE_OF_CONDUCT.md
190
205
  - Gemfile
@@ -197,6 +212,7 @@ files:
197
212
  - bin/setup
198
213
  - lib/pulse/downloader.rb
199
214
  - lib/pulse/downloader/client.rb
215
+ - lib/pulse/downloader/file_checker.rb
200
216
  - lib/pulse/downloader/file_downloader.rb
201
217
  - lib/pulse/downloader/version.rb
202
218
  - lib/pulse/downloader/web_page_parser.rb
@@ -205,7 +221,7 @@ homepage: https://github.com/TRex22/pulse-downloader
205
221
  licenses:
206
222
  - MIT
207
223
  metadata: {}
208
- post_install_message:
224
+ post_install_message:
209
225
  rdoc_options: []
210
226
  require_paths:
211
227
  - lib
@@ -220,8 +236,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
220
236
  - !ruby/object:Gem::Version
221
237
  version: '0'
222
238
  requirements: []
223
- rubygems_version: 3.1.4
224
- signing_key:
239
+ rubygems_version: 3.2.3
240
+ signing_key:
225
241
  specification_version: 4
226
242
  summary: Client to download datasets from webpages.
227
243
  test_files: []