pulse-downloader 0.1.1 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bee880729f0af4203d680eb78a1c3f0375137f14b4647e707ae6d4445e0cdd21
4
- data.tar.gz: 504e7020e0af6291d7b39ba06f03fd40b23baae1c39263a5e0906982fdb38212
3
+ metadata.gz: 29c8e757ece7a21581d6e2699e21ee7ff2c7f10ec97c1b11ca42f7b360933f23
4
+ data.tar.gz: c010c991869f751618d91e766091b8ff1de846feb068b627855738a21777ea74
5
5
  SHA512:
6
- metadata.gz: e32fb830552f65170cdf26a2a13789cb058843df064ef2da2f393ad91cb8e5b06c741885bd7d6d665021bb2eb17ef7157ade02cd03b9218bfe4f753a798100d9
7
- data.tar.gz: 39186157cbe4cff6c91e7a9d9b2424af1720563f18c9ca4ceb438e1bbe854de743b1677525cc57495393a080b72bfcd2e5aeceb78f37bd0f0efb5bcdfd0a755e
6
+ metadata.gz: 37ac13c68205845e979e6f2031a412fd6ea5b9fd679ffba89a33625a380fd60a6a0c142cbc4580a036cee777d48175251ad5cced875f3a6d6a2e9305c6d6518c
7
+ data.tar.gz: eca9461c1f3d722e42da98d7328712291a8fa41fe37d162d8c6aaed208978eed36c6b5c7a7db244defaab7f0fc561e3a5bedee659d9989e25148d89273e2439e
@@ -0,0 +1,18 @@
1
+ on:
2
+ pull_request: {}
3
+ push:
4
+ branches:
5
+ - main
6
+ - master
7
+ name: Semgrep
8
+ jobs:
9
+ semgrep:
10
+ name: Scan
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ - uses: returntocorp/semgrep-action@v1
15
+ with:
16
+ auditOn: push
17
+ publishToken: ${{ secrets.SEMGREP_APP_TOKEN }}
18
+ publishDeployment: 607
data/.gitignore CHANGED
File without changes
data/CODE_OF_CONDUCT.md CHANGED
File without changes
data/Gemfile CHANGED
File without changes
data/Gemfile.lock CHANGED
@@ -1,79 +1,79 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pulse-downloader (0.1.1)
4
+ pulse-downloader (0.1.4)
5
5
  active_attr (~> 0.15)
6
6
  httparty (~> 0.18)
7
- nokogiri (~> 1.10.9)
7
+ nokogiri (~> 1.11)
8
8
 
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- actionpack (6.0.3.2)
13
- actionview (= 6.0.3.2)
14
- activesupport (= 6.0.3.2)
15
- rack (~> 2.0, >= 2.0.8)
12
+ actionpack (6.1.3.1)
13
+ actionview (= 6.1.3.1)
14
+ activesupport (= 6.1.3.1)
15
+ rack (~> 2.0, >= 2.0.9)
16
16
  rack-test (>= 0.6.3)
17
17
  rails-dom-testing (~> 2.0)
18
18
  rails-html-sanitizer (~> 1.0, >= 1.2.0)
19
- actionview (6.0.3.2)
20
- activesupport (= 6.0.3.2)
19
+ actionview (6.1.3.1)
20
+ activesupport (= 6.1.3.1)
21
21
  builder (~> 3.1)
22
22
  erubi (~> 1.4)
23
23
  rails-dom-testing (~> 2.0)
24
24
  rails-html-sanitizer (~> 1.1, >= 1.2.0)
25
- active_attr (0.15.0)
26
- actionpack (>= 3.0.2, < 6.1)
27
- activemodel (>= 3.0.2, < 6.1)
28
- activesupport (>= 3.0.2, < 6.1)
29
- activemodel (6.0.3.2)
30
- activesupport (= 6.0.3.2)
31
- activesupport (6.0.3.2)
25
+ active_attr (0.15.3)
26
+ actionpack (>= 3.0.2, < 7.0)
27
+ activemodel (>= 3.0.2, < 7.0)
28
+ activesupport (>= 3.0.2, < 7.0)
29
+ activemodel (6.1.3.1)
30
+ activesupport (= 6.1.3.1)
31
+ activesupport (6.1.3.1)
32
32
  concurrent-ruby (~> 1.0, >= 1.0.2)
33
- i18n (>= 0.7, < 2)
34
- minitest (~> 5.1)
35
- tzinfo (~> 1.1)
36
- zeitwerk (~> 2.2, >= 2.2.2)
33
+ i18n (>= 1.6, < 2)
34
+ minitest (>= 5.1)
35
+ tzinfo (~> 2.0)
36
+ zeitwerk (~> 2.3)
37
37
  addressable (2.7.0)
38
38
  public_suffix (>= 2.0.2, < 5.0)
39
39
  ansi (1.5.0)
40
40
  builder (3.2.4)
41
41
  coderay (1.1.3)
42
- concurrent-ruby (1.1.6)
43
- crack (0.4.3)
44
- safe_yaml (~> 1.0.0)
42
+ concurrent-ruby (1.1.8)
43
+ crack (0.4.5)
44
+ rexml
45
45
  crass (1.0.6)
46
- erubi (1.9.0)
46
+ erubi (1.10.0)
47
47
  hashdiff (1.0.1)
48
48
  httparty (0.18.1)
49
49
  mime-types (~> 3.0)
50
50
  multi_xml (>= 0.5.2)
51
- i18n (1.8.3)
51
+ i18n (1.8.10)
52
52
  concurrent-ruby (~> 1.0)
53
- loofah (2.6.0)
53
+ loofah (2.9.1)
54
54
  crass (~> 1.0.2)
55
55
  nokogiri (>= 1.5.9)
56
56
  method_source (1.0.0)
57
57
  mime-types (3.3.1)
58
58
  mime-types-data (~> 3.2015)
59
- mime-types-data (3.2020.0512)
60
- mini_portile2 (2.4.0)
61
- minitest (5.14.1)
59
+ mime-types-data (3.2021.0225)
60
+ minitest (5.14.4)
62
61
  minitest-focus (1.1.2)
63
62
  minitest (>= 4, < 6)
64
- minitest-reporters (1.4.2)
63
+ minitest-reporters (1.4.3)
65
64
  ansi
66
65
  builder
67
66
  minitest (>= 5.0)
68
67
  ruby-progressbar
69
68
  mocha (1.11.2)
70
69
  multi_xml (0.6.0)
71
- nokogiri (1.10.10)
72
- mini_portile2 (~> 2.4.0)
73
- pry (0.13.1)
70
+ nokogiri (1.11.3-x86_64-linux)
71
+ racc (~> 1.4)
72
+ pry (0.14.1)
74
73
  coderay (~> 1.1)
75
74
  method_source (~> 1.0)
76
- public_suffix (4.0.5)
75
+ public_suffix (4.0.6)
76
+ racc (1.5.2)
77
77
  rack (2.2.3)
78
78
  rack-test (1.1.0)
79
79
  rack (>= 1.0, < 3)
@@ -83,23 +83,22 @@ GEM
83
83
  rails-html-sanitizer (1.3.0)
84
84
  loofah (~> 2.3)
85
85
  rake (12.3.3)
86
- ruby-progressbar (1.10.1)
87
- safe_yaml (1.0.5)
88
- thread_safe (0.3.6)
89
- timecop (0.9.1)
90
- tzinfo (1.2.7)
91
- thread_safe (~> 0.1)
86
+ rexml (3.2.5)
87
+ ruby-progressbar (1.11.0)
88
+ timecop (0.9.4)
89
+ tzinfo (2.0.4)
90
+ concurrent-ruby (~> 1.0)
92
91
  webmock (3.8.3)
93
92
  addressable (>= 2.3.6)
94
93
  crack (>= 0.3.2)
95
94
  hashdiff (>= 0.4.0, < 2.0.0)
96
- zeitwerk (2.4.0)
95
+ zeitwerk (2.4.2)
97
96
 
98
97
  PLATFORMS
99
- ruby
98
+ x86_64-linux
100
99
 
101
100
  DEPENDENCIES
102
- bundler (~> 2.1.4)
101
+ bundler (~> 2.2.16)
103
102
  minitest (~> 5.0)
104
103
  minitest-focus (~> 1.1.2)
105
104
  minitest-reporters (~> 1.4.2)
@@ -111,4 +110,4 @@ DEPENDENCIES
111
110
  webmock (~> 3.8.3)
112
111
 
113
112
  BUNDLED WITH
114
- 2.1.4
113
+ 2.2.16
data/LICENSE CHANGED
File without changes
data/LICENSE.txt CHANGED
File without changes
data/README.md CHANGED
@@ -30,8 +30,13 @@ client = Pulse::Downloader::Client.new(
30
30
  save_path: '',
31
31
  read_from_save_path: false,
32
32
  verify_ssl: true,
33
- report_time: false
33
+ drop_exitsing_files_in_path: false,
34
+ save_and_dont_return: true,
35
+ report_time: false,
36
+ progress_bar: true
34
37
  )
38
+
39
+ client.call!
35
40
  ```
36
41
 
37
42
  ## Development
data/Rakefile CHANGED
File without changes
@@ -3,6 +3,7 @@ require 'nokogiri'
3
3
 
4
4
  require "pulse/downloader/version"
5
5
  require 'pulse/downloader/web_page_parser'
6
+ require 'pulse/downloader/file_checker'
6
7
  require 'pulse/downloader/file_downloader'
7
8
  require 'pulse/downloader/client'
8
9
 
@@ -1,7 +1,9 @@
1
1
  module Pulse
2
2
  module Downloader
3
3
  class Client
4
+ require 'progress_bar'
4
5
  include ::Pulse::Downloader::WebPageParser
6
+ include ::Pulse::Downloader::FileChecker
5
7
  include ::Pulse::Downloader::FileDownloader
6
8
 
7
9
  attr_reader :url,
@@ -10,21 +12,40 @@ module Pulse
10
12
  :save_path,
11
13
  :read_from_save_path,
12
14
  :verify_ssl,
15
+ :drop_exitsing_files_in_path,
16
+ :save_and_dont_return,
13
17
  :report_time,
14
18
  :start_time,
15
- :end_time
19
+ :end_time,
20
+ :progress_bar
21
+
22
+ # Does not continue downloads-
23
+ # Will only save once the file has been downloaded in memory
16
24
 
17
25
  # TODO: Validation
18
26
  # TODO: Retry
19
27
  # TODO: DNS
20
- def initialize(url:, file_type:, save_data: false, save_path: '', read_from_save_path: false, verify_ssl: true, report_time: false)
28
+ def initialize(url:,
29
+ file_type:,
30
+ save_data: false,
31
+ save_path: '',
32
+ read_from_save_path: false,
33
+ verify_ssl: true,
34
+ drop_exitsing_files_in_path: false,
35
+ save_and_dont_return: true,
36
+ report_time: false,
37
+ progress_bar: false)
38
+
21
39
  @url = url
22
40
  @file_type = file_type
23
41
  @save_data = save_data
24
42
  @save_path = save_path
25
43
  @read_from_save_path = read_from_save_path
26
44
  @verify_ssl = verify_ssl
45
+ @drop_exitsing_files_in_path = drop_exitsing_files_in_path
46
+ @save_and_dont_return = save_and_dont_return
27
47
  @report_time = report_time
48
+ @progress_bar = progress_bar
28
49
  end
29
50
 
30
51
  def call!
@@ -34,8 +55,13 @@ module Pulse
34
55
  def call
35
56
  return false unless valid?
36
57
 
58
+ if @progress_bar
59
+ @progress_bar = ::ProgressBar.new(fetch_file_paths.size)
60
+ end
61
+
37
62
  fetch_file_paths.map do |file_path|
38
- download(file_path)
63
+ download(file_path, @progress_bar)
64
+ @progress_bar.increment!
39
65
  end
40
66
  end
41
67
 
@@ -49,8 +75,14 @@ module Pulse
49
75
  (Time.now.to_f * 1000).to_i
50
76
  end
51
77
 
52
- def print_time
53
- puts "Request time: #{end_time - start_time} ms."
78
+ def print_time(progress_bar=nil)
79
+ output = "Request time: #{end_time - start_time} ms."
80
+
81
+ if progress_bar
82
+ progress_bar.puts output
83
+ else
84
+ puts output
85
+ end
54
86
  end
55
87
  end
56
88
  end
@@ -0,0 +1,27 @@
1
+ module Pulse
2
+ module Downloader
3
+ module FileChecker
4
+ def file_path_in_file_list?(file_path)
5
+ return false unless drop_exitsing_files_in_path && save_data
6
+
7
+ list_files_in(save_path).include?(compute_save_path(file_path))
8
+ end
9
+
10
+ private
11
+
12
+ def compute_save_path(url)
13
+ "#{save_path}/#{compute_filename(url)}".gsub('//', '/')
14
+ end
15
+
16
+ def compute_filename(file_path)
17
+ file_path.scan(/[\/]\S+/).last
18
+ end
19
+
20
+ def list_files_in(path)
21
+ `ls #{path}`.split("\n").map do |filename|
22
+ "#{path}/#{filename}".gsub('//', '/')
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -2,8 +2,9 @@ module Pulse
2
2
  module Downloader
3
3
  module FileDownloader
4
4
  # save_path and verify_ssl are defined in client.rb
5
- def download(file_path)
5
+ def download(file_path, progress_bar=nil)
6
6
  raise "save_path is undefined" if save_data && save_path == ''
7
+ return if file_path_in_file_list?(file_path) # skip downloading the file
7
8
 
8
9
  @start_time = get_micro_second_time
9
10
 
@@ -12,13 +13,15 @@ module Pulse
12
13
  @end_time = get_micro_second_time
13
14
 
14
15
  if report_time
15
- print_time
16
+ print_time(progress_bar)
16
17
  end
17
18
 
18
19
  if save_data
19
20
  File.open(compute_save_path(file_path), 'wb') do |file|
20
21
  file.write(file_data.body)
21
22
  end
23
+
24
+ return true if save_and_dont_return
22
25
  end
23
26
 
24
27
  file_data
@@ -36,21 +39,33 @@ module Pulse
36
39
 
37
40
  private
38
41
 
39
- def compute_save_path(url)
40
- "#{save_path}/#{compute_filename(url)}"
41
- end
42
-
43
- def compute_filename(file_path)
44
- file_path.scan(/[\/]\S+/).last
45
- end
46
-
47
42
  def compute_file_link(file_path)
48
- if file_path[0] == '/'
43
+ if section?(file_path)
44
+ raise 'invalid download path'
45
+ elsif absolute?(file_path)
46
+ file_path
47
+ elsif relative?(file_path)
49
48
  "#{url}/#{file_path}"
50
49
  else
51
- file_path
50
+ "#{url}/#{file_path}"
52
51
  end
53
52
  end
53
+
54
+ def absolute?(file_path)
55
+ file_path.include?('http://') ||
56
+ file_path.include?('https://') ||
57
+ file_path.include?('ftp://') ||
58
+ file_path.include?('sftp://')||
59
+ file_path.include?('file://')
60
+ end
61
+
62
+ def relative?(file_path)
63
+ file_path[0] == '/'
64
+ end
65
+
66
+ def section?(file_path)
67
+ file_path[0] == '#'
68
+ end
54
69
  end
55
70
  end
56
71
  end
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.1"
3
+ VERSION = "0.1.6"
4
4
  end
5
5
  end
File without changes
@@ -22,10 +22,11 @@ Gem::Specification.new do |spec|
22
22
 
23
23
  spec.add_dependency "httparty", "~> 0.18"
24
24
  spec.add_dependency "active_attr", "~> 0.15"
25
- spec.add_dependency "nokogiri", "~> 1.10.9"
25
+ spec.add_dependency "nokogiri", "~> 1.11"
26
+ spec.add_dependency "progress_bar", "~> 1.3.3"
26
27
 
27
28
  # Development dependancies
28
- spec.add_development_dependency "bundler", "~> 2.1.4"
29
+ spec.add_development_dependency "bundler", "~> 2.2.16"
29
30
  spec.add_development_dependency "rake", "~> 13.0"
30
31
  spec.add_development_dependency "minitest", "~> 5.0"
31
32
  spec.add_development_dependency "minitest-focus", "~> 1.1.2"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-15 00:00:00.000000000 Z
11
+ date: 2021-04-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty
@@ -44,28 +44,42 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 1.10.9
47
+ version: '1.11'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 1.10.9
54
+ version: '1.11'
55
+ - !ruby/object:Gem::Dependency
56
+ name: progress_bar
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 1.3.3
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 1.3.3
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: bundler
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: 2.1.4
75
+ version: 2.2.16
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: 2.1.4
82
+ version: 2.2.16
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rake
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -185,6 +199,7 @@ executables: []
185
199
  extensions: []
186
200
  extra_rdoc_files: []
187
201
  files:
202
+ - ".github/workflows/semgrep.yml"
188
203
  - ".gitignore"
189
204
  - CODE_OF_CONDUCT.md
190
205
  - Gemfile
@@ -197,6 +212,7 @@ files:
197
212
  - bin/setup
198
213
  - lib/pulse/downloader.rb
199
214
  - lib/pulse/downloader/client.rb
215
+ - lib/pulse/downloader/file_checker.rb
200
216
  - lib/pulse/downloader/file_downloader.rb
201
217
  - lib/pulse/downloader/version.rb
202
218
  - lib/pulse/downloader/web_page_parser.rb
@@ -205,7 +221,7 @@ homepage: https://github.com/TRex22/pulse-downloader
205
221
  licenses:
206
222
  - MIT
207
223
  metadata: {}
208
- post_install_message:
224
+ post_install_message:
209
225
  rdoc_options: []
210
226
  require_paths:
211
227
  - lib
@@ -220,8 +236,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
220
236
  - !ruby/object:Gem::Version
221
237
  version: '0'
222
238
  requirements: []
223
- rubygems_version: 3.1.4
224
- signing_key:
239
+ rubygems_version: 3.2.3
240
+ signing_key:
225
241
  specification_version: 4
226
242
  summary: Client to download datasets from webpages.
227
243
  test_files: []