pulse-downloader 0.1.10 → 0.1.15

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 73e18a59a2534ac4b716222495792c2624a827aa220647579321552baa31e451
4
- data.tar.gz: 25c64012141ae32139ff3fb6eed9ed8e388a5ce001580127dc28040135731cae
3
+ metadata.gz: 4d2a34a08bede3b49a3d3629fc2d785d6337dc62702dbeb86742323b4baa11aa
4
+ data.tar.gz: 29e276ed758b83ff22453f7774afd4e28aec0b6eac3436302287e239e73e0d73
5
5
  SHA512:
6
- metadata.gz: 1423d3cf884fca38e31d5b54133e5449f365f848f6a1d441edfa053031f8cf5a6b19f99b9b0bbdc2d8d71126fa5294051d101f38ca7d43a7440105644d2ed1f7
7
- data.tar.gz: b86aac8791cb20f12f1e9b992e650614e4f895658c492b61288f581c1aa43148fd21a76612a73df3edd660b5e6d0c3589659c8c97243abc126ab973a1a5bcf07
6
+ metadata.gz: ddd1e433cc5be43243d83867107089945aa756328fa3e3fb4d239b682bfa88c29563e8401a3dccadd266d87169129ae994132a178179740c91520b790a7209c8
7
+ data.tar.gz: cb3c310e893e3bb8c095d1e42b299f5bb3c9f95128b1657f65e066e7c60d154b22947f5d70d8e675f71438e7b0fe5c83cecef5a8806ba8b1a08e29f4a8f4bfb9
data/Gemfile.lock CHANGED
@@ -1,23 +1,24 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pulse-downloader (0.1.4)
4
+ pulse-downloader (0.1.11)
5
5
  active_attr (~> 0.15)
6
6
  httparty (~> 0.18)
7
7
  nokogiri (~> 1.11)
8
+ progress_bar (~> 1.3.3)
8
9
 
9
10
  GEM
10
11
  remote: https://rubygems.org/
11
12
  specs:
12
- actionpack (6.1.3.1)
13
- actionview (= 6.1.3.1)
14
- activesupport (= 6.1.3.1)
13
+ actionpack (6.1.3.2)
14
+ actionview (= 6.1.3.2)
15
+ activesupport (= 6.1.3.2)
15
16
  rack (~> 2.0, >= 2.0.9)
16
17
  rack-test (>= 0.6.3)
17
18
  rails-dom-testing (~> 2.0)
18
19
  rails-html-sanitizer (~> 1.0, >= 1.2.0)
19
- actionview (6.1.3.1)
20
- activesupport (= 6.1.3.1)
20
+ actionview (6.1.3.2)
21
+ activesupport (= 6.1.3.2)
21
22
  builder (~> 3.1)
22
23
  erubi (~> 1.4)
23
24
  rails-dom-testing (~> 2.0)
@@ -26,9 +27,9 @@ GEM
26
27
  actionpack (>= 3.0.2, < 7.0)
27
28
  activemodel (>= 3.0.2, < 7.0)
28
29
  activesupport (>= 3.0.2, < 7.0)
29
- activemodel (6.1.3.1)
30
- activesupport (= 6.1.3.1)
31
- activesupport (6.1.3.1)
30
+ activemodel (6.1.3.2)
31
+ activesupport (= 6.1.3.2)
32
+ activesupport (6.1.3.2)
32
33
  concurrent-ruby (~> 1.0, >= 1.0.2)
33
34
  i18n (>= 1.6, < 2)
34
35
  minitest (>= 5.1)
@@ -45,6 +46,7 @@ GEM
45
46
  crass (1.0.6)
46
47
  erubi (1.10.0)
47
48
  hashdiff (1.0.1)
49
+ highline (2.0.3)
48
50
  httparty (0.18.1)
49
51
  mime-types (~> 3.0)
50
52
  multi_xml (>= 0.5.2)
@@ -69,6 +71,10 @@ GEM
69
71
  multi_xml (0.6.0)
70
72
  nokogiri (1.11.3-x86_64-linux)
71
73
  racc (~> 1.4)
74
+ options (2.3.2)
75
+ progress_bar (1.3.3)
76
+ highline (>= 1.6, < 3)
77
+ options (~> 2.3.0)
72
78
  pry (0.14.1)
73
79
  coderay (~> 1.1)
74
80
  method_source (~> 1.0)
@@ -8,6 +8,7 @@ module Pulse
8
8
 
9
9
  attr_reader :url,
10
10
  :file_type,
11
+ :scrape_images,
11
12
  :save_data,
12
13
  :save_path,
13
14
  :read_from_save_path,
@@ -29,6 +30,7 @@ module Pulse
29
30
  # NOTE: the "URI.escape is obsolete" warning from lib/pulse/downloader/file_downloader.rb is resolved; download now calls the local escape helper instead of URI.escape.
30
31
  def initialize(url:,
31
32
  file_type:,
33
+ scrape_images: false,
32
34
  save_data: false,
33
35
  save_path: '',
34
36
  read_from_save_path: false,
@@ -40,6 +42,7 @@ module Pulse
40
42
 
41
43
  @url = url
42
44
  @file_type = file_type
45
+ @scrape_images = scrape_images
43
46
  @save_data = save_data
44
47
  @save_path = save_path
45
48
  @read_from_save_path = read_from_save_path
@@ -1,8 +1,6 @@
1
1
  module Pulse
2
2
  module Downloader
3
3
  module FileDownloader
4
- require 'uri'
5
-
6
4
  # save_path and verify_ssl are defined in client.rb
7
5
  def download(file_path, progress_bar=nil)
8
6
  raise "save_path is undefined" if save_data && save_path == ''
@@ -10,7 +8,7 @@ module Pulse
10
8
 
11
9
  @start_time = get_micro_second_time
12
10
 
13
- file_data = HTTParty.get(URI.escape(compute_file_link(file_path)), verify: verify_ssl)
11
+ file_data = HTTParty.get(escape(compute_file_link(file_path)), verify: verify_ssl)
14
12
 
15
13
  @end_time = get_micro_second_time
16
14
 
@@ -68,6 +66,37 @@ module Pulse
68
66
  def section?(file_path)
69
67
  file_path[0] == '#'
70
68
  end
69
+
70
# Characters that are percent-encoded in a download link, mapped to their
# encoded form. ":" and "/" are deliberately absent so the scheme and path
# separators of the link are left untouched. The curly quotes “ and ‘ are
# folded into the encodings of their ASCII counterparts (%22 and %27).
ESCAPED_CHARACTERS = {
  " " => "%20",
  "$" => "%24",
  "&" => "%26",
  "`" => "%60",
  "<" => "%3C",
  ">" => "%3E",
  "[" => "%5B",
  "]" => "%5D",
  "{" => "%7B",
  "}" => "%7D",
  "“" => "%22",
  '"' => "%22",
  "+" => "%2B",
  "#" => "%23",
  "%" => "%25",
  "@" => "%40",
  ";" => "%3B",
  "=" => "%3D",
  "?" => "%3F",
  "\\" => "%5C",
  "^" => "%5E",
  "|" => "%7C",
  "~" => "%7E",
  "‘" => "%27",
  "," => "%2C"
}.freeze

# Matches any single character listed in ESCAPED_CHARACTERS.
ESCAPE_PATTERN = Regexp.union(ESCAPED_CHARACTERS.keys).freeze

# Percent-encodes the characters listed in ESCAPED_CHARACTERS.
#
# Every character is encoded exactly once, in a single pass. Replacing the
# characters one after another would re-encode the "%" of sequences that
# were already inserted (a space would end up as "%2520").
#
# The argument is not modified, so frozen strings are accepted.
#
# @param str [String] the link to encode
# @return [String] a new string with the listed characters percent-encoded
def escape(str)
  str.gsub(ESCAPE_PATTERN, ESCAPED_CHARACTERS)
end
71
100
  end
72
101
  end
73
102
  end
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.10"
3
+ VERSION = "0.1.15"
4
4
  end
5
5
  end
@@ -19,7 +19,10 @@ module Pulse
19
19
 
20
20
  def extract_file_urls(response)
21
21
  return [] if response.body.nil? || response.body.empty?
22
+ extract_download_links(response) + extract_embedded_images(response)
23
+ end
22
24
 
25
+ def extract_download_links(response)
23
26
  parse_html(response.body)
24
27
  .css('a')
25
28
  .to_a
@@ -28,6 +31,16 @@ module Pulse
28
31
  .select { |link| link.include? file_type }
29
32
  end
30
33
 
34
# Collects image URLs embedded in the page.
#
# Reads the "src" attribute of every <img> element in the response body,
# drops elements that have none, and keeps only the sources that contain
# both file_type (defined outside this method) and the literal "https://".
def extract_embedded_images(response)
  sources = parse_html(response.body).css('img').to_a.map { |image| image["src"] }.compact

  sources.select do |source|
    source.include?(file_type) && source.include?("https://")
  end
end
43
+
31
44
  def parse_html(raw_html)
32
45
  Nokogiri::HTML(raw_html)
33
46
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.10
4
+ version: 0.1.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-04-22 00:00:00.000000000 Z
11
+ date: 2021-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty