pulse-downloader 0.1.27 → 0.1.31

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ed736b97e09aaec1104ac4e817b981c2ebb88fbac583abe0ebf44e721143b087
4
- data.tar.gz: a81646945f24e428738e8a514e671f2f14682f5824faee1e63e07bd498fe519c
3
+ metadata.gz: 3036b6512e69bffe3a1c2b4dbfb156c4b932910dbf95ca4d61d7e89307a86de5
4
+ data.tar.gz: 2aa3cd36ea992d59463d3127ec589eb848b1e59258e1714b9673d2dbd93c7e8d
5
5
  SHA512:
6
- metadata.gz: b684940cab23055ec977672dd8472fcb38ebf8e70e59d18f2a76270a3234f5873c7a6bee47877760c54733af2c073bbaa7a05eec33266a33df316482baa6d836
7
- data.tar.gz: 9b1737bc6607585c6fb6081d9c1b3031bd53ffccc5cce72a0650b163792742dde2b7dffa6ed663775a7cd1cad50f1fb964c07bc555ceee7a2ca66637af25b41a
6
+ metadata.gz: 9df1e6f4b4136a6061e6222df80c2899b28b73f7b68648c7c2ecb7007eb8aa6c26c4a18fd6d65f9c11651504068ef053d9007614dd6fc09765ccdcae2b9946e9
7
+ data.tar.gz: 5ac6bfb3d0bed3bddbefdd752418b6ae6f86c50aa76dfff4dbc1806fcfc0f2336215646af825c787d5cd3cd6bc08b1fd9ec0c5747f50cefa0ed5da449bf7f7e3
data/Gemfile CHANGED
@@ -2,6 +2,3 @@ source "https://rubygems.org"
2
2
 
3
3
  # Specify your gem's dependencies in pulse-downloader.gemspec
4
4
  gemspec
5
-
6
- gem "rake", "~> 12.0"
7
- gem "minitest", "~> 5.0"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pulse-downloader (0.1.26)
4
+ pulse-downloader (0.1.31)
5
5
  active_attr (~> 0.15)
6
6
  httparty (~> 0.18)
7
7
  nokogiri (~> 1.11)
@@ -10,15 +10,15 @@ PATH
10
10
  GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
- actionpack (6.1.3.2)
14
- actionview (= 6.1.3.2)
15
- activesupport (= 6.1.3.2)
13
+ actionpack (6.1.4.1)
14
+ actionview (= 6.1.4.1)
15
+ activesupport (= 6.1.4.1)
16
16
  rack (~> 2.0, >= 2.0.9)
17
17
  rack-test (>= 0.6.3)
18
18
  rails-dom-testing (~> 2.0)
19
19
  rails-html-sanitizer (~> 1.0, >= 1.2.0)
20
- actionview (6.1.3.2)
21
- activesupport (= 6.1.3.2)
20
+ actionview (6.1.4.1)
21
+ activesupport (= 6.1.4.1)
22
22
  builder (~> 3.1)
23
23
  erubi (~> 1.4)
24
24
  rails-dom-testing (~> 2.0)
@@ -27,20 +27,20 @@ GEM
27
27
  actionpack (>= 3.0.2, < 7.0)
28
28
  activemodel (>= 3.0.2, < 7.0)
29
29
  activesupport (>= 3.0.2, < 7.0)
30
- activemodel (6.1.3.2)
31
- activesupport (= 6.1.3.2)
32
- activesupport (6.1.3.2)
30
+ activemodel (6.1.4.1)
31
+ activesupport (= 6.1.4.1)
32
+ activesupport (6.1.4.1)
33
33
  concurrent-ruby (~> 1.0, >= 1.0.2)
34
34
  i18n (>= 1.6, < 2)
35
35
  minitest (>= 5.1)
36
36
  tzinfo (~> 2.0)
37
37
  zeitwerk (~> 2.3)
38
- addressable (2.7.0)
38
+ addressable (2.8.0)
39
39
  public_suffix (>= 2.0.2, < 5.0)
40
40
  ansi (1.5.0)
41
41
  builder (3.2.4)
42
42
  coderay (1.1.3)
43
- concurrent-ruby (1.1.8)
43
+ concurrent-ruby (1.1.9)
44
44
  crack (0.4.5)
45
45
  rexml
46
46
  crass (1.0.6)
@@ -52,13 +52,13 @@ GEM
52
52
  multi_xml (>= 0.5.2)
53
53
  i18n (1.8.10)
54
54
  concurrent-ruby (~> 1.0)
55
- loofah (2.9.1)
55
+ loofah (2.12.0)
56
56
  crass (~> 1.0.2)
57
57
  nokogiri (>= 1.5.9)
58
58
  method_source (1.0.0)
59
59
  mime-types (3.3.1)
60
60
  mime-types-data (~> 3.2015)
61
- mime-types-data (3.2021.0225)
61
+ mime-types-data (3.2021.0901)
62
62
  minitest (5.14.4)
63
63
  minitest-focus (1.1.2)
64
64
  minitest (>= 4, < 6)
@@ -69,7 +69,7 @@ GEM
69
69
  ruby-progressbar
70
70
  mocha (1.11.2)
71
71
  multi_xml (0.6.0)
72
- nokogiri (1.11.5-x86_64-linux)
72
+ nokogiri (1.12.4-x86_64-linux)
73
73
  racc (~> 1.4)
74
74
  options (2.3.2)
75
75
  progress_bar (1.3.3)
@@ -86,9 +86,9 @@ GEM
86
86
  rails-dom-testing (2.0.3)
87
87
  activesupport (>= 4.2.0)
88
88
  nokogiri (>= 1.6)
89
- rails-html-sanitizer (1.3.0)
89
+ rails-html-sanitizer (1.4.2)
90
90
  loofah (~> 2.3)
91
- rake (12.3.3)
91
+ rake (13.0.6)
92
92
  rexml (3.2.5)
93
93
  ruby-progressbar (1.11.0)
94
94
  timecop (0.9.4)
@@ -111,7 +111,7 @@ DEPENDENCIES
111
111
  mocha (~> 1.11.2)
112
112
  pry (~> 0.13)
113
113
  pulse-downloader!
114
- rake (~> 12.0)
114
+ rake (~> 13.0)
115
115
  timecop (~> 0.9.1)
116
116
  webmock (~> 3.8.3)
117
117
 
@@ -10,8 +10,7 @@ module Pulse
10
10
 
11
11
  file_data = HTTParty.get(
12
12
  escape(compute_file_link(file_path)),
13
- verify: verify_ssl,
14
- headers: headers
13
+ verify: verify_ssl
15
14
  )
16
15
 
17
16
  @end_time = get_micro_second_time
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.27"
3
+ VERSION = "0.1.31"
4
4
  end
5
5
  end
@@ -4,7 +4,7 @@ module Pulse
4
4
  def fetch_file_paths(custom_path_root=nil)
5
5
  @start_time = get_micro_second_time
6
6
 
7
- response = HTTParty.get(url, verify: verify_ssl)
7
+ response = HTTParty.get(url, verify: verify_ssl, headers: headers)
8
8
 
9
9
  @end_time = get_micro_second_time
10
10
 
@@ -25,10 +25,11 @@ module Pulse
25
25
 
26
26
  def extract_file_urls(response, custom_path_root, type)
27
27
  return [] if response.body.nil? || response.body.empty?
28
- remove_base64(
28
+
29
+ remove_artefacts(
29
30
  extract_all_urls(response, custom_path_root, type) +
30
- extract_download_links(response, custom_path_root, type) +
31
- extract_embedded_images(response, custom_path_root, type)
31
+ extract_download_links(response, type) +
32
+ extract_embedded_images(response, type)
32
33
  ).uniq
33
34
  end
34
35
 
@@ -39,20 +40,20 @@ module Pulse
39
40
  .find_all { |u| u =~ /^https?:/ }
40
41
  .compact
41
42
  .select { |link| (link.include? type || link.include?(custom_path_root)) }
42
- .map { |link| add_base_url(link) }
43
+ .map { |link| add_base_url(link, custom_path_root) }
43
44
  end
44
45
 
45
- def extract_download_links(response, custom_path_root, type)
46
+ def extract_download_links(response, type)
46
47
  parse_html(response.body)
47
48
  .css('a')
48
49
  .to_a
49
50
  .map { |link| link['href'] }
50
51
  .compact
51
- .select { |link| (link.include? type || link.include?(custom_path_root)) }
52
+ .select { |link| (link.include? type) }
52
53
  .map { |link| add_base_url(link) }
53
54
  end
54
55
 
55
- def extract_embedded_images(response, custom_path_root, type)
56
+ def extract_embedded_images(response, type)
56
57
  return [] unless scrape_images
57
58
 
58
59
  parse_html(response.body)
@@ -60,10 +61,21 @@ module Pulse
60
61
  .to_a
61
62
  .map { |e| e["src"] }
62
63
  .compact
63
- .select { |link| (link.include? type || link.include?(custom_path_root)) }
64
+ .select { |link| (link.include? type) }
64
65
  .map { |link| add_base_url(link) }
65
66
  end
66
67
 
68
+ def remove_artefacts(urls)
69
+ urls = remove_extra_escape_characters(urls)
70
+ remove_base64(urls)
71
+ end
72
+
73
+ def remove_extra_escape_characters(urls)
74
+ urls.map do |url|
75
+ url.gsub("\">", '')
76
+ end
77
+ end
78
+
67
79
  def remove_base64(urls)
68
80
  urls.reject do |url|
69
81
  url.include?(':image/') || url.include?('base64')
@@ -74,7 +86,9 @@ module Pulse
74
86
  Nokogiri::HTML(raw_html)
75
87
  end
76
88
 
77
- def add_base_url(str)
89
+ def add_base_url(str, custom_path_root=nil)
90
+ return str if custom_path_root
91
+
78
92
  if !str.include?('https://') && !str.include?(base_url)
79
93
  "https://#{base_url}#{str}"
80
94
  else
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.27
4
+ version: 0.1.31
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-24 00:00:00.000000000 Z
11
+ date: 2021-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty