pulse-downloader 0.1.18 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 60924c9c23911294930b5b63367a0d2f4417d437b0dcd67b384f01b470a3b689
4
- data.tar.gz: 333418aa168343213ba458e5707ca767386647d3b397ed768a2fa08a7bc36b3a
3
+ metadata.gz: e354feaf3ad68d91b2b58b1410ad297582ace9b6f3493b17cf8ae11d11c18d8d
4
+ data.tar.gz: 3e036681abb31307b958bba6038d1fda3a9634195d71c61af3e8896bd695b22a
5
5
  SHA512:
6
- metadata.gz: 358a581f98aefc4b1a66734c9d89166b0c46b8b5d1a8686ec9925fb33b2c7ba1de8a3cf4ffac4fb1aa74597bd38876dee958391d0946b4c29345f8e2bf9a2605
7
- data.tar.gz: db36a2f85b073a7fc8de3a8b92d6abf2c93ac32f70ace834f1fd1b0fb2594f348576ba562201de5df25d4597e0215046f2994af8c50412413e671b69ed230273
6
+ metadata.gz: cf401b59b944a0bdca308998c19f4c8b996fadf60cc8118c06a909b5eaa1ca98d9f0f3d234af7050ce71611475d8f9a9c4a79503003649870a5d7f18dc62caa0
7
+ data.tar.gz: 98bd01ea727425dad6a932fd431c241b023d954134827ac781faab9b90559a3d05c75916eb4e0bf99d4f8f4c4244b2ce7b019df8ced9c474ad8efe18fa4e7e7d
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.18"
3
+ VERSION = "0.1.23"
4
4
  end
5
5
  end
@@ -1,7 +1,7 @@
1
1
  module Pulse
2
2
  module Downloader
3
3
  module WebPageParser
4
- def fetch_file_paths
4
+ def fetch_file_paths(custom_path_root=nil)
5
5
  @start_time = get_micro_second_time
6
6
 
7
7
  response = HTTParty.get(url, verify: verify_ssl)
@@ -12,27 +12,30 @@ module Pulse
12
12
  print_time
13
13
  end
14
14
 
15
- extract_file_urls(response)
15
+ extract_file_urls(response, custom_path_root)
16
16
  end
17
17
 
18
18
  private
19
19
 
20
- def extract_file_urls(response)
20
+ def extract_file_urls(response, custom_path_root)
21
21
  return [] if response.body.nil? || response.body.empty?
22
- extract_download_links(response) + extract_embedded_images(response)
22
+ (
23
+ extract_download_links(response, custom_path_root) +
24
+ extract_embedded_images(response, custom_path_root)
25
+ ).uniq
23
26
  end
24
27
 
25
- def extract_download_links(response)
28
+ def extract_download_links(response, custom_path_root)
26
29
  parse_html(response.body)
27
30
  .css('a')
28
31
  .to_a
29
32
  .map { |link| link['href'] }
30
33
  .compact
31
- .select { |link| link.include? file_type }
34
+ .select { |link| (link.include? file_type || link.include?(custom_path_root)) }
32
35
  .map { |link| add_base_url(link) }
33
36
  end
34
37
 
35
- def extract_embedded_images(response)
38
+ def extract_embedded_images(response, custom_path_root)
36
39
  return [] unless scrape_images
37
40
 
38
41
  parse_html(response.body)
@@ -40,7 +43,7 @@ module Pulse
40
43
  .to_a
41
44
  .map { |e| e["src"] }
42
45
  .compact
43
- .select { |link| link.include? file_type }
46
+ .select { |link| (link.include? file_type || link.include?(custom_path_root)) }
44
47
  .map { |link| add_base_url(link) }
45
48
  end
46
49
 
@@ -49,8 +52,8 @@ module Pulse
49
52
  end
50
53
 
51
54
  def add_base_url(str)
52
- unless str.include?(base_url)
53
- "#{base_url}#{str}"
55
+ if !str.include?('https://') && !str.include?(base_url)
56
+ "https://#{base_url}#{str}"
54
57
  else
55
58
  str
56
59
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.18
4
+ version: 0.1.23
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22