pulse-downloader 0.1.20 → 0.1.25

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8a2d2732c3a0307e7dffaf1ec3b60691e7b54ca3de4c4084325908f6fdf1b801
4
- data.tar.gz: 06b3a802a55f186f8fccceec0e29d6b6e3dbade7be776699a6a327ccacf68060
3
+ metadata.gz: 48153054f979c1f1ed2e36c211edb598f921b41e6f5e0b71bfd8368eaa4112f3
4
+ data.tar.gz: 1c831b86a9ead79b51446ed0b911ee54d3b410437d20214fddb57da808a3938a
5
5
  SHA512:
6
- metadata.gz: 27c5f745685fbb5b2c207113b4fe28eb97b8638551afb931e3be1e4d9f073c5959ba0cc1ed59db44fbdfaae26218840ca8663da95fba37ef7c0cf18a37f2aefc
7
- data.tar.gz: 3a06bbe0fafe7329eec9ba1d90e483da181f309915b06bfb67d49c3de9f1c96402047d775f672ed172979ca210d988e7063acd5c53860dc61bfcf508a6f64457
6
+ metadata.gz: f0a0e5d9dbeca80d2de42ae1e655191e9aa01c557992b141412cd1dfe86751b3e047b951fc2f6bbf28ea487b7bc8f80d1d173d76c129ca38194cda34f894ef7a
7
+ data.tar.gz: cf2a9d907e5f35a792c4d9b7f89ea0e0a96b50de499ade69b62afe0108857a7324836021b3ba338d22c05744789b8e252db08a1ae36177f3a5f671003bee6a32
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.20"
3
+ VERSION = "0.1.25"
4
4
  end
5
5
  end
@@ -1,7 +1,7 @@
1
1
  module Pulse
2
2
  module Downloader
3
3
  module WebPageParser
4
- def fetch_file_paths
4
+ def fetch_file_paths(custom_path_root=nil)
5
5
  @start_time = get_micro_second_time
6
6
 
7
7
  response = HTTParty.get(url, verify: verify_ssl)
@@ -12,29 +12,47 @@ module Pulse
12
12
  print_time
13
13
  end
14
14
 
15
- extract_file_urls(response)
15
+ if file_type.is_a?(Array)
16
+ file_type.flat_map do |type|
17
+ extract_file_urls(response, custom_path_root, type)
18
+ end
19
+ else
20
+ extract_file_urls(response, custom_path_root, file_type)
21
+ end
16
22
  end
17
23
 
18
24
  private
19
25
 
20
- def extract_file_urls(response)
26
+ def extract_file_urls(response, custom_path_root, type)
21
27
  return [] if response.body.nil? || response.body.empty?
22
28
  (
23
- extract_download_links(response) + extract_embedded_images(response)
29
+ extract_all_urls(response, custom_path_root, type) +
30
+ extract_download_links(response, custom_path_root, type) +
31
+ extract_embedded_images(response, custom_path_root, type)
24
32
  ).uniq
25
33
  end
26
34
 
27
- def extract_download_links(response)
35
+ def extract_all_urls(response, custom_path_root, type)
36
+ parse_html(response.body)
37
+ .to_s
38
+ .split(/\s+/)
39
+ .find_all { |u| u =~ /^https?:/ }
40
+ .compact
41
+ .select { |link| (link.include? type || link.include?(custom_path_root)) }
42
+ .map { |link| add_base_url(link) }
43
+ end
44
+
45
+ def extract_download_links(response, custom_path_root, type)
28
46
  parse_html(response.body)
29
47
  .css('a')
30
48
  .to_a
31
49
  .map { |link| link['href'] }
32
50
  .compact
33
- .select { |link| link.include? file_type }
51
+ .select { |link| (link.include? type || link.include?(custom_path_root)) }
34
52
  .map { |link| add_base_url(link) }
35
53
  end
36
54
 
37
- def extract_embedded_images(response)
55
+ def extract_embedded_images(response, custom_path_root, type)
38
56
  return [] unless scrape_images
39
57
 
40
58
  parse_html(response.body)
@@ -42,7 +60,7 @@ module Pulse
42
60
  .to_a
43
61
  .map { |e| e["src"] }
44
62
  .compact
45
- .select { |link| link.include? file_type }
63
+ .select { |link| (link.include? type || link.include?(custom_path_root)) }
46
64
  .map { |link| add_base_url(link) }
47
65
  end
48
66
 
@@ -51,9 +69,7 @@ module Pulse
51
69
  end
52
70
 
53
71
  def add_base_url(str)
54
- unless str.include?('https://')
55
- str
56
- unless str.include?(base_url)
72
+ if !str.include?('https://') && !str.include?(base_url)
57
73
  "https://#{base_url}#{str}"
58
74
  else
59
75
  str
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20
4
+ version: 0.1.25
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22