pulse-downloader 0.1.19 → 0.1.24

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ce4c8fd47e92ddc487cd80c36564a21357a3ad79deee82d0af70812c935ab1e
4
- data.tar.gz: 3b2dbfc9387fe3c496ae112d3d8c6a7af6cf1b3b0c550fed0da358d9bf798efa
3
+ metadata.gz: 13ed97a9caedc58bca6c4247f2d56d7456aecafe0f2eecb65b6c25b5de404fd4
4
+ data.tar.gz: 0d450684690c49812a63a0aec81f9e9d77e6074898a6ac2be9b7143fe5989adb
5
5
  SHA512:
6
- metadata.gz: 1b72add3be0201cfc9c395d790c26c77fc8b8f3383048fc334d8b84695d1d0fd8d9dca17af16242a1ff74c54b8a37e81e40262b90c293507b4668902e23477a5
7
- data.tar.gz: b4a2a5087afe6d10ef1c424169862f146f9911d8dd13369ffe86318bab3a083c815a65449375f3a47d1f9864a6de23b5db9417e509285d8647a47637220b9cdb
6
+ metadata.gz: '001790c43ba48c68ad1222d70f1352e5b399384fb7be69c6951ac80fd629ed2009fcc4a1292e23ab712ddaa1a9f206a59b1abd7f965625486160224e4e7a77bb'
7
+ data.tar.gz: d49f98068eec42477b7069948f4a7c72b37b4e166c6f843b4e7e81763c27eb5e5561115f7b15800e48ed2cd86f5397e6a21f7d3eaf2816a91b017795399fbb9f
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.19"
3
+ VERSION = "0.1.24"
4
4
  end
5
5
  end
@@ -1,7 +1,7 @@
1
1
  module Pulse
2
2
  module Downloader
3
3
  module WebPageParser
4
- def fetch_file_paths
4
+ def fetch_file_paths(custom_path_root=nil)
5
5
  @start_time = get_micro_second_time
6
6
 
7
7
  response = HTTParty.get(url, verify: verify_ssl)
@@ -12,29 +12,36 @@ module Pulse
12
12
  print_time
13
13
  end
14
14
 
15
- extract_file_urls(response)
15
+ if file_type.is_a?(Array)
16
+ file_type.flat_map do |type|
17
+ extract_file_urls(response, custom_path_root, type)
18
+ end
19
+ else
20
+ extract_file_urls(response, custom_path_root, file_type)
21
+ end
16
22
  end
17
23
 
18
24
  private
19
25
 
20
- def extract_file_urls(response)
26
+ def extract_file_urls(response, custom_path_root, type)
21
27
  return [] if response.body.nil? || response.body.empty?
22
28
  (
23
- extract_download_links(response) + extract_embedded_images(response)
29
+ extract_download_links(response, custom_path_root, type) +
30
+ extract_embedded_images(response, custom_path_root, type)
24
31
  ).uniq
25
32
  end
26
33
 
27
- def extract_download_links(response)
34
+ def extract_download_links(response, custom_path_root, type)
28
35
  parse_html(response.body)
29
36
  .css('a')
30
37
  .to_a
31
38
  .map { |link| link['href'] }
32
39
  .compact
33
- .select { |link| link.include? file_type }
40
+ .select { |link| (link.include? type || link.include?(custom_path_root)) }
34
41
  .map { |link| add_base_url(link) }
35
42
  end
36
43
 
37
- def extract_embedded_images(response)
44
+ def extract_embedded_images(response, custom_path_root, type)
38
45
  return [] unless scrape_images
39
46
 
40
47
  parse_html(response.body)
@@ -42,7 +49,7 @@ module Pulse
42
49
  .to_a
43
50
  .map { |e| e["src"] }
44
51
  .compact
45
- .select { |link| link.include? file_type }
52
+ .select { |link| (link.include? type || link.include?(custom_path_root)) }
46
53
  .map { |link| add_base_url(link) }
47
54
  end
48
55
 
@@ -51,7 +58,7 @@ module Pulse
51
58
  end
52
59
 
53
60
  def add_base_url(str)
54
- unless str.include?(base_url)
61
+ if !str.include?('https://') && !str.include?(base_url)
55
62
  "https://#{base_url}#{str}"
56
63
  else
57
64
  str
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.19
4
+ version: 0.1.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22