pulse-downloader 0.1.14 → 0.1.19

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eca91e3c21cef3c07b1f1e26c6f7731a605aa54e6861f673bfff6feeabaf0b55
4
- data.tar.gz: 57f68ce784c9b7d60342ef45d7830034602eedd35a0ee1afa21ca6cc008f275e
3
+ metadata.gz: 4ce4c8fd47e92ddc487cd80c36564a21357a3ad79deee82d0af70812c935ab1e
4
+ data.tar.gz: 3b2dbfc9387fe3c496ae112d3d8c6a7af6cf1b3b0c550fed0da358d9bf798efa
5
5
  SHA512:
6
- metadata.gz: 390bb819137aee83d9bedcedb06865ca3382f5f36832ed68ebc211d6167fb83944ae0f16fe89f67e876e83a1097eae92916328d9454f0e99f355f79adaabc3c0
7
- data.tar.gz: c9e5a5fabb5b33e3251be461b50a8601a0d5cdc6e8d419641d5f441e727f366f841aa3d1d009e0a5817502e388658a1272eea0bf2289db442efa519771b6df03
6
+ metadata.gz: 1b72add3be0201cfc9c395d790c26c77fc8b8f3383048fc334d8b84695d1d0fd8d9dca17af16242a1ff74c54b8a37e81e40262b90c293507b4668902e23477a5
7
+ data.tar.gz: b4a2a5087afe6d10ef1c424169862f146f9911d8dd13369ffe86318bab3a083c815a65449375f3a47d1f9864a6de23b5db9417e509285d8647a47637220b9cdb
@@ -8,6 +8,7 @@ module Pulse
8
8
 
9
9
  attr_reader :url,
10
10
  :file_type,
11
+ :scrape_images,
11
12
  :save_data,
12
13
  :save_path,
13
14
  :read_from_save_path,
@@ -17,7 +18,8 @@ module Pulse
17
18
  :report_time,
18
19
  :start_time,
19
20
  :end_time,
20
- :progress_bar
21
+ :progress_bar,
22
+ :base_url
21
23
 
22
24
  # Does not continue downloads-
23
25
  # Will only save once the file has been downloaded in memory
@@ -29,6 +31,7 @@ module Pulse
29
31
  # TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
30
32
  def initialize(url:,
31
33
  file_type:,
34
+ scrape_images: false,
32
35
  save_data: false,
33
36
  save_path: '',
34
37
  read_from_save_path: false,
@@ -40,6 +43,7 @@ module Pulse
40
43
 
41
44
  @url = url
42
45
  @file_type = file_type
46
+ @scrape_images = scrape_images
43
47
  @save_data = save_data
44
48
  @save_path = save_path
45
49
  @read_from_save_path = read_from_save_path
@@ -48,6 +52,8 @@ module Pulse
48
52
  @save_and_dont_return = save_and_dont_return
49
53
  @report_time = report_time
50
54
  @progress_bar = progress_bar
55
+
56
+ @base_url = get_base_url
51
57
  end
52
58
 
53
59
  def call!
@@ -73,6 +79,16 @@ module Pulse
73
79
 
74
80
  private
75
81
 
82
+ def get_base_url
83
+ url_breakdown = url.split('/')
84
+
85
+ if url_breakdown.first.include?('https')
86
+ url_breakdown[2]
87
+ else
88
+ url_breakdown.first
89
+ end
90
+ end
91
+
76
92
  def get_micro_second_time
77
93
  (Time.now.to_f * 1000).to_i
78
94
  end
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.14"
3
+ VERSION = "0.1.19"
4
4
  end
5
5
  end
@@ -19,18 +19,44 @@ module Pulse
19
19
 
20
20
  def extract_file_urls(response)
21
21
  return [] if response.body.nil? || response.body.empty?
22
+ (
23
+ extract_download_links(response) + extract_embedded_images(response)
24
+ ).uniq
25
+ end
22
26
 
27
+ def extract_download_links(response)
23
28
  parse_html(response.body)
24
29
  .css('a')
25
30
  .to_a
26
31
  .map { |link| link['href'] }
27
32
  .compact
28
33
  .select { |link| link.include? file_type }
34
+ .map { |link| add_base_url(link) }
35
+ end
36
+
37
+ def extract_embedded_images(response)
38
+ return [] unless scrape_images
39
+
40
+ parse_html(response.body)
41
+ .css('img')
42
+ .to_a
43
+ .map { |e| e["src"] }
44
+ .compact
45
+ .select { |link| link.include? file_type }
46
+ .map { |link| add_base_url(link) }
29
47
  end
30
48
 
31
49
  def parse_html(raw_html)
32
50
  Nokogiri::HTML(raw_html)
33
51
  end
52
+
53
+ def add_base_url(str)
54
+ unless str.include?(base_url)
55
+ "https://#{base_url}#{str}"
56
+ else
57
+ str
58
+ end
59
+ end
34
60
  end
35
61
  end
36
62
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.14
4
+ version: 0.1.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-08 00:00:00.000000000 Z
11
+ date: 2021-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty