pulse-downloader 0.1.14 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eca91e3c21cef3c07b1f1e26c6f7731a605aa54e6861f673bfff6feeabaf0b55
4
- data.tar.gz: 57f68ce784c9b7d60342ef45d7830034602eedd35a0ee1afa21ca6cc008f275e
3
+ metadata.gz: 4ce4c8fd47e92ddc487cd80c36564a21357a3ad79deee82d0af70812c935ab1e
4
+ data.tar.gz: 3b2dbfc9387fe3c496ae112d3d8c6a7af6cf1b3b0c550fed0da358d9bf798efa
5
5
  SHA512:
6
- metadata.gz: 390bb819137aee83d9bedcedb06865ca3382f5f36832ed68ebc211d6167fb83944ae0f16fe89f67e876e83a1097eae92916328d9454f0e99f355f79adaabc3c0
7
- data.tar.gz: c9e5a5fabb5b33e3251be461b50a8601a0d5cdc6e8d419641d5f441e727f366f841aa3d1d009e0a5817502e388658a1272eea0bf2289db442efa519771b6df03
6
+ metadata.gz: 1b72add3be0201cfc9c395d790c26c77fc8b8f3383048fc334d8b84695d1d0fd8d9dca17af16242a1ff74c54b8a37e81e40262b90c293507b4668902e23477a5
7
+ data.tar.gz: b4a2a5087afe6d10ef1c424169862f146f9911d8dd13369ffe86318bab3a083c815a65449375f3a47d1f9864a6de23b5db9417e509285d8647a47637220b9cdb
@@ -8,6 +8,7 @@ module Pulse
8
8
 
9
9
  attr_reader :url,
10
10
  :file_type,
11
+ :scrape_images,
11
12
  :save_data,
12
13
  :save_path,
13
14
  :read_from_save_path,
@@ -17,7 +18,8 @@ module Pulse
17
18
  :report_time,
18
19
  :start_time,
19
20
  :end_time,
20
- :progress_bar
21
+ :progress_bar,
22
+ :base_url
21
23
 
22
24
  # Does not continue downloads-
23
25
  # Will only save once the file has been downloaded in memory
@@ -29,6 +31,7 @@ module Pulse
29
31
  # TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
30
32
  def initialize(url:,
31
33
  file_type:,
34
+ scrape_images: false,
32
35
  save_data: false,
33
36
  save_path: '',
34
37
  read_from_save_path: false,
@@ -40,6 +43,7 @@ module Pulse
40
43
 
41
44
  @url = url
42
45
  @file_type = file_type
46
+ @scrape_images = scrape_images
43
47
  @save_data = save_data
44
48
  @save_path = save_path
45
49
  @read_from_save_path = read_from_save_path
@@ -48,6 +52,8 @@ module Pulse
48
52
  @save_and_dont_return = save_and_dont_return
49
53
  @report_time = report_time
50
54
  @progress_bar = progress_bar
55
+
56
+ @base_url = get_base_url
51
57
  end
52
58
 
53
59
  def call!
@@ -73,6 +79,16 @@ module Pulse
73
79
 
74
80
  private
75
81
 
82
+ def get_base_url
83
+ url_breakdown = url.split('/')
84
+
85
+ if url_breakdown.first.include?('https')
86
+ url_breakdown[2]
87
+ else
88
+ url_breakdown.first
89
+ end
90
+ end
91
+
76
92
  def get_micro_second_time
77
93
  (Time.now.to_f * 1000).to_i
78
94
  end
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.14"
3
+ VERSION = "0.1.19"
4
4
  end
5
5
  end
@@ -19,18 +19,44 @@ module Pulse
19
19
 
20
20
  def extract_file_urls(response)
21
21
  return [] if response.body.nil? || response.body.empty?
22
+ (
23
+ extract_download_links(response) + extract_embedded_images(response)
24
+ ).uniq
25
+ end
22
26
 
27
+ def extract_download_links(response)
23
28
  parse_html(response.body)
24
29
  .css('a')
25
30
  .to_a
26
31
  .map { |link| link['href'] }
27
32
  .compact
28
33
  .select { |link| link.include? file_type }
34
+ .map { |link| add_base_url(link) }
35
+ end
36
+
37
+ def extract_embedded_images(response)
38
+ return [] unless scrape_images
39
+
40
+ parse_html(response.body)
41
+ .css('img')
42
+ .to_a
43
+ .map { |e| e["src"] }
44
+ .compact
45
+ .select { |link| link.include? file_type }
46
+ .map { |link| add_base_url(link) }
29
47
  end
30
48
 
31
49
  def parse_html(raw_html)
32
50
  Nokogiri::HTML(raw_html)
33
51
  end
52
+
53
+ def add_base_url(str)
54
+ unless str.include?(base_url)
55
+ "https://#{base_url}#{str}"
56
+ else
57
+ str
58
+ end
59
+ end
34
60
  end
35
61
  end
36
62
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.14
4
+ version: 0.1.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-08 00:00:00.000000000 Z
11
+ date: 2021-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty