pulse-downloader 0.1.13 → 0.1.18

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b348fc1621983108ea7e5bbdc2e8d590aa06706659b7017b1d72b4fde2c5dc57
4
- data.tar.gz: 93ab089ee24ee1176b1bd93d7488ea764a58e505a5d4d5707f1b7dc537e403c7
3
+ metadata.gz: 60924c9c23911294930b5b63367a0d2f4417d437b0dcd67b384f01b470a3b689
4
+ data.tar.gz: 333418aa168343213ba458e5707ca767386647d3b397ed768a2fa08a7bc36b3a
5
5
  SHA512:
6
- metadata.gz: 6e3a684973ec28331ef7bafcc10639ba96ba6aaa9f0ededc2dab914d9226cb323caf4afae24bdb8b23a5059b5ecb20dcfea5352dc63eb6559a1e6efb9473b867
7
- data.tar.gz: 87d5bc975ca2124ba5e82d38dbc24c140e72f15e45288e7675c822e0becebeb1df26e5684fcfc96681e7c6b78f7feae3516ae77567405facfedce14db8aed78a
6
+ metadata.gz: 358a581f98aefc4b1a66734c9d89166b0c46b8b5d1a8686ec9925fb33b2c7ba1de8a3cf4ffac4fb1aa74597bd38876dee958391d0946b4c29345f8e2bf9a2605
7
+ data.tar.gz: db36a2f85b073a7fc8de3a8b92d6abf2c93ac32f70ace834f1fd1b0fb2594f348576ba562201de5df25d4597e0215046f2994af8c50412413e671b69ed230273
@@ -8,6 +8,7 @@ module Pulse
8
8
 
9
9
  attr_reader :url,
10
10
  :file_type,
11
+ :scrape_images,
11
12
  :save_data,
12
13
  :save_path,
13
14
  :read_from_save_path,
@@ -17,7 +18,8 @@ module Pulse
17
18
  :report_time,
18
19
  :start_time,
19
20
  :end_time,
20
- :progress_bar
21
+ :progress_bar,
22
+ :base_url
21
23
 
22
24
  # Does not continue downloads-
23
25
  # Will only save once the file has been downloaded in memory
@@ -29,6 +31,7 @@ module Pulse
29
31
  # TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
30
32
  def initialize(url:,
31
33
  file_type:,
34
+ scrape_images: false,
32
35
  save_data: false,
33
36
  save_path: '',
34
37
  read_from_save_path: false,
@@ -40,6 +43,7 @@ module Pulse
40
43
 
41
44
  @url = url
42
45
  @file_type = file_type
46
+ @scrape_images = scrape_images
43
47
  @save_data = save_data
44
48
  @save_path = save_path
45
49
  @read_from_save_path = read_from_save_path
@@ -48,6 +52,8 @@ module Pulse
48
52
  @save_and_dont_return = save_and_dont_return
49
53
  @report_time = report_time
50
54
  @progress_bar = progress_bar
55
+
56
+ @base_url = get_base_url
51
57
  end
52
58
 
53
59
  def call!
@@ -73,6 +79,16 @@ module Pulse
73
79
 
74
80
  private
75
81
 
82
# Derives the base (host) portion of +url+.
#
# When the URL starts with an "http://" or "https://" scheme the host
# segment is returned; otherwise everything before the first "/" is treated
# as the host. Checking only for "https" would make plain "http://" URLs
# yield "http:" as their base, so both schemes are recognised here.
#
# @return [String, nil] host segment of +url+ (nil when +url+ is empty)
def get_base_url
  url_breakdown = url.split('/')

  # "https://host/path" splits into ["https:", "", "host", "path"], so the
  # host sits at index 2 whenever a scheme is present.
  if url.match?(%r{\Ahttps?://}i)
    url_breakdown[2]
  else
    url_breakdown.first
  end
end
91
+
76
92
  def get_micro_second_time
77
93
  (Time.now.to_f * 1000).to_i
78
94
  end
@@ -72,7 +72,7 @@ module Pulse
72
72
  str.gsub!("$", "\%24")
73
73
  str.gsub!("&", "\%26")
74
74
  str.gsub!("`", "\%60")
75
- str.gsub!(":", "\%3A")
75
+ # str.gsub!(":", "\%3A")
76
76
  str.gsub!("<", "\%3C")
77
77
  str.gsub!(">", "\%3E")
78
78
  str.gsub!("[", "\%5B")
@@ -85,7 +85,7 @@ module Pulse
85
85
  str.gsub!("#", "\%23")
86
86
  str.gsub!("\%", "\%25")
87
87
  str.gsub!("@", "\%40")
88
- str.gsub!("/", "\%2F")
88
+ # str.gsub!("/", "\%2F")
89
89
  str.gsub!(";", "\%3B")
90
90
  str.gsub!("=", "\%3D")
91
91
  str.gsub!("?", "\%3F")
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.13"
3
+ VERSION = "0.1.18"
4
4
  end
5
5
  end
@@ -19,18 +19,42 @@ module Pulse
19
19
 
20
20
  def extract_file_urls(response)
21
21
  return [] if response.body.nil? || response.body.empty?
22
+ extract_download_links(response) + extract_embedded_images(response)
23
+ end
22
24
 
25
+ def extract_download_links(response)
23
26
  parse_html(response.body)
24
27
  .css('a')
25
28
  .to_a
26
29
  .map { |link| link['href'] }
27
30
  .compact
28
31
  .select { |link| link.include? file_type }
32
+ .map { |link| add_base_url(link) }
33
+ end
34
+
35
+ def extract_embedded_images(response)
36
+ return [] unless scrape_images
37
+
38
+ parse_html(response.body)
39
+ .css('img')
40
+ .to_a
41
+ .map { |e| e["src"] }
42
+ .compact
43
+ .select { |link| link.include? file_type }
44
+ .map { |link| add_base_url(link) }
29
45
  end
30
46
 
31
47
  def parse_html(raw_html)
32
48
  Nokogiri::HTML(raw_html)
33
49
  end
50
+
51
# Turns a scraped link into a URL rooted at +base_url+.
#
# Links that already contain +base_url+, carry their own scheme
# ("https://cdn.example.org/a.png") or are protocol-relative
# ("//cdn.example.org/a.png") are returned untouched, because prefixing them
# would produce an invalid URL. Any other link is treated as relative and
# joined to +base_url+ with exactly one "/" between the two parts.
#
# @param str [String] href/src value taken from the page
# @return [String] the link, prefixed with +base_url+ when it was relative
def add_base_url(str)
  return str if str.include?(base_url)
  # Matches "scheme://..." as well as scheme-less "//host/..." links.
  return str if str.match?(%r{\A(?:[a-z][a-z0-9+.-]*:)?//}i)

  # Normalise the seam so "files/a.png" and "/files/a.png" both join cleanly.
  "#{base_url.chomp('/')}/#{str.sub(%r{\A/+}, '')}"
end
34
58
  end
35
59
  end
36
60
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.13
4
+ version: 0.1.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-08 00:00:00.000000000 Z
11
+ date: 2021-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty