pulse-downloader 0.1.12 → 0.1.17

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 17c6b467560188e6ccbf9549fe0be00188726daf87352d295788ccbbb986ead3
4
- data.tar.gz: a88019ce7c0708767b8c76bb5796ec34950f1ddf02cd7f2e8b83cb09126bcbb8
3
+ metadata.gz: 0b50ec4293c1d064d672f7b9c8271837420deb866826a85d8c3016ca348bfd39
4
+ data.tar.gz: d4b35e8706bece03ba778320bb524fd8164f3b7e1933a3f79939b38abe5f9d20
5
5
  SHA512:
6
- metadata.gz: b96d3ec2c08d342d4884c8d413d5133d1a71776f12e9dfc8c12829db9f1f0fc02f784f58f07481692f13b8d7a1a2dce66bd7e2c049077d1a337a676876a15c5b
7
- data.tar.gz: b60ca5f1204ae2efd86c31c946dbcbff3d02c4279d83fd8f36c07d9909e4103db1df98b9009b5305b8271c3ba6c6fe7bda4daf708bb0505f15aab316e08af0a5
6
+ metadata.gz: e8626f65c533de9ca6422d6a1ea18c018e2ca23316ecd2678a5c9a733fe65d72eb1fd921e9a1215839e8501849cc36cb3c96d4e9cfa39e107fbc5422bc2d7be7
7
+ data.tar.gz: 70cc3bb87274c39ee93060292ee343144157806dd8900f78a564363bfad2a997182b2ae104ec0941603256fc8d38583d041f01e541d520ed169dc51989fb1f1b
@@ -8,6 +8,7 @@ module Pulse
8
8
 
9
9
  attr_reader :url,
10
10
  :file_type,
11
+ :scrape_images,
11
12
  :save_data,
12
13
  :save_path,
13
14
  :read_from_save_path,
@@ -17,7 +18,8 @@ module Pulse
17
18
  :report_time,
18
19
  :start_time,
19
20
  :end_time,
20
- :progress_bar
21
+ :progress_bar,
22
+ :base_url
21
23
 
22
24
  # Does not continue downloads-
23
25
  # Will only save once the file has been downloaded in memory
@@ -29,6 +31,7 @@ module Pulse
29
31
  # TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
30
32
  def initialize(url:,
31
33
  file_type:,
34
+ scrape_images: false,
32
35
  save_data: false,
33
36
  save_path: '',
34
37
  read_from_save_path: false,
@@ -40,6 +43,7 @@ module Pulse
40
43
 
41
44
  @url = url
42
45
  @file_type = file_type
46
+ @scrape_images = scrape_images
43
47
  @save_data = save_data
44
48
  @save_path = save_path
45
49
  @read_from_save_path = read_from_save_path
@@ -48,6 +52,8 @@ module Pulse
48
52
  @save_and_dont_return = save_and_dont_return
49
53
  @report_time = report_time
50
54
  @progress_bar = progress_bar
55
+
56
+ @base_url = get_base_url
51
57
  end
52
58
 
53
59
  def call!
@@ -73,6 +79,16 @@ module Pulse
73
79
 
74
80
  private
75
81
 
82
+ def get_base_url
83
+ url_breakdown = url.split('/')
84
+
85
+ if url_breakdown.first.include?('https')
86
+ url_breakdown[2]
87
+ else
88
+ url_breakdown.first
89
+ end
90
+ end
91
+
76
92
  def get_micro_second_time
77
93
  (Time.now.to_f * 1000).to_i
78
94
  end
@@ -68,33 +68,33 @@ module Pulse
68
68
  end
69
69
 
70
70
  def escape(str)
71
- str.gsub("Space", "%20")
72
- str.gsub("$", "\%24")
73
- str.gsub("&", "\%26")
74
- str.gsub("`", "\%60")
75
- str.gsub(":", "\%3A")
76
- str.gsub("<", "\%3C")
77
- str.gsub(">", "\%3E")
78
- str.gsub("[", "\%5B")
79
- str.gsub("]", "\%5D")
80
- str.gsub("{", "\%7B")
81
- str.gsub("}", "\%7D")
82
- str.gsub("“", "\%22")
83
- str.gsub('"', "\%22")
84
- str.gsub("+", "\%2B")
85
- str.gsub("#", "\%23")
86
- str.gsub("\%", "\%25")
87
- str.gsub("@", "\%40")
88
- str.gsub("/", "\%2F")
89
- str.gsub(";", "\%3B")
90
- str.gsub("=", "\%3D")
91
- str.gsub("?", "\%3F")
92
- str.gsub("\\", "\%5C")
93
- str.gsub("^", "\%5E")
94
- str.gsub("|", "\%7C")
95
- str.gsub("~", "\%7E")
96
- str.gsub("‘", "\%27")
97
- str.gsub(",", "\%2C")
71
+ str.gsub!(" ", "%20")
72
+ str.gsub!("$", "\%24")
73
+ str.gsub!("&", "\%26")
74
+ str.gsub!("`", "\%60")
75
+ # str.gsub!(":", "\%3A")
76
+ str.gsub!("<", "\%3C")
77
+ str.gsub!(">", "\%3E")
78
+ str.gsub!("[", "\%5B")
79
+ str.gsub!("]", "\%5D")
80
+ str.gsub!("{", "\%7B")
81
+ str.gsub!("}", "\%7D")
82
+ str.gsub!("“", "\%22")
83
+ str.gsub!('"', "\%22")
84
+ str.gsub!("+", "\%2B")
85
+ str.gsub!("#", "\%23")
86
+ str.gsub!("\%", "\%25")
87
+ str.gsub!("@", "\%40")
88
+ # str.gsub!("/", "\%2F")
89
+ str.gsub!(";", "\%3B")
90
+ str.gsub!("=", "\%3D")
91
+ str.gsub!("?", "\%3F")
92
+ str.gsub!("\\", "\%5C")
93
+ str.gsub!("^", "\%5E")
94
+ str.gsub!("|", "\%7C")
95
+ str.gsub!("~", "\%7E")
96
+ str.gsub!("‘", "\%27")
97
+ str.gsub!(",", "\%2C")
98
98
  str
99
99
  end
100
100
  end
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.12"
3
+ VERSION = "0.1.17"
4
4
  end
5
5
  end
@@ -19,18 +19,38 @@ module Pulse
19
19
 
20
20
  def extract_file_urls(response)
21
21
  return [] if response.body.nil? || response.body.empty?
22
+ extract_download_links(response) + extract_embedded_images(response)
23
+ end
22
24
 
25
+ def extract_download_links(response)
23
26
  parse_html(response.body)
24
27
  .css('a')
25
28
  .to_a
26
29
  .map { |link| link['href'] }
27
30
  .compact
28
31
  .select { |link| link.include? file_type }
32
+ .map { |link| add_base_url(link) }
33
+ end
34
+
35
+ def extract_embedded_images(response)
36
+ return [] unless scrape_images
37
+
38
+ parse_html(response.body)
39
+ .css('img')
40
+ .to_a
41
+ .map { |e| e["src"] }
42
+ .compact
43
+ .select { |link| link.include? file_type }
44
+ .map { |link| add_base_url(link) }
29
45
  end
30
46
 
31
47
  def parse_html(raw_html)
32
48
  Nokogiri::HTML(raw_html)
33
49
  end
50
+
51
+ def add_base_url(str)
52
+ "#{base_url}#{str}"
53
+ end
34
54
  end
35
55
  end
36
56
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.12
4
+ version: 0.1.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-08 00:00:00.000000000 Z
11
+ date: 2021-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty