pulse-downloader 0.1.11 → 0.1.16

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 608d5fcfe9c84a1c201f54a4a74b64dbe285580e733a77fe89f42c8645b6c2e4
4
- data.tar.gz: 3252a32fa9955d1d81b158c3cff4c483329dd1d610a663cd48382a0fe40c1ddc
3
+ metadata.gz: 2b4edaf42a90782470ab4956cc6d9cf4214d6bb81c03b8e5b55193ac1867330e
4
+ data.tar.gz: 81da683b9c0aa82f17197a280318b529ab4f6630dd20fa385925739f02165ca2
5
5
  SHA512:
6
- metadata.gz: f10828967940a47f023dc610e3121f00eebe8962fea1830ccb25c9d84fc8b244749d094cfbfbc8c9ce0495cd572823058fc9557dc665e318a680c6e000703fa7
7
- data.tar.gz: 07becda8b5b3c47908c96c6ee5016980ac053c3555b4acac47f1da7d73cf42bcac692351f877cf2ffd2a33d74107a2687614d25a3a8613593d0181bd6159428b
6
+ metadata.gz: d8771a9ece20d74d44bb8e365d7f51be288ee8a9f5ff5dbec826013aec57998147d58d0e17ffd6ab5c7213f2dc441bfd4b471716f99e8df40e06cf2c7c00ddf0
7
+ data.tar.gz: 053c925e99adeeb3cae3f277bb0e3ceda00d327ac57fdc76ce25e9a86220d66a291c0f388aa1aebe66a92e7b9b8971409c7b59dc4d188fa87ee4504914b52052
@@ -8,6 +8,7 @@ module Pulse
8
8
 
9
9
  attr_reader :url,
10
10
  :file_type,
11
+ :scrape_images,
11
12
  :save_data,
12
13
  :save_path,
13
14
  :read_from_save_path,
@@ -29,6 +30,7 @@ module Pulse
29
30
  # TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
30
31
  def initialize(url:,
31
32
  file_type:,
33
+ scrape_images: false,
32
34
  save_data: false,
33
35
  save_path: '',
34
36
  read_from_save_path: false,
@@ -40,6 +42,7 @@ module Pulse
40
42
 
41
43
  @url = url
42
44
  @file_type = file_type
45
+ @scrape_images = scrape_images
43
46
  @save_data = save_data
44
47
  @save_path = save_path
45
48
  @read_from_save_path = read_from_save_path
@@ -1,8 +1,6 @@
1
1
  module Pulse
2
2
  module Downloader
3
3
  module FileDownloader
4
- require 'uri'
5
-
6
4
  # save_path and verify_ssl are defined in client.rb
7
5
  def download(file_path, progress_bar=nil)
8
6
  raise "save_path is undefined" if save_data && save_path == ''
@@ -10,7 +8,7 @@ module Pulse
10
8
 
11
9
  @start_time = get_micro_second_time
12
10
 
13
- file_data = HTTParty.get(URI.escape(compute_file_link(file_path)), verify: verify_ssl)
11
+ file_data = HTTParty.get(escape(compute_file_link(file_path)), verify: verify_ssl)
14
12
 
15
13
  @end_time = get_micro_second_time
16
14
 
@@ -68,6 +66,37 @@ module Pulse
68
66
  def section?(file_path)
69
67
  file_path[0] == '#'
70
68
  end
69
+
70
# Characters that +escape+ percent-encodes, mapped to their replacements.
# ':' and '/' are deliberately absent so the scheme and path separators of a
# link survive. The curly quotes map to the codes of their ASCII
# counterparts, as the previous substitution list did.
ESCAPE_REPLACEMENTS = {
  ' '  => '%20',
  '$'  => '%24',
  '&'  => '%26',
  '`'  => '%60',
  '<'  => '%3C',
  '>'  => '%3E',
  '['  => '%5B',
  ']'  => '%5D',
  '{'  => '%7B',
  '}'  => '%7D',
  '“'  => '%22',
  '"'  => '%22',
  '+'  => '%2B',
  '#'  => '%23',
  '%'  => '%25',
  '@'  => '%40',
  ';'  => '%3B',
  '='  => '%3D',
  '?'  => '%3F',
  '\\' => '%5C',
  '^'  => '%5E',
  '|'  => '%7C',
  '~'  => '%7E',
  '‘'  => '%27',
  ','  => '%2C'
}.freeze

# Matches any single character listed in ESCAPE_REPLACEMENTS.
ESCAPE_PATTERN = Regexp.union(ESCAPE_REPLACEMENTS.keys).freeze

# Percent-encodes the characters listed in ESCAPE_REPLACEMENTS.
#
# The replacement happens in a single pass over the input, so a '%' that is
# produced by an earlier substitution is never encoded a second time (a
# space becomes "%20", not "%2520"). A '%' already present in the input is
# still encoded to "%25", so input that is already percent-encoded will be
# encoded again.
#
# @param str [String] link to encode; it is not modified
# @return [String] a new string with the listed characters encoded
def escape(str)
  # Non-bang gsub: leaves the caller's string untouched and works on frozen
  # strings.
  str.gsub(ESCAPE_PATTERN, ESCAPE_REPLACEMENTS)
end
71
100
  end
72
101
  end
73
102
  end
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.11"
3
+ VERSION = "0.1.16"
4
4
  end
5
5
  end
@@ -19,18 +19,44 @@ module Pulse
19
19
 
20
20
  def extract_file_urls(response)
21
21
  return [] if response.body.nil? || response.body.empty?
22
+ extract_download_links(response) + extract_embedded_images(response)
23
+ end
22
24
 
25
# Gathers the href of every <a> element in the response body, keeps those
# whose href contains +file_type+, and passes each one through
# +add_base_url+.
def extract_download_links(response)
  anchors = parse_html(response.body).css('a').to_a
  hrefs = anchors.map { |anchor| anchor['href'] }.compact
  matching = hrefs.select { |href| href.include?(file_type) }

  matching.map { |href| add_base_url(href) }
end
34
+
35
# Gathers the src of every <img> element in the response body, keeps those
# whose src contains +file_type+, and passes each one through
# +add_base_url+. Returns an empty array when +scrape_images+ is falsy.
def extract_embedded_images(response)
  return [] unless scrape_images

  images = parse_html(response.body).css('img').to_a
  sources = images.map { |image| image["src"] }.compact
  matching = sources.select { |source| source.include?(file_type) }

  matching.map { |source| add_base_url(source) }
end
30
46
 
31
47
  def parse_html(raw_html)
32
48
  Nokogiri::HTML(raw_html)
33
49
  end
50
+
51
# Turns a link scraped from the page at +url+ into an absolute link.
#
# * A link that already carries a scheme ("https://...", "data:...") is
#   returned unchanged.
# * A protocol-relative link ("//host/a.png") receives the scheme of +url+.
# * A root-relative link ("/a.png") is appended to the scheme and host of
#   +url+.
# * Any other link is resolved against the directory of +url+.
#
# The previous implementation ignored +str+ and returned only the host of
# +url+, relied on Array#third (ActiveSupport, not core Ruby) and recognised
# only "https" pages.
#
# NOTE(review): compute_file_link is not visible from here -- confirm it
# accepts absolute links without prefixing the base URL a second time.
#
# @param str [String] href/src value taken from the page
# @return [String] the link with the base of +url+ added where needed
def add_base_url(str)
  return str if str.match?(/\A[a-z][a-z0-9+.-]*:/i)

  # Every part of the pattern is optional, so it matches any string.
  page = url.match(%r{\A(?:(?<scheme>[a-z][a-z0-9+.-]*)://)?(?<host>[^/?#]*)(?<path>[^?#]*)}i)
  scheme = page[:scheme]
  origin = scheme ? "#{scheme}://#{page[:host]}" : page[:host]

  if str.start_with?('//')
    scheme ? "#{scheme}:#{str}" : str
  elsif str.start_with?('/')
    "#{origin}#{str}"
  else
    # Keep everything up to the last '/' of the page path; a page without a
    # path resolves against the root.
    directory = page[:path][%r{\A.*/}] || '/'
    "#{origin}#{directory}#{str}"
  end
end
34
60
  end
35
61
  end
36
62
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 0.1.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-08 00:00:00.000000000 Z
11
+ date: 2021-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty