pulse-downloader 0.1.11 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 608d5fcfe9c84a1c201f54a4a74b64dbe285580e733a77fe89f42c8645b6c2e4
4
- data.tar.gz: 3252a32fa9955d1d81b158c3cff4c483329dd1d610a663cd48382a0fe40c1ddc
3
+ metadata.gz: 2b4edaf42a90782470ab4956cc6d9cf4214d6bb81c03b8e5b55193ac1867330e
4
+ data.tar.gz: 81da683b9c0aa82f17197a280318b529ab4f6630dd20fa385925739f02165ca2
5
5
  SHA512:
6
- metadata.gz: f10828967940a47f023dc610e3121f00eebe8962fea1830ccb25c9d84fc8b244749d094cfbfbc8c9ce0495cd572823058fc9557dc665e318a680c6e000703fa7
7
- data.tar.gz: 07becda8b5b3c47908c96c6ee5016980ac053c3555b4acac47f1da7d73cf42bcac692351f877cf2ffd2a33d74107a2687614d25a3a8613593d0181bd6159428b
6
+ metadata.gz: d8771a9ece20d74d44bb8e365d7f51be288ee8a9f5ff5dbec826013aec57998147d58d0e17ffd6ab5c7213f2dc441bfd4b471716f99e8df40e06cf2c7c00ddf0
7
+ data.tar.gz: 053c925e99adeeb3cae3f277bb0e3ceda00d327ac57fdc76ce25e9a86220d66a291c0f388aa1aebe66a92e7b9b8971409c7b59dc4d188fa87ee4504914b52052
@@ -8,6 +8,7 @@ module Pulse
8
8
 
9
9
  attr_reader :url,
10
10
  :file_type,
11
+ :scrape_images,
11
12
  :save_data,
12
13
  :save_path,
13
14
  :read_from_save_path,
@@ -29,6 +30,7 @@ module Pulse
29
30
  # TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
30
31
  def initialize(url:,
31
32
  file_type:,
33
+ scrape_images: false,
32
34
  save_data: false,
33
35
  save_path: '',
34
36
  read_from_save_path: false,
@@ -40,6 +42,7 @@ module Pulse
40
42
 
41
43
  @url = url
42
44
  @file_type = file_type
45
+ @scrape_images = scrape_images
43
46
  @save_data = save_data
44
47
  @save_path = save_path
45
48
  @read_from_save_path = read_from_save_path
@@ -1,8 +1,6 @@
1
1
  module Pulse
2
2
  module Downloader
3
3
  module FileDownloader
4
- require 'uri'
5
-
6
4
  # save_path and verify_ssl are defined in client.rb
7
5
  def download(file_path, progress_bar=nil)
8
6
  raise "save_path is undefined" if save_data && save_path == ''
@@ -10,7 +8,7 @@ module Pulse
10
8
 
11
9
  @start_time = get_micro_second_time
12
10
 
13
- file_data = HTTParty.get(URI.escape(compute_file_link(file_path)), verify: verify_ssl)
11
+ file_data = HTTParty.get(escape(compute_file_link(file_path)), verify: verify_ssl)
14
12
 
15
13
  @end_time = get_micro_second_time
16
14
 
@@ -68,6 +66,37 @@ module Pulse
68
66
  def section?(file_path)
69
67
  file_path[0] == '#'
70
68
  end
69
+
70
# Characters that #escape percent-encodes, mapped to their replacements.
# ":" and "/" are deliberately absent so the scheme and path separators of a
# full URL are left intact. The curly quotes “ and ‘ are encoded as their
# ASCII counterparts (%22 and %27).
ESCAPE_REPLACEMENTS = {
  ' ' => '%20',
  '$' => '%24',
  '&' => '%26',
  '`' => '%60',
  '<' => '%3C',
  '>' => '%3E',
  '[' => '%5B',
  ']' => '%5D',
  '{' => '%7B',
  '}' => '%7D',
  '“' => '%22',
  '"' => '%22',
  '+' => '%2B',
  '#' => '%23',
  '%' => '%25',
  '@' => '%40',
  ';' => '%3B',
  '=' => '%3D',
  '?' => '%3F',
  '\\' => '%5C',
  '^' => '%5E',
  '|' => '%7C',
  '~' => '%7E',
  '‘' => '%27',
  ',' => '%2C'
}.freeze

# Single alternation over every character listed in ESCAPE_REPLACEMENTS.
ESCAPE_PATTERN = Regexp.union(ESCAPE_REPLACEMENTS.keys).freeze

# Percent-encodes the characters listed in ESCAPE_REPLACEMENTS.
#
# The replacement is done in a single pass so that the "%" introduced by one
# substitution is never re-encoded by another (sequential substitutions
# turned " " into "%2520" instead of "%20"). The argument is left untouched
# and a new string is returned, so frozen strings are accepted.
#
# @param str [String] the link to encode
# @return [String] a new string with the listed characters percent-encoded
def escape(str)
  str.gsub(ESCAPE_PATTERN, ESCAPE_REPLACEMENTS)
end
71
100
  end
72
101
  end
73
102
  end
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.11"
3
+ VERSION = "0.1.16"
4
4
  end
5
5
  end
@@ -19,18 +19,44 @@ module Pulse
19
19
 
20
20
  def extract_file_urls(response)
21
21
  return [] if response.body.nil? || response.body.empty?
22
+ extract_download_links(response) + extract_embedded_images(response)
23
+ end
22
24
 
25
# Collects the href of every <a> element in the response body, keeps those
# that contain file_type, and passes each one through add_base_url.
def extract_download_links(response)
  anchors = parse_html(response.body).css('a').to_a
  hrefs = anchors.map { |anchor| anchor['href'] }.compact
  matching = hrefs.select { |href| href.include?(file_type) }
  matching.map { |href| add_base_url(href) }
end
34
+
35
# Collects the src of every <img> element in the response body, keeps those
# that contain file_type, and passes each one through add_base_url.
# Yields an empty array when scrape_images is not enabled.
def extract_embedded_images(response)
  if scrape_images
    images = parse_html(response.body).css('img').to_a
    sources = images.map { |image| image['src'] }.compact
    matching = sources.select { |src| src.include?(file_type) }
    matching.map { |src| add_base_url(src) }
  else
    []
  end
end
30
46
 
31
47
  def parse_html(raw_html)
32
48
  Nokogiri::HTML(raw_html)
33
49
  end
50
+
51
# Turns a link scraped from the page into a URL rooted at the site that
# +url+ points to.
#
# Previously the +str+ argument was ignored and only the host of +url+ was
# returned, so every scraped link collapsed to the same value. It also relied
# on Array#third (ActiveSupport, not core Ruby) and returned "http:" for
# plain-http URLs.
#
# Rules:
# * empty links and "#..." links are returned unchanged ("#" entries are
#   recognised separately by section?);
# * links that already carry a scheme ("https://...") are returned unchanged;
# * protocol-relative links ("//host/path") inherit the scheme of +url+;
# * everything else is appended to the scheme + host of +url+.
#
# NOTE(review): links without a leading "/" are anchored at the site root,
# not at the directory of the current page — confirm this matches what
# compute_file_link expects.
#
# @param str [String] the href/src value taken from the page
# @return [String] the link, made absolute against the site of +url+
def add_base_url(str)
  return str if str.empty? || str.start_with?('#')
  return str if str.match?(%r{\A[a-z][a-z0-9+.\-]*://}i)

  # The scheme is optional because +url+ may be given as a bare "host/path".
  parts = url.match(%r{\A(?:(?<scheme>[a-z][a-z0-9+.\-]*)://)?(?<host>[^/?#]+)}i)
  return str unless parts

  scheme = parts[:scheme]
  host = parts[:host]
  root = scheme ? "#{scheme}://#{host}" : host

  if str.start_with?('//')
    scheme ? "#{scheme}:#{str}" : str.sub(%r{\A//}, '')
  elsif str.start_with?('/')
    "#{root}#{str}"
  else
    "#{root}/#{str}"
  end
end
34
60
  end
35
61
  end
36
62
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 0.1.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-08 00:00:00.000000000 Z
11
+ date: 2021-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty