rcarvalho-image_scraper 0.1.8.2 → 0.1.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/image_scraper.gemspec +1 -1
- data/lib/image_scraper/client.rb +2 -3
- data/lib/image_scraper/util.rb +9 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d5014335accdba31dc9b9b383f7421b7ae70575
|
4
|
+
data.tar.gz: 99ed329c5092b56c200456b3d9d9d75e4b18eada
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 325570c10e64036e4e97ac08e1dab0913cc0be29a0d4600c18fbee6c0fa94310121390ee54f87be14dedaed017883699a576e1aa56e47a2364c5561b00098dcc
|
7
|
+
data.tar.gz: 0036a9afbf286177627085406c5e08a3b78f0d68f0f481e6c290de940a6f3f40af283d8a84f1f9a3b79add7e671e7c61b029c2dd5d8448f0a690c7977bb827bb
|
data/image_scraper.gemspec
CHANGED
data/lib/image_scraper/client.rb
CHANGED
@@ -39,12 +39,11 @@ module ImageScraper
|
|
39
39
|
css = file.string rescue IO.read(file) rescue next
|
40
40
|
|
41
41
|
images += css.scan(/url\((.*?)\)/).collect do |image_url|
|
42
|
-
image_url = URI.escape image_url[0]
|
42
|
+
image_url = URI.escape ImageScraper::Util.cleanup_url(image_url[0])
|
43
43
|
image_url = image_url.gsub(/([{}|\^\[\]\@`])/) {|s| CGI.escape(s)} # escape characters that URI.escape doesn't get
|
44
44
|
if image_url.include?("data:image") and @include_css_data_images
|
45
45
|
image_url
|
46
46
|
else
|
47
|
-
image_url = ImageScraper::Util.strip_quotes(image_url)
|
48
47
|
@convert_to_absolute_url ? ImageScraper::Util.absolute_url(stylesheet, image_url) : image_url
|
49
48
|
end
|
50
49
|
end
|
@@ -55,7 +54,7 @@ module ImageScraper
|
|
55
54
|
def stylesheets
|
56
55
|
return [] if doc.blank?
|
57
56
|
doc.xpath('//link[@rel="stylesheet"]').collect do |stylesheet|
|
58
|
-
ImageScraper::Util.absolute_url url, URI.escape(stylesheet['href'])
|
57
|
+
ImageScraper::Util.absolute_url url, URI.escape(ImageScraper::Util.cleanup_url(stylesheet['href']))
|
59
58
|
end
|
60
59
|
end
|
61
60
|
end
|
data/lib/image_scraper/util.rb
CHANGED
@@ -26,7 +26,15 @@ module ImageScraper
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def self.strip_quotes(image_url)
|
29
|
-
image_url.
|
29
|
+
image_url.gsub("'","").gsub('"','')
|
30
|
+
end
|
31
|
+
|
32
|
+
def self.chomp(image_url)
|
33
|
+
image_url.gsub(/\s/,'')
|
34
|
+
end
|
35
|
+
|
36
|
+
def self.cleanup_url(image_url)
|
37
|
+
ImageScraper::Util.chomp(ImageScraper::Util.strip_quotes(image_url))
|
30
38
|
end
|
31
39
|
end
|
32
40
|
end
|