thumbnail_scraper 0.0.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +1 -0
- data/Gemfile.lock +6 -4
- data/README.md +5 -5
- data/lib/thumbnail_scraper.rb +1 -5
- data/lib/thumbnail_scraper/image.rb +8 -0
- data/lib/thumbnail_scraper/version.rb +1 -1
- data/lib/thumbnail_scraper/webpage.rb +7 -1
- data/spec/units/thumbnail_scraper_spec.rb +12 -11
- data/spec/units/webpage_spec.rb +3 -3
- data/thumbnail_scraper.gemspec +3 -3
- metadata +22 -33
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 85ec2104d8ba31fd892e4128ebd3d997c916a811
|
4
|
+
data.tar.gz: dd3187d661a612bc667306dc3d9bb181feb65905
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 47f9a35c9d88abd11dea525c92c6476ffb97f76de3da38ab29f5b6e8c1dd560bbc84b82e4056ee8df888607c56199265a541a7170f4be8d1255e3b3e8554d747
|
7
|
+
data.tar.gz: 50d9eb695180716d5cd098e016cbd4275c54db2fe9dfc60015ad10d719501c0d75b5c42958209fe294bc73546915120e6697a8633a6c0b37680911f5263d9133
|
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
thumbnail_scraper (0.0.
|
4
|
+
thumbnail_scraper (0.0.2)
|
5
5
|
fastimage
|
6
6
|
httpclient
|
7
7
|
nokogiri
|
@@ -12,9 +12,11 @@ GEM
|
|
12
12
|
addressable (2.2.8)
|
13
13
|
crack (0.3.1)
|
14
14
|
diff-lcs (1.1.3)
|
15
|
-
fastimage (1.2
|
16
|
-
httpclient (2.
|
17
|
-
|
15
|
+
fastimage (1.5.2)
|
16
|
+
httpclient (2.3.4.1)
|
17
|
+
mini_portile (0.5.3)
|
18
|
+
nokogiri (1.6.1)
|
19
|
+
mini_portile (~> 0.5.0)
|
18
20
|
rspec (2.11.0)
|
19
21
|
rspec-core (~> 2.11.0)
|
20
22
|
rspec-expectations (~> 2.11.0)
|
data/README.md
CHANGED
@@ -10,11 +10,11 @@ require 'thumbnail_scraper'
|
|
10
10
|
|
11
11
|
include ThumbnailScraper
|
12
12
|
scraper = ThumbnailScraper.new
|
13
|
-
image = scraper.
|
13
|
+
image = scraper.image_to_thumbnail_for_url("http://www.example.com/")
|
14
14
|
thumbnail_url = image.url
|
15
15
|
```
|
16
16
|
|
17
|
-
ThumbnailScraper#
|
17
|
+
ThumbnailScraper#image_to_thumbnail_for_url method returns Image object, which contains its size and url.
|
18
18
|
|
19
19
|
Suggested usage
|
20
20
|
---------------
|
@@ -25,13 +25,13 @@ We encourage you to use it with delayed_job as jobs queue and dragonfly as image
|
|
25
25
|
require 'thumbnail_scraper'
|
26
26
|
|
27
27
|
module Jobs
|
28
|
-
class
|
28
|
+
class ScrapeThumbnailJob < Struct.new(:page)
|
29
29
|
def perform
|
30
30
|
scraper = ::ThumbnailScraper::ThumbnailScraper.new
|
31
31
|
image = scraper.image_to_thumbnail_for_url(page.url)
|
32
32
|
page.thumbnail_url = image.url.to_s
|
33
33
|
page.save!
|
34
|
-
end
|
35
|
-
end
|
34
|
+
end
|
35
|
+
end
|
36
36
|
end
|
37
37
|
```
|
data/lib/thumbnail_scraper.rb
CHANGED
@@ -35,11 +35,7 @@ module ThumbnailScraper
|
|
35
35
|
end
|
36
36
|
|
37
37
|
def select_valid_images(images)
|
38
|
-
images.select
|
39
|
-
end
|
40
|
-
|
41
|
-
def image_is_valid?(image)
|
42
|
-
image.width >= 50 && image.height >= 50 && image.width.to_f / image.height.to_f <= 3 && image.height.to_f / image.width.to_f <= 3
|
38
|
+
images.select(&:exists?).select(&:valid?)
|
43
39
|
end
|
44
40
|
end
|
45
41
|
end
|
@@ -9,6 +9,10 @@ module ThumbnailScraper
|
|
9
9
|
self.url = url
|
10
10
|
end
|
11
11
|
|
12
|
+
def exists?
|
13
|
+
size ? true : false
|
14
|
+
end
|
15
|
+
|
12
16
|
def size
|
13
17
|
@size ||= ::FastImage.size(url.to_s)
|
14
18
|
end
|
@@ -24,5 +28,9 @@ module ThumbnailScraper
|
|
24
28
|
def area
|
25
29
|
width * height
|
26
30
|
end
|
31
|
+
|
32
|
+
def valid?
|
33
|
+
width >= 50 && height >= 50 && width.to_f / height.to_f <= 3 && height.to_f / width.to_f <= 3
|
34
|
+
end
|
27
35
|
end
|
28
36
|
end
|
@@ -59,7 +59,13 @@ module ThumbnailScraper
|
|
59
59
|
|
60
60
|
def attached_images_urls
|
61
61
|
elements = document.xpath("//img/@src")
|
62
|
-
elements.map
|
62
|
+
elements.map do |element|
|
63
|
+
if element.value.start_with?("http://") || element.value.start_with?("https://")
|
64
|
+
image_url(element.value)
|
65
|
+
else
|
66
|
+
image_url("#{url.scheme}://#{url.host}#{element.value.gsub(/^\/?/,'/')}")
|
67
|
+
end
|
68
|
+
end
|
63
69
|
end
|
64
70
|
end
|
65
71
|
end
|
@@ -108,13 +108,13 @@ module ThumbnailScraper
|
|
108
108
|
before :each do
|
109
109
|
@image1 = mock("image")
|
110
110
|
@image2 = mock("image")
|
111
|
-
@
|
112
|
-
@
|
111
|
+
@image1.stub!(:exists?).and_return(true)
|
112
|
+
@image2.stub!(:exists?).and_return(true)
|
113
|
+
@image1.stub!(:valid?).and_return(true)
|
114
|
+
@image2.stub!(:valid?).and_return(false)
|
113
115
|
end
|
114
116
|
|
115
117
|
it "should use #image_is_valid? to validate image" do
|
116
|
-
@thumbnail_scraper.stub!(:image_is_valid?).with(@image1).and_return(true)
|
117
|
-
@thumbnail_scraper.stub!(:image_is_valid?).with(@image2).and_return(false)
|
118
118
|
@thumbnail_scraper.select_valid_images([@image1, @image2])
|
119
119
|
end
|
120
120
|
|
@@ -123,9 +123,10 @@ module ThumbnailScraper
|
|
123
123
|
end
|
124
124
|
end
|
125
125
|
|
126
|
-
describe "#
|
126
|
+
describe "Image#valid?" do
|
127
127
|
before :each do
|
128
|
-
@image =
|
128
|
+
@image = Image.new('http://example.com/image.png')
|
129
|
+
@image.stub!(:exists?).and_return(true)
|
129
130
|
@image.stub!(:width).and_return(100)
|
130
131
|
@image.stub!(:height).and_return(100)
|
131
132
|
end
|
@@ -136,7 +137,7 @@ module ThumbnailScraper
|
|
136
137
|
end
|
137
138
|
|
138
139
|
it "should be false" do
|
139
|
-
@
|
140
|
+
@image.valid?.should be_false
|
140
141
|
end
|
141
142
|
end
|
142
143
|
|
@@ -146,7 +147,7 @@ module ThumbnailScraper
|
|
146
147
|
end
|
147
148
|
|
148
149
|
it "should be false" do
|
149
|
-
@
|
150
|
+
@image.valid?.should be_false
|
150
151
|
end
|
151
152
|
end
|
152
153
|
|
@@ -157,7 +158,7 @@ module ThumbnailScraper
|
|
157
158
|
end
|
158
159
|
|
159
160
|
it "should be false" do
|
160
|
-
@
|
161
|
+
@image.valid?.should be_false
|
161
162
|
end
|
162
163
|
end
|
163
164
|
|
@@ -168,13 +169,13 @@ module ThumbnailScraper
|
|
168
169
|
end
|
169
170
|
|
170
171
|
it "should be false" do
|
171
|
-
@
|
172
|
+
@image.valid?.should be_false
|
172
173
|
end
|
173
174
|
end
|
174
175
|
|
175
176
|
context "otherwise" do
|
176
177
|
it "should be true" do
|
177
|
-
@
|
178
|
+
@image.valid?.should be_true
|
178
179
|
end
|
179
180
|
end
|
180
181
|
end
|
data/spec/units/webpage_spec.rb
CHANGED
@@ -160,9 +160,9 @@ module ThumbnailScraper
|
|
160
160
|
context "with images in body" do
|
161
161
|
before :each do
|
162
162
|
@webpage.body = File.read(asset_file_path("images.html"))
|
163
|
-
@webpage.stub!(:image_url).with("/images/subcatalog/smallest.jpg").and_return(:smallest_url)
|
164
|
-
@webpage.stub!(:image_url).with("/images/kitty.jpg").and_return(:kitty_url)
|
165
|
-
@webpage.stub!(:image_url).with("images/biggest.jpg").and_return(:biggest_url)
|
163
|
+
@webpage.stub!(:image_url).with("http://www.example.com/images/subcatalog/smallest.jpg").and_return(:smallest_url)
|
164
|
+
@webpage.stub!(:image_url).with("http://www.example.com/images/kitty.jpg").and_return(:kitty_url)
|
165
|
+
@webpage.stub!(:image_url).with("http://www.example.com/images/biggest.jpg").and_return(:biggest_url)
|
166
166
|
end
|
167
167
|
|
168
168
|
it "should return all urls of found images" do
|
data/thumbnail_scraper.gemspec
CHANGED
@@ -3,9 +3,9 @@ require File.expand_path('../lib/thumbnail_scraper/version', __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
5
|
gem.authors = ["Jeppe Liisberg", "Jan Filipowski"]
|
6
|
-
gem.email = ["
|
7
|
-
gem.description = %q{detect
|
8
|
-
gem.summary = %q{detect
|
6
|
+
gem.email = ["jeppe@liisberg.net"]
|
7
|
+
gem.description = %q{detect and fetch an image suitable as a thumbnail for any url}
|
8
|
+
gem.summary = %q{detect and fetch an image suitable as a thumbnail for any url}
|
9
9
|
gem.homepage = ""
|
10
10
|
|
11
11
|
gem.add_development_dependency "rspec"
|
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: thumbnail_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
5
|
-
prerelease:
|
4
|
+
version: 1.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Jeppe Liisberg
|
@@ -10,96 +9,87 @@ authors:
|
|
10
9
|
autorequire:
|
11
10
|
bindir: bin
|
12
11
|
cert_chain: []
|
13
|
-
date:
|
12
|
+
date: 2014-05-01 00:00:00.000000000 Z
|
14
13
|
dependencies:
|
15
14
|
- !ruby/object:Gem::Dependency
|
16
15
|
name: rspec
|
17
16
|
requirement: !ruby/object:Gem::Requirement
|
18
|
-
none: false
|
19
17
|
requirements:
|
20
|
-
- -
|
18
|
+
- - ">="
|
21
19
|
- !ruby/object:Gem::Version
|
22
20
|
version: '0'
|
23
21
|
type: :development
|
24
22
|
prerelease: false
|
25
23
|
version_requirements: !ruby/object:Gem::Requirement
|
26
|
-
none: false
|
27
24
|
requirements:
|
28
|
-
- -
|
25
|
+
- - ">="
|
29
26
|
- !ruby/object:Gem::Version
|
30
27
|
version: '0'
|
31
28
|
- !ruby/object:Gem::Dependency
|
32
29
|
name: webmock
|
33
30
|
requirement: !ruby/object:Gem::Requirement
|
34
|
-
none: false
|
35
31
|
requirements:
|
36
|
-
- -
|
32
|
+
- - ">="
|
37
33
|
- !ruby/object:Gem::Version
|
38
34
|
version: '0'
|
39
35
|
type: :development
|
40
36
|
prerelease: false
|
41
37
|
version_requirements: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
38
|
requirements:
|
44
|
-
- -
|
39
|
+
- - ">="
|
45
40
|
- !ruby/object:Gem::Version
|
46
41
|
version: '0'
|
47
42
|
- !ruby/object:Gem::Dependency
|
48
43
|
name: httpclient
|
49
44
|
requirement: !ruby/object:Gem::Requirement
|
50
|
-
none: false
|
51
45
|
requirements:
|
52
|
-
- -
|
46
|
+
- - ">="
|
53
47
|
- !ruby/object:Gem::Version
|
54
48
|
version: '0'
|
55
49
|
type: :runtime
|
56
50
|
prerelease: false
|
57
51
|
version_requirements: !ruby/object:Gem::Requirement
|
58
|
-
none: false
|
59
52
|
requirements:
|
60
|
-
- -
|
53
|
+
- - ">="
|
61
54
|
- !ruby/object:Gem::Version
|
62
55
|
version: '0'
|
63
56
|
- !ruby/object:Gem::Dependency
|
64
57
|
name: fastimage
|
65
58
|
requirement: !ruby/object:Gem::Requirement
|
66
|
-
none: false
|
67
59
|
requirements:
|
68
|
-
- -
|
60
|
+
- - ">="
|
69
61
|
- !ruby/object:Gem::Version
|
70
62
|
version: '0'
|
71
63
|
type: :runtime
|
72
64
|
prerelease: false
|
73
65
|
version_requirements: !ruby/object:Gem::Requirement
|
74
|
-
none: false
|
75
66
|
requirements:
|
76
|
-
- -
|
67
|
+
- - ">="
|
77
68
|
- !ruby/object:Gem::Version
|
78
69
|
version: '0'
|
79
70
|
- !ruby/object:Gem::Dependency
|
80
71
|
name: nokogiri
|
81
72
|
requirement: !ruby/object:Gem::Requirement
|
82
|
-
none: false
|
83
73
|
requirements:
|
84
|
-
- -
|
74
|
+
- - ">="
|
85
75
|
- !ruby/object:Gem::Version
|
86
76
|
version: '0'
|
87
77
|
type: :runtime
|
88
78
|
prerelease: false
|
89
79
|
version_requirements: !ruby/object:Gem::Requirement
|
90
|
-
none: false
|
91
80
|
requirements:
|
92
|
-
- -
|
81
|
+
- - ">="
|
93
82
|
- !ruby/object:Gem::Version
|
94
83
|
version: '0'
|
95
|
-
description: detect
|
84
|
+
description: detect and fetch an image suitable as a thumbnail for any url
|
96
85
|
email:
|
97
|
-
-
|
86
|
+
- jeppe@liisberg.net
|
98
87
|
executables: []
|
99
88
|
extensions: []
|
100
89
|
extra_rdoc_files: []
|
101
90
|
files:
|
102
|
-
- .gitignore
|
91
|
+
- ".gitignore"
|
92
|
+
- ".rspec"
|
103
93
|
- Gemfile
|
104
94
|
- Gemfile.lock
|
105
95
|
- LICENSE
|
@@ -127,28 +117,27 @@ files:
|
|
127
117
|
- thumbnail_scraper.gemspec
|
128
118
|
homepage: ''
|
129
119
|
licenses: []
|
120
|
+
metadata: {}
|
130
121
|
post_install_message:
|
131
122
|
rdoc_options: []
|
132
123
|
require_paths:
|
133
124
|
- lib
|
134
125
|
required_ruby_version: !ruby/object:Gem::Requirement
|
135
|
-
none: false
|
136
126
|
requirements:
|
137
|
-
- -
|
127
|
+
- - ">="
|
138
128
|
- !ruby/object:Gem::Version
|
139
129
|
version: '0'
|
140
130
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
141
|
-
none: false
|
142
131
|
requirements:
|
143
|
-
- -
|
132
|
+
- - ">="
|
144
133
|
- !ruby/object:Gem::Version
|
145
134
|
version: '0'
|
146
135
|
requirements: []
|
147
136
|
rubyforge_project:
|
148
|
-
rubygems_version:
|
137
|
+
rubygems_version: 2.2.2
|
149
138
|
signing_key:
|
150
|
-
specification_version:
|
151
|
-
summary: detect
|
139
|
+
specification_version: 4
|
140
|
+
summary: detect and fetch an image suitable as a thumbnail for any url
|
152
141
|
test_files:
|
153
142
|
- spec/sample_pages/images.html
|
154
143
|
- spec/sample_pages/images/biggest.jpg
|