thumbnail_scraper 0.0.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +1 -0
- data/Gemfile.lock +6 -4
- data/README.md +5 -5
- data/lib/thumbnail_scraper.rb +1 -5
- data/lib/thumbnail_scraper/image.rb +8 -0
- data/lib/thumbnail_scraper/version.rb +1 -1
- data/lib/thumbnail_scraper/webpage.rb +7 -1
- data/spec/units/thumbnail_scraper_spec.rb +12 -11
- data/spec/units/webpage_spec.rb +3 -3
- data/thumbnail_scraper.gemspec +3 -3
- metadata +22 -33
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 85ec2104d8ba31fd892e4128ebd3d997c916a811
|
4
|
+
data.tar.gz: dd3187d661a612bc667306dc3d9bb181feb65905
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 47f9a35c9d88abd11dea525c92c6476ffb97f76de3da38ab29f5b6e8c1dd560bbc84b82e4056ee8df888607c56199265a541a7170f4be8d1255e3b3e8554d747
|
7
|
+
data.tar.gz: 50d9eb695180716d5cd098e016cbd4275c54db2fe9dfc60015ad10d719501c0d75b5c42958209fe294bc73546915120e6697a8633a6c0b37680911f5263d9133
|
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
thumbnail_scraper (0.0.
|
4
|
+
thumbnail_scraper (0.0.2)
|
5
5
|
fastimage
|
6
6
|
httpclient
|
7
7
|
nokogiri
|
@@ -12,9 +12,11 @@ GEM
|
|
12
12
|
addressable (2.2.8)
|
13
13
|
crack (0.3.1)
|
14
14
|
diff-lcs (1.1.3)
|
15
|
-
fastimage (1.2
|
16
|
-
httpclient (2.
|
17
|
-
|
15
|
+
fastimage (1.5.2)
|
16
|
+
httpclient (2.3.4.1)
|
17
|
+
mini_portile (0.5.3)
|
18
|
+
nokogiri (1.6.1)
|
19
|
+
mini_portile (~> 0.5.0)
|
18
20
|
rspec (2.11.0)
|
19
21
|
rspec-core (~> 2.11.0)
|
20
22
|
rspec-expectations (~> 2.11.0)
|
data/README.md
CHANGED
@@ -10,11 +10,11 @@ require 'thumbnail_scraper'
|
|
10
10
|
|
11
11
|
include ThumbnailScraper
|
12
12
|
scraper = ThumbnailScraper.new
|
13
|
-
image = scraper.
|
13
|
+
image = scraper.image_to_thumbnail_for_url("http://www.example.com/")
|
14
14
|
thumbnail_url = image.url
|
15
15
|
```
|
16
16
|
|
17
|
-
ThumbnailScraper#
|
17
|
+
ThumbnailScraper#image_to_thumbnail_for_url method returns Image object, which contains its size and url.
|
18
18
|
|
19
19
|
Suggested usage
|
20
20
|
---------------
|
@@ -25,13 +25,13 @@ We encourage you to use it with delayed_job as jobs queue and dragonfly as image
|
|
25
25
|
require 'thumbnail_scraper'
|
26
26
|
|
27
27
|
module Jobs
|
28
|
-
class
|
28
|
+
class ScrapeThumbnailJob < Struct.new(:page)
|
29
29
|
def perform
|
30
30
|
scraper = ::ThumbnailScraper::ThumbnailScraper.new
|
31
31
|
image = scraper.image_to_thumbnail_for_url(page.url)
|
32
32
|
page.thumbnail_url = image.url.to_s
|
33
33
|
page.save!
|
34
|
-
end
|
35
|
-
end
|
34
|
+
end
|
35
|
+
end
|
36
36
|
end
|
37
37
|
```
|
data/lib/thumbnail_scraper.rb
CHANGED
@@ -35,11 +35,7 @@ module ThumbnailScraper
|
|
35
35
|
end
|
36
36
|
|
37
37
|
def select_valid_images(images)
|
38
|
-
images.select
|
39
|
-
end
|
40
|
-
|
41
|
-
def image_is_valid?(image)
|
42
|
-
image.width >= 50 && image.height >= 50 && image.width.to_f / image.height.to_f <= 3 && image.height.to_f / image.width.to_f <= 3
|
38
|
+
images.select(&:exists?).select(&:valid?)
|
43
39
|
end
|
44
40
|
end
|
45
41
|
end
|
data/lib/thumbnail_scraper/image.rb
CHANGED
@@ -9,6 +9,10 @@ module ThumbnailScraper
|
|
9
9
|
self.url = url
|
10
10
|
end
|
11
11
|
|
12
|
+
def exists?
|
13
|
+
size ? true : false
|
14
|
+
end
|
15
|
+
|
12
16
|
def size
|
13
17
|
@size ||= ::FastImage.size(url.to_s)
|
14
18
|
end
|
@@ -24,5 +28,9 @@ module ThumbnailScraper
|
|
24
28
|
def area
|
25
29
|
width * height
|
26
30
|
end
|
31
|
+
|
32
|
+
def valid?
|
33
|
+
width >= 50 && height >= 50 && width.to_f / height.to_f <= 3 && height.to_f / width.to_f <= 3
|
34
|
+
end
|
27
35
|
end
|
28
36
|
end
|
data/lib/thumbnail_scraper/webpage.rb
CHANGED
@@ -59,7 +59,13 @@ module ThumbnailScraper
|
|
59
59
|
|
60
60
|
def attached_images_urls
|
61
61
|
elements = document.xpath("//img/@src")
|
62
|
-
elements.map
|
62
|
+
elements.map do |element|
|
63
|
+
if element.value.start_with?("http://") || element.value.start_with?("https://")
|
64
|
+
image_url(element.value)
|
65
|
+
else
|
66
|
+
image_url("#{url.scheme}://#{url.host}#{element.value.gsub(/^\/?/,'/')}")
|
67
|
+
end
|
68
|
+
end
|
63
69
|
end
|
64
70
|
end
|
65
71
|
end
|
data/spec/units/thumbnail_scraper_spec.rb
CHANGED
@@ -108,13 +108,13 @@ module ThumbnailScraper
|
|
108
108
|
before :each do
|
109
109
|
@image1 = mock("image")
|
110
110
|
@image2 = mock("image")
|
111
|
-
@
|
112
|
-
@
|
111
|
+
@image1.stub!(:exists?).and_return(true)
|
112
|
+
@image2.stub!(:exists?).and_return(true)
|
113
|
+
@image1.stub!(:valid?).and_return(true)
|
114
|
+
@image2.stub!(:valid?).and_return(false)
|
113
115
|
end
|
114
116
|
|
115
117
|
it "should use #image_is_valid? to validate image" do
|
116
|
-
@thumbnail_scraper.stub!(:image_is_valid?).with(@image1).and_return(true)
|
117
|
-
@thumbnail_scraper.stub!(:image_is_valid?).with(@image2).and_return(false)
|
118
118
|
@thumbnail_scraper.select_valid_images([@image1, @image2])
|
119
119
|
end
|
120
120
|
|
@@ -123,9 +123,10 @@ module ThumbnailScraper
|
|
123
123
|
end
|
124
124
|
end
|
125
125
|
|
126
|
-
describe "#
|
126
|
+
describe "Image#valid?" do
|
127
127
|
before :each do
|
128
|
-
@image =
|
128
|
+
@image = Image.new('http://example.com/image.png')
|
129
|
+
@image.stub!(:exists?).and_return(true)
|
129
130
|
@image.stub!(:width).and_return(100)
|
130
131
|
@image.stub!(:height).and_return(100)
|
131
132
|
end
|
@@ -136,7 +137,7 @@ module ThumbnailScraper
|
|
136
137
|
end
|
137
138
|
|
138
139
|
it "should be false" do
|
139
|
-
@
|
140
|
+
@image.valid?.should be_false
|
140
141
|
end
|
141
142
|
end
|
142
143
|
|
@@ -146,7 +147,7 @@ module ThumbnailScraper
|
|
146
147
|
end
|
147
148
|
|
148
149
|
it "should be false" do
|
149
|
-
@
|
150
|
+
@image.valid?.should be_false
|
150
151
|
end
|
151
152
|
end
|
152
153
|
|
@@ -157,7 +158,7 @@ module ThumbnailScraper
|
|
157
158
|
end
|
158
159
|
|
159
160
|
it "should be false" do
|
160
|
-
@
|
161
|
+
@image.valid?.should be_false
|
161
162
|
end
|
162
163
|
end
|
163
164
|
|
@@ -168,13 +169,13 @@ module ThumbnailScraper
|
|
168
169
|
end
|
169
170
|
|
170
171
|
it "should be false" do
|
171
|
-
@
|
172
|
+
@image.valid?.should be_false
|
172
173
|
end
|
173
174
|
end
|
174
175
|
|
175
176
|
context "otherwise" do
|
176
177
|
it "should be true" do
|
177
|
-
@
|
178
|
+
@image.valid?.should be_true
|
178
179
|
end
|
179
180
|
end
|
180
181
|
end
|
data/spec/units/webpage_spec.rb
CHANGED
@@ -160,9 +160,9 @@ module ThumbnailScraper
|
|
160
160
|
context "with images in body" do
|
161
161
|
before :each do
|
162
162
|
@webpage.body = File.read(asset_file_path("images.html"))
|
163
|
-
@webpage.stub!(:image_url).with("/images/subcatalog/smallest.jpg").and_return(:smallest_url)
|
164
|
-
@webpage.stub!(:image_url).with("/images/kitty.jpg").and_return(:kitty_url)
|
165
|
-
@webpage.stub!(:image_url).with("images/biggest.jpg").and_return(:biggest_url)
|
163
|
+
@webpage.stub!(:image_url).with("http://www.example.com/images/subcatalog/smallest.jpg").and_return(:smallest_url)
|
164
|
+
@webpage.stub!(:image_url).with("http://www.example.com/images/kitty.jpg").and_return(:kitty_url)
|
165
|
+
@webpage.stub!(:image_url).with("http://www.example.com/images/biggest.jpg").and_return(:biggest_url)
|
166
166
|
end
|
167
167
|
|
168
168
|
it "should return all urls of found images" do
|
data/thumbnail_scraper.gemspec
CHANGED
@@ -3,9 +3,9 @@ require File.expand_path('../lib/thumbnail_scraper/version', __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
5
|
gem.authors = ["Jeppe Liisberg", "Jan Filipowski"]
|
6
|
-
gem.email = ["
|
7
|
-
gem.description = %q{detect
|
8
|
-
gem.summary = %q{detect
|
6
|
+
gem.email = ["jeppe@liisberg.net"]
|
7
|
+
gem.description = %q{detect and fetch an image suitable as a thumbnail for any url}
|
8
|
+
gem.summary = %q{detect and fetch an image suitable as a thumbnail for any url}
|
9
9
|
gem.homepage = ""
|
10
10
|
|
11
11
|
gem.add_development_dependency "rspec"
|
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: thumbnail_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
5
|
-
prerelease:
|
4
|
+
version: 1.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Jeppe Liisberg
|
@@ -10,96 +9,87 @@ authors:
|
|
10
9
|
autorequire:
|
11
10
|
bindir: bin
|
12
11
|
cert_chain: []
|
13
|
-
date:
|
12
|
+
date: 2014-05-01 00:00:00.000000000 Z
|
14
13
|
dependencies:
|
15
14
|
- !ruby/object:Gem::Dependency
|
16
15
|
name: rspec
|
17
16
|
requirement: !ruby/object:Gem::Requirement
|
18
|
-
none: false
|
19
17
|
requirements:
|
20
|
-
- -
|
18
|
+
- - ">="
|
21
19
|
- !ruby/object:Gem::Version
|
22
20
|
version: '0'
|
23
21
|
type: :development
|
24
22
|
prerelease: false
|
25
23
|
version_requirements: !ruby/object:Gem::Requirement
|
26
|
-
none: false
|
27
24
|
requirements:
|
28
|
-
- -
|
25
|
+
- - ">="
|
29
26
|
- !ruby/object:Gem::Version
|
30
27
|
version: '0'
|
31
28
|
- !ruby/object:Gem::Dependency
|
32
29
|
name: webmock
|
33
30
|
requirement: !ruby/object:Gem::Requirement
|
34
|
-
none: false
|
35
31
|
requirements:
|
36
|
-
- -
|
32
|
+
- - ">="
|
37
33
|
- !ruby/object:Gem::Version
|
38
34
|
version: '0'
|
39
35
|
type: :development
|
40
36
|
prerelease: false
|
41
37
|
version_requirements: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
38
|
requirements:
|
44
|
-
- -
|
39
|
+
- - ">="
|
45
40
|
- !ruby/object:Gem::Version
|
46
41
|
version: '0'
|
47
42
|
- !ruby/object:Gem::Dependency
|
48
43
|
name: httpclient
|
49
44
|
requirement: !ruby/object:Gem::Requirement
|
50
|
-
none: false
|
51
45
|
requirements:
|
52
|
-
- -
|
46
|
+
- - ">="
|
53
47
|
- !ruby/object:Gem::Version
|
54
48
|
version: '0'
|
55
49
|
type: :runtime
|
56
50
|
prerelease: false
|
57
51
|
version_requirements: !ruby/object:Gem::Requirement
|
58
|
-
none: false
|
59
52
|
requirements:
|
60
|
-
- -
|
53
|
+
- - ">="
|
61
54
|
- !ruby/object:Gem::Version
|
62
55
|
version: '0'
|
63
56
|
- !ruby/object:Gem::Dependency
|
64
57
|
name: fastimage
|
65
58
|
requirement: !ruby/object:Gem::Requirement
|
66
|
-
none: false
|
67
59
|
requirements:
|
68
|
-
- -
|
60
|
+
- - ">="
|
69
61
|
- !ruby/object:Gem::Version
|
70
62
|
version: '0'
|
71
63
|
type: :runtime
|
72
64
|
prerelease: false
|
73
65
|
version_requirements: !ruby/object:Gem::Requirement
|
74
|
-
none: false
|
75
66
|
requirements:
|
76
|
-
- -
|
67
|
+
- - ">="
|
77
68
|
- !ruby/object:Gem::Version
|
78
69
|
version: '0'
|
79
70
|
- !ruby/object:Gem::Dependency
|
80
71
|
name: nokogiri
|
81
72
|
requirement: !ruby/object:Gem::Requirement
|
82
|
-
none: false
|
83
73
|
requirements:
|
84
|
-
- -
|
74
|
+
- - ">="
|
85
75
|
- !ruby/object:Gem::Version
|
86
76
|
version: '0'
|
87
77
|
type: :runtime
|
88
78
|
prerelease: false
|
89
79
|
version_requirements: !ruby/object:Gem::Requirement
|
90
|
-
none: false
|
91
80
|
requirements:
|
92
|
-
- -
|
81
|
+
- - ">="
|
93
82
|
- !ruby/object:Gem::Version
|
94
83
|
version: '0'
|
95
|
-
description: detect
|
84
|
+
description: detect and fetch an image suitable as a thumbnail for any url
|
96
85
|
email:
|
97
|
-
-
|
86
|
+
- jeppe@liisberg.net
|
98
87
|
executables: []
|
99
88
|
extensions: []
|
100
89
|
extra_rdoc_files: []
|
101
90
|
files:
|
102
|
-
- .gitignore
|
91
|
+
- ".gitignore"
|
92
|
+
- ".rspec"
|
103
93
|
- Gemfile
|
104
94
|
- Gemfile.lock
|
105
95
|
- LICENSE
|
@@ -127,28 +117,27 @@ files:
|
|
127
117
|
- thumbnail_scraper.gemspec
|
128
118
|
homepage: ''
|
129
119
|
licenses: []
|
120
|
+
metadata: {}
|
130
121
|
post_install_message:
|
131
122
|
rdoc_options: []
|
132
123
|
require_paths:
|
133
124
|
- lib
|
134
125
|
required_ruby_version: !ruby/object:Gem::Requirement
|
135
|
-
none: false
|
136
126
|
requirements:
|
137
|
-
- -
|
127
|
+
- - ">="
|
138
128
|
- !ruby/object:Gem::Version
|
139
129
|
version: '0'
|
140
130
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
141
|
-
none: false
|
142
131
|
requirements:
|
143
|
-
- -
|
132
|
+
- - ">="
|
144
133
|
- !ruby/object:Gem::Version
|
145
134
|
version: '0'
|
146
135
|
requirements: []
|
147
136
|
rubyforge_project:
|
148
|
-
rubygems_version:
|
137
|
+
rubygems_version: 2.2.2
|
149
138
|
signing_key:
|
150
|
-
specification_version:
|
151
|
-
summary: detect
|
139
|
+
specification_version: 4
|
140
|
+
summary: detect and fetch an image suitable as a thumbnail for any url
|
152
141
|
test_files:
|
153
142
|
- spec/sample_pages/images.html
|
154
143
|
- spec/sample_pages/images/biggest.jpg
|