link_thumbnailer 3.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/CHANGELOG.md +334 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +210 -0
- data/Rakefile +9 -0
- data/lib/generators/link_thumbnailer/install_generator.rb +17 -0
- data/lib/generators/templates/initializer.rb +89 -0
- data/lib/link_thumbnailer.rb +38 -0
- data/lib/link_thumbnailer/configuration.rb +72 -0
- data/lib/link_thumbnailer/exceptions.rb +11 -0
- data/lib/link_thumbnailer/grader.rb +43 -0
- data/lib/link_thumbnailer/graders/base.rb +39 -0
- data/lib/link_thumbnailer/graders/html_attribute.rb +48 -0
- data/lib/link_thumbnailer/graders/length.rb +37 -0
- data/lib/link_thumbnailer/graders/link_density.rb +20 -0
- data/lib/link_thumbnailer/graders/position.rb +13 -0
- data/lib/link_thumbnailer/image_comparator.rb +26 -0
- data/lib/link_thumbnailer/image_comparators/base.rb +19 -0
- data/lib/link_thumbnailer/image_comparators/size.rb +13 -0
- data/lib/link_thumbnailer/image_parser.rb +62 -0
- data/lib/link_thumbnailer/image_validator.rb +32 -0
- data/lib/link_thumbnailer/model.rb +20 -0
- data/lib/link_thumbnailer/models/description.rb +37 -0
- data/lib/link_thumbnailer/models/favicon.rb +27 -0
- data/lib/link_thumbnailer/models/image.rb +56 -0
- data/lib/link_thumbnailer/models/title.rb +22 -0
- data/lib/link_thumbnailer/models/video.rb +44 -0
- data/lib/link_thumbnailer/models/website.rb +54 -0
- data/lib/link_thumbnailer/page.rb +43 -0
- data/lib/link_thumbnailer/parser.rb +15 -0
- data/lib/link_thumbnailer/processor.rb +128 -0
- data/lib/link_thumbnailer/railtie.rb +6 -0
- data/lib/link_thumbnailer/response.rb +39 -0
- data/lib/link_thumbnailer/scraper.rb +62 -0
- data/lib/link_thumbnailer/scrapers/base.rb +69 -0
- data/lib/link_thumbnailer/scrapers/default/base.rb +12 -0
- data/lib/link_thumbnailer/scrapers/default/description.rb +49 -0
- data/lib/link_thumbnailer/scrapers/default/favicon.rb +38 -0
- data/lib/link_thumbnailer/scrapers/default/images.rb +78 -0
- data/lib/link_thumbnailer/scrapers/default/title.rb +27 -0
- data/lib/link_thumbnailer/scrapers/default/videos.rb +18 -0
- data/lib/link_thumbnailer/scrapers/opengraph/base.rb +45 -0
- data/lib/link_thumbnailer/scrapers/opengraph/description.rb +12 -0
- data/lib/link_thumbnailer/scrapers/opengraph/favicon.rb +17 -0
- data/lib/link_thumbnailer/scrapers/opengraph/image.rb +107 -0
- data/lib/link_thumbnailer/scrapers/opengraph/images.rb +18 -0
- data/lib/link_thumbnailer/scrapers/opengraph/title.rb +12 -0
- data/lib/link_thumbnailer/scrapers/opengraph/video.rb +115 -0
- data/lib/link_thumbnailer/scrapers/opengraph/videos.rb +18 -0
- data/lib/link_thumbnailer/uri.rb +20 -0
- data/lib/link_thumbnailer/version.rb +5 -0
- data/lib/link_thumbnailer/video_parser.rb +47 -0
- data/link_thumbnailer.gemspec +29 -0
- data/spec/configuration_spec.rb +61 -0
- data/spec/fixture_spec.rb +114 -0
- data/spec/fixtures/bar.png +2907 -0
- data/spec/fixtures/default_from_body.html +13 -0
- data/spec/fixtures/default_from_meta.html +12 -0
- data/spec/fixtures/foo.png +0 -0
- data/spec/fixtures/google_shift_jis.html +6 -0
- data/spec/fixtures/google_utf8.html +6 -0
- data/spec/fixtures/og_not_valid_example.html +12 -0
- data/spec/fixtures/og_valid_example.html +18 -0
- data/spec/fixtures/og_valid_multi_image_example.html +13 -0
- data/spec/fixtures/og_valid_multi_video_example.html +13 -0
- data/spec/grader_spec.rb +27 -0
- data/spec/graders/base_spec.rb +14 -0
- data/spec/graders/html_attribute_spec.rb +50 -0
- data/spec/graders/length_spec.rb +93 -0
- data/spec/graders/link_density_spec.rb +52 -0
- data/spec/graders/position_spec.rb +49 -0
- data/spec/image_comparators/size_spec.rb +58 -0
- data/spec/image_validator_spec.rb +37 -0
- data/spec/model_spec.rb +27 -0
- data/spec/models/description_spec.rb +66 -0
- data/spec/models/favicon_spec.rb +12 -0
- data/spec/models/image_spec.rb +95 -0
- data/spec/models/title_spec.rb +26 -0
- data/spec/models/video_spec.rb +49 -0
- data/spec/models/website_spec.rb +51 -0
- data/spec/page_spec.rb +28 -0
- data/spec/processor_spec.rb +410 -0
- data/spec/response_spec.rb +62 -0
- data/spec/scraper_spec.rb +70 -0
- data/spec/scrapers/base_spec.rb +69 -0
- data/spec/scrapers/opengraph/base_spec.rb +96 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/uri_spec.rb +44 -0
- data/spec/video_parser_spec.rb +148 -0
- metadata +271 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 281992a6403788c75541c08d69586ab9b85c097e
|
4
|
+
data.tar.gz: 7cd32584b4c3a96b6a54185576597115fdb84764
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d77d1ae2545a92f79363a1e90d19edd61c610080d5497207cf22bdb9afe0d0acb5569aa95d7c16485dc9aa3ff0beac5c900b7cea24278d87d5a0812900f7aa91
|
7
|
+
data.tar.gz: 69c55be441d1581c50e54b3f3652130d8c4f9220047714607d978a993c6be581b28b70cb19a62ad3f27dc09c91455d67f4e83bceff753b451a4d3b3a1e532763
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.4.0
|
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,334 @@
|
|
1
|
+
# 3.3.2
|
2
|
+
|
3
|
+
- Frozen strings https://github.com/gottfrois/link_thumbnailer/pull/125
|
4
|
+
|
5
|
+
# 3.3.1
|
6
|
+
|
7
|
+
- Gem upgrade (json)
|
8
|
+
|
9
|
+
# 3.3.0
|
10
|
+
|
11
|
+
- Allows configuring overridden HTTP headers
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
LinkThumbnailer.configure do |config|
|
15
|
+
config.http_override_headers = { 'Accept-Encoding' => 'none', ... }
|
16
|
+
end
|
17
|
+
```
|
18
|
+
|
19
|
+
# 3.2.1
|
20
|
+
|
21
|
+
- Fixes #88
|
22
|
+
- Override User-Agent header properly
|
23
|
+
- Match xpath nodes if attribute content is present
|
24
|
+
- Avoid nil urls in image parser
|
25
|
+
|
26
|
+
# 3.2.0
|
27
|
+
|
28
|
+
Makes scrapers configurable by allowing you to set the scraping strategy:
|
29
|
+
|
30
|
+
```ruby
|
31
|
+
LinkThumbnailer.configure do |config|
|
32
|
+
config.scrapers = [:opengraph, :default]
|
33
|
+
end
|
34
|
+
```
|
35
|
+
|
36
|
+
`opengraph` uses the [Open Graph Protocol](http://ogp.me/).
|
37
|
+
`default` uses a homemade algorithm.
|
38
|
+
|
39
|
+
# 3.1.2
|
40
|
+
|
41
|
+
Allows customizing the ideal description length
|
42
|
+
|
43
|
+
Pass the :ideal_description_length option to the Graders::Length initializer to customize
|
44
|
+
the ideal description length of a website. In the rails initializer:
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
LinkThumbnailer.configure do |config|
|
48
|
+
config.graders = [
|
49
|
+
->(description) { ::LinkThumbnailer::Graders::Length.new(description, ideal_description_length: 500) },
|
50
|
+
->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :class) },
|
51
|
+
->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :id) },
|
52
|
+
->(description) { ::LinkThumbnailer::Graders::Position.new(description, weigth: 3) },
|
53
|
+
->(description) { ::LinkThumbnailer::Graders::LinkDensity.new(description) },
|
54
|
+
]
|
55
|
+
end
|
56
|
+
```
|
57
|
+
|
58
|
+
Will default to `120` characters. More information about how the gem manages to find the best description can be found at
|
59
|
+
http://www.codeids.com/2015/06/27/how-to-find-best-description-of-a-website-using-linkthumbnailer/
|
60
|
+
|
61
|
+
# 3.1.1
|
62
|
+
|
63
|
+
- Upgrade `video_info` gem
|
64
|
+
- Fixes https://github.com/gottfrois/link_thumbnailer/issues/69
|
65
|
+
|
66
|
+
# 3.1.0
|
67
|
+
|
68
|
+
- Fix an issue when image sizes could not be retrieved.
|
69
|
+
- Graders now accept an optional parameter to customize the weight of the grader in the probability computation.
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
LinkThumbnailer::Graders::Position.new(description, weigth: 3)
|
73
|
+
```
|
74
|
+
|
75
|
+
Will give 3 times more weight to the `Position` grader compared to other graders.
|
76
|
+
By default all graders have a weight of `1` except the above position grader since position should play a bigger role in
|
77
|
+
order to find good description candidates.
|
78
|
+
|
79
|
+
# 3.0.3
|
80
|
+
|
81
|
+
- Fix an issue when dealing with absolute urls. https://github.com/gottfrois/link_thumbnailer/issues/68
|
82
|
+
- Fix an issue with HTTP redirection when the `Location` header is not present. https://github.com/gottfrois/link_thumbnailer/issues/70
|
83
|
+
- Rescue and raise custom LinkThumbnailer exceptions. https://github.com/gottfrois/link_thumbnailer/issues/71
|
84
|
+
|
85
|
+
# 3.0.2
|
86
|
+
|
87
|
+
- Replace FastImage gem dependency by [ImageInfo](https://github.com/gottfrois/image_info) to improve performance when
|
88
|
+
fetching size information for multiple images. Benchmarks show an order-of-magnitude improvement in response time.
|
89
|
+
- Fixes [#57](https://github.com/gottfrois/link_thumbnailer/issues/57)
|
90
|
+
|
91
|
+
# 3.0.1
|
92
|
+
|
93
|
+
- Remove useless dependencies
|
94
|
+
|
95
|
+
# 3.0.0
|
96
|
+
|
97
|
+
- Improved description sorting.
|
98
|
+
- Refactored how graders work. More information [here](https://github.com/gottfrois/link_thumbnailer/wiki/How-to-build-your-own-Grader%3F)
|
99
|
+
|
100
|
+
# 2.6.1
|
101
|
+
|
102
|
+
- Fix remove useless dependency
|
103
|
+
|
104
|
+
# 2.6.0
|
105
|
+
|
106
|
+
- Introduce new `raise_on_invalid_format` option (false by default) to raise `LinkThumbnailer::FormatNotSupported` if http `Content-Type` is invalid. Fixes #61 and #64.
|
107
|
+
|
108
|
+
# 2.5.2
|
109
|
+
|
110
|
+
- Fix OpenURI::HTTPError exception raised when video_info gem is not able to parse video metadata. Fixes #60.
|
111
|
+
|
112
|
+
# 2.5.1
|
113
|
+
|
114
|
+
- Implement `Set-Cookie` header between http redirections to set cookies when site requires it. Fixes #55.
|
115
|
+
|
116
|
+
# 2.5.0
|
117
|
+
|
118
|
+
- Handles seamlessly `og:image` and `og:image:url`
|
119
|
+
- Handles seamlessly `og:video` and `og:video:url`
|
120
|
+
- Handles `og:video:width` and `og:video:height` for one video only (please create a ticket if you want support for multiple videos/images width & height)
|
121
|
+
- Fix calling `as_json` on `website` to return `as_json` representation of videos and images, not just their urls
|
122
|
+
- Gem updates and fix rspec deprecation warnings
|
123
|
+
|
124
|
+
# 2.4.0
|
125
|
+
|
126
|
+
- Handle connection through proxy automatically using the `ENV['HTTP_PROXY']` variable thanks to [taganaka](https://github.com/taganaka).
|
127
|
+
|
128
|
+
# 2.3.2
|
129
|
+
|
130
|
+
- Fix an issue with vimeo opengraph urls. Fixes [#46](https://github.com/gottfrois/link_thumbnailer/pull/46)
|
131
|
+
|
132
|
+
# 2.3.1
|
133
|
+
|
134
|
+
- Fix an issue with the link density grader caused by links with image instead of text. Fixes [#45](https://github.com/gottfrois/link_thumbnailer/issues/45)
|
135
|
+
|
136
|
+
# 2.3.0
|
137
|
+
|
138
|
+
- Add requested favicon scraper [#40](https://github.com/gottfrois/link_thumbnailer/issues/40)
|
139
|
+
|
140
|
+
Add `:favicon` to `config.attributes` in LinkThumbnailer initializer:
|
141
|
+
|
142
|
+
```ruby
|
143
|
+
config.attributes = [:title, :images, :description, :videos, :favicon]
|
144
|
+
```
|
145
|
+
|
146
|
+
Then
|
147
|
+
|
148
|
+
```ruby
|
149
|
+
o = LinkThumbnailer.generate('https://github.com')
|
150
|
+
o.favicon
|
151
|
+
=> "https://github.com/fluidicon.png"
|
152
|
+
```
|
153
|
+
|
154
|
+
# 2.2.3
|
155
|
+
|
156
|
+
- Fixes [#41](https://github.com/gottfrois/link_thumbnailer/issues/41)
|
157
|
+
|
158
|
+
# 2.2.2
|
159
|
+
|
160
|
+
- Fixes [#41](https://github.com/gottfrois/link_thumbnailer/issues/41)
|
161
|
+
|
162
|
+
# 2.2.1
|
163
|
+
|
164
|
+
- Fix issue when computing link density ratio
|
165
|
+
|
166
|
+
# 2.2.0
|
167
|
+
|
168
|
+
- Add support for `og:video`
|
169
|
+
- Add support for multiple `og:video` as well
|
170
|
+
|
171
|
+
LinkThumbnailer will return the following json for example:
|
172
|
+
|
173
|
+
```ruby
|
174
|
+
{
|
175
|
+
id: 'x7lni3',
|
176
|
+
src: 'http://www.dailymotion.com/video/x7lni3',
|
177
|
+
size: [640, 360],
|
178
|
+
duration: 136,
|
179
|
+
provider: 'Dailymotion',
|
180
|
+
embed_code: '<iframe src="//www.dailymotion.com/embed/video/x7lni3" frameborder="0" allowfullscreen="allowfullscreen"></iframe>'
|
181
|
+
}
|
182
|
+
```
|
183
|
+
|
184
|
+
Add `:videos` into your `config/initializers/link_thumbnailer.rb` `attributes` config in order to start scraping videos.
|
185
|
+
|
186
|
+
Ex:
|
187
|
+
|
188
|
+
```ruby
|
189
|
+
config.attributes = [:title, :images, :description, :videos]
|
190
|
+
```
|
191
|
+
|
192
|
+
# 2.1.0
|
193
|
+
|
194
|
+
- Increased `og:image` scraping performance by parsing `og:image:width` and `og:image:height` attribute if specified
|
195
|
+
- Introduced `image_stats` option to allow disabling image size and type parsing causing performance issues.
|
196
|
+
|
197
|
+
When disabled, size will be `[0, 0]` and type will be `nil`
|
198
|
+
|
199
|
+
# 2.0.4
|
200
|
+
|
201
|
+
- Fixes [#39](https://github.com/gottfrois/link_thumbnailer/issues/39)
|
202
|
+
|
203
|
+
# 2.0.3
|
204
|
+
|
205
|
+
- Fixes [#37](https://github.com/gottfrois/link_thumbnailer/issues/37)
|
206
|
+
|
207
|
+
# 2.0.2
|
208
|
+
|
209
|
+
- Fix couple of issues with `URI` class namespace
|
210
|
+
|
211
|
+
# 2.0.1
|
212
|
+
|
213
|
+
- Fix issue with image parser (fastimage) when given an URI instance instead of a string
|
214
|
+
|
215
|
+
# 2.0.0
|
216
|
+
|
217
|
+
- Fully refactored LinkThumbnailer
|
218
|
+
- Introduced [Graders](https://github.com/gottfrois/link_thumbnailer/wiki/How-to-build-your-own-Grader%3F)
|
219
|
+
- Introduced [Scrapers](https://github.com/gottfrois/link_thumbnailer/wiki/Attributes-option-explained)
|
220
|
+
- Ability to score descriptions
|
221
|
+
- Ability to fetch multiple `og:image`
|
222
|
+
- Fixed memoized run-time options
|
223
|
+
- Fixed some website urls not working
|
224
|
+
- Refactor ugly code
|
225
|
+
- More specs
|
226
|
+
- Removed `PreviewsController` since it does not add much value. Simply create your own and use the `to_json` method.
|
227
|
+
|
228
|
+
To update from `1.x.x` to `2.x.x` you need to run `rails g link_thumbnailer:install` to get the new configuration file.
|
229
|
+
If you used the `PreviewsController` feature, you need to build it yourself since it is not supported anymore.
|
230
|
+
|
231
|
+
# 1.1.2
|
232
|
+
|
233
|
+
- Fix issue with FastImage URLs [https://github.com/gottfrois/link_thumbnailer/pull/31](https://github.com/gottfrois/link_thumbnailer/pull/31)
|
234
|
+
|
235
|
+
# 1.1.1
|
236
|
+
|
237
|
+
- Fix route helper not working under rails 4.
|
238
|
+
|
239
|
+
# 1.1.0
|
240
|
+
|
241
|
+
- Replace RMagick by [FastImage](https://github.com/sdsykes/fastimage)
|
242
|
+
- Rename `rmagick_attributes` config into `image_attributes`
|
243
|
+
|
244
|
+
# 1.0.9
|
245
|
+
|
246
|
+
- Fix issue when Location header used a relative path instead of an absolute path
|
247
|
+
- Update gemfile to be more flexible when using Hashie gem
|
248
|
+
|
249
|
+
# 1.0.8
|
250
|
+
|
251
|
+
- Thanks to [juriglx](https://github.com/juriglx), support for canonical urls
|
252
|
+
- Bug fixes
|
253
|
+
|
254
|
+
# 1.0.7
|
255
|
+
|
256
|
+
- Fix: Issue with preview controller
|
257
|
+
|
258
|
+
# 1.0.6
|
259
|
+
|
260
|
+
- Fix: Issue when setting `strict` option. Always returning OG representation.
|
261
|
+
|
262
|
+
# 1.0.5
|
263
|
+
|
264
|
+
- Thanks to [phlegx](https://github.com/phlegx), support for timeout http connection through configurations.
|
265
|
+
|
266
|
+
# 1.0.4
|
267
|
+
|
268
|
+
- Fix issue #7: nil img was returned when an exception was raised. Now skipping nil images in results.
|
269
|
+
- Thanks to [phlegx](https://github.com/phlegx), support for SSL and User Agent customization through configurations.
|
270
|
+
|
271
|
+
# 1.0.3
|
272
|
+
|
273
|
+
- Fix issue #5: URL was incorrect in case of HTTP redirections.
|
274
|
+
|
275
|
+
# 1.0.2
|
276
|
+
|
277
|
+
- Feature: User can now set options at runtime by passing valid options to ```generate``` method
|
278
|
+
- Bug fix when doing ```rails g link_thumbnailer:install``` by explicitly specifying the scope of Rails
|
279
|
+
|
280
|
+
# 1.0.1
|
281
|
+
|
282
|
+
- Refactor LinkThumbnailer#generate method to have a cleaner code
|
283
|
+
|
284
|
+
# 1.0.0
|
285
|
+
|
286
|
+
- Update readme
|
287
|
+
- Add PreviewController for easy integration with user's app
|
288
|
+
- Add link_thumbnailer routes for easy integration with user's app
|
289
|
+
- Refactor some code
|
290
|
+
- Change 'to_a' method to 'to_hash' in object model
|
291
|
+
|
292
|
+
# 0.0.6
|
293
|
+
|
294
|
+
- Update readme
|
295
|
+
- Add `to_a` to WebImage class
|
296
|
+
- Refactor `to_json` for WebImage class
|
297
|
+
- Add corresponding specs
|
298
|
+
|
299
|
+
# 0.0.5
|
300
|
+
|
301
|
+
- Bug fix
|
302
|
+
- Remove `require 'rails'` from spec_helper.rb
|
303
|
+
- Remove Rails dependencies (`blank?` method) in code
|
304
|
+
- Spec fix
|
305
|
+
|
306
|
+
# 0.0.4
|
307
|
+
|
308
|
+
- Add specs for almost all classes
|
309
|
+
- Add a method `to_json` for WebImage class to be able to get a usable array of images' attributes
|
310
|
+
|
311
|
+
# 0.0.3
|
312
|
+
|
313
|
+
- Add specs for LinkThumbnailer class
|
314
|
+
- Refactor config system, now using dedicated configuration class
|
315
|
+
|
316
|
+
# 0.0.2
|
317
|
+
|
318
|
+
- Added Rspec
|
319
|
+
- Bug fixes:
|
320
|
+
- Now checking if attribute is blank for LinkThumbnailer::Object.valid? method
|
321
|
+
|
322
|
+
# 0.0.1
|
323
|
+
|
324
|
+
- LinkThumbnailer::Object
|
325
|
+
- LinkThumbnailer::Doc
|
326
|
+
- LinkThumbnailer::DocParser
|
327
|
+
- LinkThumbnailer::Fetcher
|
328
|
+
- LinkThumbnailer::ImgComparator
|
329
|
+
- LinkThumbnailer::ImgParser
|
330
|
+
- LinkThumbnailer::ImgUrlFilter
|
331
|
+
- LinkThumbnailer::Opengraph
|
332
|
+
- LinkThumbnailer::WebImage
|
333
|
+
- LinkThumbnailer.configure
|
334
|
+
- LinkThumbnailer.generate
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Pierre-Louis Gottfrois
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,210 @@
|
|
1
|
+
# LinkThumbnailer
|
2
|
+
|
3
|
+
[![Code Climate](https://codeclimate.com/github/gottfrois/link_thumbnailer.png)](https://codeclimate.com/github/gottfrois/link_thumbnailer)
|
4
|
+
[![Build Status](https://travis-ci.org/gottfrois/link_thumbnailer.png?branch=master)](https://travis-ci.org/gottfrois/link_thumbnailer)
|
5
|
+
[![Gem Version](https://badge.fury.io/rb/link_thumbnailer.svg)](http://badge.fury.io/rb/link_thumbnailer)
|
6
|
+
[![Dependency Status](https://gemnasium.com/gottfrois/link_thumbnailer.svg)](https://gemnasium.com/gottfrois/link_thumbnailer)
|
7
|
+
|
8
|
+
Ruby gem that generates image thumbnails from a given URL, ranks them, and gives you back an object containing images and website information. Works like Facebook's link previewer.
|
9
|
+
|
10
|
+
Demo Application is [here](http://link-thumbnailer-demo.herokuapp.com/) !
|
11
|
+
The source code of the Demo Application is hosted [here](https://github.com/gottfrois/link_thumbnailer_demo)!
|
12
|
+
|
13
|
+
**OpenSource** and **Free** API available [here](https://github.com/gottfrois/link_thumbnailer_api) !
|
14
|
+
|
15
|
+
## Features
|
16
|
+
|
17
|
+
- Dead simple.
|
18
|
+
- Support [OpenGraph](http://ogp.me/) protocol.
|
19
|
+
- Find and sort images that best represent what the page is about.
|
20
|
+
- Find and rate description that best represent what the page is about.
|
21
|
+
- Allow for custom class to sort the website descriptions yourself.
|
22
|
+
- Support image urls blacklisting (advertisements).
|
23
|
+
- Works with and without Rails.
|
24
|
+
- Fully customizable.
|
25
|
+
- Fully tested.
|
26
|
+
|
27
|
+
## Installation
|
28
|
+
|
29
|
+
Add this line to your application's Gemfile:
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
gem 'link_thumbnailer'
|
33
|
+
```
|
34
|
+
|
35
|
+
And then execute:
|
36
|
+
|
37
|
+
$ bundle
|
38
|
+
|
39
|
+
Or install it yourself as:
|
40
|
+
|
41
|
+
$ gem install link_thumbnailer
|
42
|
+
|
43
|
+
Run:
|
44
|
+
|
45
|
+
$ rails g link_thumbnailer:install
|
46
|
+
|
47
|
+
This will add `link_thumbnailer.rb` to `config/initializers/`.
|
48
|
+
|
49
|
+
## Usage
|
50
|
+
|
51
|
+
Run `irb` and require the gem:
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
require 'link_thumbnailer'
|
55
|
+
```
|
56
|
+
|
57
|
+
The gem handles regular websites as well as websites that use the [Opengraph](http://ogp.me/) protocol.
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
object = LinkThumbnailer.generate('http://stackoverflow.com')
|
61
|
+
=> #<LinkThumbnailer::Models::Website:...>
|
62
|
+
|
63
|
+
object.title
|
64
|
+
=> "Stack Overflow"
|
65
|
+
|
66
|
+
object.favicon
|
67
|
+
=> "//cdn.sstatic.net/stackoverflow/img/favicon.ico?v=038622610830"
|
68
|
+
|
69
|
+
object.description
|
70
|
+
=> "Q&A for professional and enthusiast programmers"
|
71
|
+
|
72
|
+
object.images.first.src.to_s
|
73
|
+
=> "http://cdn.sstatic.net/stackoverflow/img/apple-touch-icon@2.png?v=fde65a5a78c6"
|
74
|
+
```
|
75
|
+
|
76
|
+
The LinkThumbnailer `generate` method returns an instance of `LinkThumbnailer::Models::Website` that responds to `to_json` and `as_json` as you would expect:
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
object.to_json
|
80
|
+
=> "{\"url\":\"http://stackoverflow.com\",\"title\":\"Stack Overflow\",\"description\":\"Q&A for professional and enthusiast programmers\",\"images\":[{\"src\":\"http://cdn.sstatic.net/stackoverflow/img/apple-touch-icon@2.png?v=fde65a5a78c6\",\"size\":[316,316],\"type\":\"png\"}]}"
|
81
|
+
```
|
82
|
+
|
83
|
+
|
84
|
+
## Configuration
|
85
|
+
|
86
|
+
LinkThumbnailer comes with default configuration values. You can change the default values by overriding them in a Rails initializer:
|
87
|
+
|
88
|
+
In `config/initializers/link_thumbnailer.rb`
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
LinkThumbnailer.configure do |config|
|
92
|
+
# Number of redirects before raising an exception when trying to parse given url.
|
93
|
+
#
|
94
|
+
# config.redirect_limit = 3
|
95
|
+
|
96
|
+
# Set user agent
|
97
|
+
#
|
98
|
+
# config.user_agent = 'link_thumbnailer'
|
99
|
+
|
100
|
+
# Enable or disable SSL verification
|
101
|
+
#
|
102
|
+
# config.verify_ssl = true
|
103
|
+
|
104
|
+
# The amount of time in seconds to wait for a connection to be opened.
|
105
|
+
# If the HTTP object cannot open a connection in this many seconds,
|
106
|
+
# it raises a Net::OpenTimeout exception.
|
107
|
+
#
|
108
|
+
# See http://www.ruby-doc.org/stdlib-2.1.1/libdoc/net/http/rdoc/Net/HTTP.html#open_timeout
|
109
|
+
#
|
110
|
+
# config.http_open_timeout = 5
|
111
|
+
|
112
|
+
# List of blacklisted urls you want to skip when searching for images.
|
113
|
+
#
|
114
|
+
# config.blacklist_urls = [
|
115
|
+
# %r{^http://ad\.doubleclick\.net/},
|
116
|
+
# %r{^http://b\.scorecardresearch\.com/},
|
117
|
+
# %r{^http://pixel\.quantserve\.com/},
|
118
|
+
# %r{^http://s7\.addthis\.com/}
|
119
|
+
# ]
|
120
|
+
|
121
|
+
# List of attributes you want LinkThumbnailer to fetch on a website.
|
122
|
+
#
|
123
|
+
# config.attributes = [:title, :images, :description, :videos, :favicon]
|
124
|
+
|
125
|
+
# List of procedures used to rate the website description. Add your custom class
|
126
|
+
# here. See wiki for more details on how to build your own graders.
|
127
|
+
#
|
128
|
+
# config.graders = [
|
129
|
+
# ->(description) { ::LinkThumbnailer::Graders::Length.new(description) },
|
130
|
+
# ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :class) },
|
131
|
+
# ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :id) },
|
132
|
+
# ->(description) { ::LinkThumbnailer::Graders::Position.new(description, weight: 3) },
|
133
|
+
# ->(description) { ::LinkThumbnailer::Graders::LinkDensity.new(description) }
|
134
|
+
# ]
|
135
|
+
|
136
|
+
# Minimum description length for a website.
|
137
|
+
#
|
138
|
+
# config.description_min_length = 25
|
139
|
+
|
140
|
+
# Regex of words considered positive to rate website description.
|
141
|
+
#
|
142
|
+
# config.positive_regex = /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i
|
143
|
+
|
144
|
+
# Regex of words considered negative to rate website description.
|
145
|
+
#
|
146
|
+
# config.negative_regex = /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget|modal/i
|
147
|
+
|
148
|
+
# Number of images to fetch. Fetching too many images will be slow.
|
149
|
+
# Note that LinkThumbnailer will only sort fetched images between each other.
|
150
|
+
# Meaning that there could be a "better" image on the page.
|
151
|
+
#
|
152
|
+
# config.image_limit = 5
|
153
|
+
|
154
|
+
# Whether you want LinkThumbnailer to return image size and type or not.
|
155
|
+
# Setting this value to false will increase performance since for each image, LinkThumbnailer
|
156
|
+
# does not have to fetch its size and type.
|
157
|
+
#
|
158
|
+
# config.image_stats = true
|
159
|
+
#
|
160
|
+
# Whether you want LinkThumbnailer to raise an exception if the Content-Type of the HTTP request
|
161
|
+
# is not an html or xml.
|
162
|
+
#
|
163
|
+
# config.raise_on_invalid_format = false
|
164
|
+
#
|
165
|
+
# Sets the number of concurrent http connections that can be opened to fetch image information such as size and type.
|
166
|
+
#
|
167
|
+
# config.max_concurrency = 20
|
168
|
+
|
169
|
+
# Sets the default encoding.
|
170
|
+
#
|
171
|
+
# config.encoding = 'utf-8'
|
172
|
+
end
|
173
|
+
```
|
174
|
+
|
175
|
+
Or at runtime:
|
176
|
+
|
177
|
+
```ruby
|
178
|
+
object = LinkThumbnailer.generate('http://stackoverflow.com', redirect_limit: 5, user_agent: 'foo')
|
179
|
+
```
|
180
|
+
|
181
|
+
Note that runtime options will override default global configuration.
|
182
|
+
|
183
|
+
See [Configuration Options Explained](https://github.com/gottfrois/link_thumbnailer/wiki/Configuration-options-explained) for more details on each configuration option.
|
184
|
+
|
185
|
+
## Exceptions
|
186
|
+
|
187
|
+
LinkThumbnailer defines a list of custom exceptions you may want to rescue in your code. All the following exceptions inherit from `LinkThumbnailer::Exceptions`:
|
188
|
+
|
189
|
+
* `RedirectLimit` -- raised when redirection threshold defined in config is reached
|
190
|
+
* `BadUriFormat` -- raised when url given is not a valid HTTP url
|
191
|
+
* `FormatNotSupported` -- raised when the `Content-Type` of the HTTP request is not supported (not `html`)
|
192
|
+
|
193
|
+
You can rescue from any LinkThumbnailer exceptions using the following code:
|
194
|
+
|
195
|
+
```ruby
|
196
|
+
begin
|
197
|
+
LinkThumbnailer.generate('http://foo.com')
|
198
|
+
rescue LinkThumbnailer::Exceptions => e
|
199
|
+
# do something
|
200
|
+
end
|
201
|
+
```
|
202
|
+
|
203
|
+
## Contributing
|
204
|
+
|
205
|
+
1. Fork it
|
206
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
207
|
+
3. Run the specs (`bundle exec rspec spec`)
|
208
|
+
4. Commit your changes (`git commit -am 'Added some feature'`)
|
209
|
+
5. Push to the branch (`git push origin my-new-feature`)
|
210
|
+
6. Create new Pull Request
|