link_thumbnailer 3.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +19 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +5 -0
  6. data/CHANGELOG.md +334 -0
  7. data/Gemfile +12 -0
  8. data/LICENSE.txt +22 -0
  9. data/README.md +210 -0
  10. data/Rakefile +9 -0
  11. data/lib/generators/link_thumbnailer/install_generator.rb +17 -0
  12. data/lib/generators/templates/initializer.rb +89 -0
  13. data/lib/link_thumbnailer.rb +38 -0
  14. data/lib/link_thumbnailer/configuration.rb +72 -0
  15. data/lib/link_thumbnailer/exceptions.rb +11 -0
  16. data/lib/link_thumbnailer/grader.rb +43 -0
  17. data/lib/link_thumbnailer/graders/base.rb +39 -0
  18. data/lib/link_thumbnailer/graders/html_attribute.rb +48 -0
  19. data/lib/link_thumbnailer/graders/length.rb +37 -0
  20. data/lib/link_thumbnailer/graders/link_density.rb +20 -0
  21. data/lib/link_thumbnailer/graders/position.rb +13 -0
  22. data/lib/link_thumbnailer/image_comparator.rb +26 -0
  23. data/lib/link_thumbnailer/image_comparators/base.rb +19 -0
  24. data/lib/link_thumbnailer/image_comparators/size.rb +13 -0
  25. data/lib/link_thumbnailer/image_parser.rb +62 -0
  26. data/lib/link_thumbnailer/image_validator.rb +32 -0
  27. data/lib/link_thumbnailer/model.rb +20 -0
  28. data/lib/link_thumbnailer/models/description.rb +37 -0
  29. data/lib/link_thumbnailer/models/favicon.rb +27 -0
  30. data/lib/link_thumbnailer/models/image.rb +56 -0
  31. data/lib/link_thumbnailer/models/title.rb +22 -0
  32. data/lib/link_thumbnailer/models/video.rb +44 -0
  33. data/lib/link_thumbnailer/models/website.rb +54 -0
  34. data/lib/link_thumbnailer/page.rb +43 -0
  35. data/lib/link_thumbnailer/parser.rb +15 -0
  36. data/lib/link_thumbnailer/processor.rb +128 -0
  37. data/lib/link_thumbnailer/railtie.rb +6 -0
  38. data/lib/link_thumbnailer/response.rb +39 -0
  39. data/lib/link_thumbnailer/scraper.rb +62 -0
  40. data/lib/link_thumbnailer/scrapers/base.rb +69 -0
  41. data/lib/link_thumbnailer/scrapers/default/base.rb +12 -0
  42. data/lib/link_thumbnailer/scrapers/default/description.rb +49 -0
  43. data/lib/link_thumbnailer/scrapers/default/favicon.rb +38 -0
  44. data/lib/link_thumbnailer/scrapers/default/images.rb +78 -0
  45. data/lib/link_thumbnailer/scrapers/default/title.rb +27 -0
  46. data/lib/link_thumbnailer/scrapers/default/videos.rb +18 -0
  47. data/lib/link_thumbnailer/scrapers/opengraph/base.rb +45 -0
  48. data/lib/link_thumbnailer/scrapers/opengraph/description.rb +12 -0
  49. data/lib/link_thumbnailer/scrapers/opengraph/favicon.rb +17 -0
  50. data/lib/link_thumbnailer/scrapers/opengraph/image.rb +107 -0
  51. data/lib/link_thumbnailer/scrapers/opengraph/images.rb +18 -0
  52. data/lib/link_thumbnailer/scrapers/opengraph/title.rb +12 -0
  53. data/lib/link_thumbnailer/scrapers/opengraph/video.rb +115 -0
  54. data/lib/link_thumbnailer/scrapers/opengraph/videos.rb +18 -0
  55. data/lib/link_thumbnailer/uri.rb +20 -0
  56. data/lib/link_thumbnailer/version.rb +5 -0
  57. data/lib/link_thumbnailer/video_parser.rb +47 -0
  58. data/link_thumbnailer.gemspec +29 -0
  59. data/spec/configuration_spec.rb +61 -0
  60. data/spec/fixture_spec.rb +114 -0
  61. data/spec/fixtures/bar.png +2907 -0
  62. data/spec/fixtures/default_from_body.html +13 -0
  63. data/spec/fixtures/default_from_meta.html +12 -0
  64. data/spec/fixtures/foo.png +0 -0
  65. data/spec/fixtures/google_shift_jis.html +6 -0
  66. data/spec/fixtures/google_utf8.html +6 -0
  67. data/spec/fixtures/og_not_valid_example.html +12 -0
  68. data/spec/fixtures/og_valid_example.html +18 -0
  69. data/spec/fixtures/og_valid_multi_image_example.html +13 -0
  70. data/spec/fixtures/og_valid_multi_video_example.html +13 -0
  71. data/spec/grader_spec.rb +27 -0
  72. data/spec/graders/base_spec.rb +14 -0
  73. data/spec/graders/html_attribute_spec.rb +50 -0
  74. data/spec/graders/length_spec.rb +93 -0
  75. data/spec/graders/link_density_spec.rb +52 -0
  76. data/spec/graders/position_spec.rb +49 -0
  77. data/spec/image_comparators/size_spec.rb +58 -0
  78. data/spec/image_validator_spec.rb +37 -0
  79. data/spec/model_spec.rb +27 -0
  80. data/spec/models/description_spec.rb +66 -0
  81. data/spec/models/favicon_spec.rb +12 -0
  82. data/spec/models/image_spec.rb +95 -0
  83. data/spec/models/title_spec.rb +26 -0
  84. data/spec/models/video_spec.rb +49 -0
  85. data/spec/models/website_spec.rb +51 -0
  86. data/spec/page_spec.rb +28 -0
  87. data/spec/processor_spec.rb +410 -0
  88. data/spec/response_spec.rb +62 -0
  89. data/spec/scraper_spec.rb +70 -0
  90. data/spec/scrapers/base_spec.rb +69 -0
  91. data/spec/scrapers/opengraph/base_spec.rb +96 -0
  92. data/spec/spec_helper.rb +11 -0
  93. data/spec/uri_spec.rb +44 -0
  94. data/spec/video_parser_spec.rb +148 -0
  95. metadata +271 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 281992a6403788c75541c08d69586ab9b85c097e
4
+ data.tar.gz: 7cd32584b4c3a96b6a54185576597115fdb84764
5
+ SHA512:
6
+ metadata.gz: d77d1ae2545a92f79363a1e90d19edd61c610080d5497207cf22bdb9afe0d0acb5569aa95d7c16485dc9aa3ff0beac5c900b7cea24278d87d5a0812900f7aa91
7
+ data.tar.gz: 69c55be441d1581c50e54b3f3652130d8c4f9220047714607d978a993c6be581b28b70cb19a62ad3f27dc09c91455d67f4e83bceff753b451a4d3b3a1e532763
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .project
19
+ .coveralls.yml
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --colour
2
+ --format=documentation
@@ -0,0 +1 @@
1
+ 2.4.0
@@ -0,0 +1,5 @@
1
+ bundler_args: --without development
2
+ language: ruby
3
+ rvm:
4
+ - 2.2.2
5
+ - 2.4.0
@@ -0,0 +1,334 @@
1
+ # 3.3.2
2
+
3
+ - Frozen strings https://github.com/gottfrois/link_thumbnailer/pull/125
4
+
5
+ # 3.3.1
6
+
7
+ - Gem upgrade (json)
8
+
9
+ # 3.3.0
10
+
11
+ - Allows configuring overridden HTTP headers
12
+
13
+ ```ruby
14
+ LinkThumbnailer.configure do |config|
15
+ config.http_override_headers = { 'Accept-Encoding' => 'none', ... }
16
+ end
17
+ ```
18
+
19
+ # 3.2.1
20
+
21
+ - Fixes #88
22
+ - Override User-Agent header properly
23
+ - Match xpath nodes if attribute content is present
24
+ - Avoid nil urls in image parser
25
+
26
+ # 3.2.0
27
+
28
+ Makes scrapers configurable by allowing to set the scraping strategy:
29
+
30
+ ```ruby
31
+ LinkThumbnailer.configure do |config|
32
+ config.scrapers = [:opengraph, :default]
33
+ end
34
+ ```
35
+
36
+ `opengraph` uses the [Open Graph Protocol](http://ogp.me/).
37
+ `default` uses a homemade algorithm
38
+
39
+ # 3.1.2
40
+
41
+ Allows to customize ideal description length
42
+
43
+ Pass the :ideal_description_length option to the Graders::Length initializer to customize
44
+ the ideal description length of a website. In the rails initializer:
45
+
46
+ ```ruby
47
+ LinkThumbnailer.configure do |config|
48
+ config.graders = [
49
+ ->(description) { ::LinkThumbnailer::Graders::Length.new(description, ideal_description_length: 500) },
50
+ ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :class) },
51
+ ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :id) },
52
+ ->(description) { ::LinkThumbnailer::Graders::Position.new(description, weigth: 3) },
53
+ ->(description) { ::LinkThumbnailer::Graders::LinkDensity.new(description) },
54
+ ]
55
+ end
56
+ ```
57
+
58
+ Defaults to `120` characters. More information about how the gem manages to find the best description can be found at
59
+ http://www.codeids.com/2015/06/27/how-to-find-best-description-of-a-website-using-linkthumbnailer/
60
+
61
+ # 3.1.1
62
+
63
+ - Upgrade `video_info` gem
64
+ - Fixes https://github.com/gottfrois/link_thumbnailer/issues/69
65
+
66
+ # 3.1.0
67
+
68
+ - Fix an issue when image sizes could not be retrieved.
69
+ - Graders now accept an optional parameter to customize the weight of the grader in the probability computation.
70
+
71
+ ```ruby
72
+ LinkThumbnailer::Graders::Position.new(description, weigth: 3)
73
+ ```
74
+
75
+ Will give 3 times more weight to the `Position` grader compared to other graders.
76
+ By default all graders have a weight of `1` except the above position grader since position should play a bigger role in
77
+ order to find good description candidates.
78
+
79
+ # 3.0.3
80
+
81
+ - Fix an issue when dealing with absolute urls. https://github.com/gottfrois/link_thumbnailer/issues/68
82
+ - Fix an issue with HTTP redirection and the location header not being present. https://github.com/gottfrois/link_thumbnailer/issues/70
83
+ - Rescue and raise custom LinkThumbnailer exceptions. https://github.com/gottfrois/link_thumbnailer/issues/71
84
+
85
+ # 3.0.2
86
+
87
+ - Replace FastImage gem dependency by [ImageInfo](https://github.com/gottfrois/image_info) to improve performance when
88
+ fetching size information for multiple images. Benchmarks show an order-of-magnitude improvement in response time.
89
+ - Fixes [#57](https://github.com/gottfrois/link_thumbnailer/issues/57)
90
+
91
+ # 3.0.1
92
+
93
+ - Remove useless dependencies
94
+
95
+ # 3.0.0
96
+
97
+ - Improved description sorting.
98
+ - Refactored how graders work. More information [here](https://github.com/gottfrois/link_thumbnailer/wiki/How-to-build-your-own-Grader%3F)
99
+
100
+ # 2.6.1
101
+
102
+ - Fix remove useless dependency
103
+
104
+ # 2.6.0
105
+
106
+ - Introduce new `raise_on_invalid_format` option (false by default) to raise `LinkThumbnailer::FormatNotSupported` if http `Content-Type` is invalid. Fixes #61 and #64.
107
+
108
+ # 2.5.2
109
+
110
+ - Fix OpenURI::HTTPError exception raised when video_info gem is not able to parse video metadata. Fixes #60.
111
+
112
+ # 2.5.1
113
+
114
+ - Implement `Set-Cookie` header between http redirections to set cookies when site requires it. Fixes #55.
115
+
116
+ # 2.5.0
117
+
118
+ - Handles seamlessly `og:image` and `og:image:url`
119
+ - Handles seamlessly `og:video` and `og:video:url`
120
+ - Handles `og:video:width` and `og:video:height` for one video only (please create a ticket if you want support for multiple videos/images width & height)
121
+ - Fix calling `as_json` on `website` to return `as_json` representation of videos and images, not just their urls
122
+ - Gem updates and fix rspec deprecation warnings
123
+
124
+ # 2.4.0
125
+
126
+ - Handle connection through proxy automatically using the `ENV['HTTP_PROXY']` variable thanks to [taganaka](https://github.com/taganaka).
127
+
128
+ # 2.3.2
129
+
130
+ - Fix an issue with vimeo opengraph urls. Fixes [#46](https://github.com/gottfrois/link_thumbnailer/pull/46)
131
+
132
+ # 2.3.1
133
+
134
+ - Fix an issue with the link density grader caused by links with image instead of text. Fixes [#45](https://github.com/gottfrois/link_thumbnailer/issues/45)
135
+
136
+ # 2.3.0
137
+
138
+ - Add requested favicon scraper [#40](https://github.com/gottfrois/link_thumbnailer/issues/40)
139
+
140
+ Add `:favicon` to `config.attributes` in LinkThumbnailer initializer:
141
+
142
+ ```ruby
143
+ config.attributes = [:title, :images, :description, :videos, :favicon]
144
+ ```
145
+
146
+ Then
147
+
148
+ ```ruby
149
+ o = LinkThumbnailer.generate('https://github.com')
150
+ o.favicon
151
+ => "https://github.com/fluidicon.png"
152
+ ```
153
+
154
+ # 2.2.3
155
+
156
+ - Fixes [#41](https://github.com/gottfrois/link_thumbnailer/issues/41)
157
+
158
+ # 2.2.2
159
+
160
+ - Fixes [#41](https://github.com/gottfrois/link_thumbnailer/issues/41)
161
+
162
+ # 2.2.1
163
+
164
+ - Fix issue when computing link density ratio
165
+
166
+ # 2.2.0
167
+
168
+ - Add support for `og:video`
169
+ - Add support for multiple `og:video` as well
170
+
171
+ LinkThumbnailer will return the following json for example:
172
+
173
+ ```ruby
174
+ {
175
+ id: 'x7lni3',
176
+ src: 'http://www.dailymotion.com/video/x7lni3',
177
+ size: [640, 360],
178
+ duration: 136,
179
+ provider: 'Dailymotion',
180
+ embed_code: '<iframe src="//www.dailymotion.com/embed/video/x7lni3" frameborder="0" allowfullscreen="allowfullscreen"></iframe>'
181
+ }
182
+ ```
183
+
184
+ Add `:videos` into your `config/initializers/link_thumbnailer.rb` `attributes` config in order to start scraping videos.
185
+
186
+ Ex:
187
+
188
+ ```ruby
189
+ config.attributes = [:title, :images, :description, :videos]
190
+ ```
191
+
192
+ # 2.1.0
193
+
194
+ - Increased `og:image` scraping performance by parsing `og:image:width` and `og:image:height` attribute if specified
195
+ - Introduced `image_stats` option to allow disabling image size and type parsing causing performance issues.
196
+
197
+ When disabled, size will be `[0, 0]` and type will be `nil`
198
+
199
+ # 2.0.4
200
+
201
+ - Fixes [#39](https://github.com/gottfrois/link_thumbnailer/issues/39)
202
+
203
+ # 2.0.3
204
+
205
+ - Fixes [#37](https://github.com/gottfrois/link_thumbnailer/issues/37)
206
+
207
+ # 2.0.2
208
+
209
+ - Fix couple of issues with `URI` class namespace
210
+
211
+ # 2.0.1
212
+
213
+ - Fix issue with image parser (fastimage) when given an URI instance instead of a string
214
+
215
+ # 2.0.0
216
+
217
+ - Fully refactored LinkThumbnailer
218
+ - Introduced [Graders](https://github.com/gottfrois/link_thumbnailer/wiki/How-to-build-your-own-Grader%3F)
219
+ - Introduced [Scrapers](https://github.com/gottfrois/link_thumbnailer/wiki/Attributes-option-explained)
220
+ - Ability to score descriptions
221
+ - Ability to fetch multiple `og:image`
222
+ - Fixed memoized run-time options
223
+ - Fixed some website urls not working
224
+ - Refactor ugly code
225
+ - More specs
226
+ - Removed `PreviewsController` since it does not add much value. Simply create your own and use the `to_json` method.
227
+
228
+ To update from `1.x.x` to `2.x.x` you need to run `rails g link_thumbnailer:install` to get the new configuration file.
229
+ If you used the `PreviewsController` feature, you need to build it yourself since it is not supported anymore.
230
+
231
+ # 1.1.2
232
+
233
+ - Fix issue with FastImage URLs [https://github.com/gottfrois/link_thumbnailer/pull/31](https://github.com/gottfrois/link_thumbnailer/pull/31)
234
+
235
+ # 1.1.1
236
+
237
+ - Fix route helper not working under rails 4.
238
+
239
+ # 1.1.0
240
+
241
+ - Replace RMagick by [FastImage](https://github.com/sdsykes/fastimage)
242
+ - Rename `rmagick_attributes` config into `image_attributes`
243
+
244
+ # 1.0.9
245
+
246
+ - Fix issue when Location header used a relative path instead of an absolute path
247
+ - Update gemfile to be more flexible when using Hashie gem
248
+
249
+ # 1.0.8
250
+
251
+ - Thanks to [juriglx](https://github.com/juriglx), support for canonical urls
252
+ - Bug fixes
253
+
254
+ # 1.0.7
255
+
256
+ - Fix: Issue with preview controller
257
+
258
+ # 1.0.6
259
+
260
+ - Fix: Issue when setting `strict` option. Always returning OG representation.
261
+
262
+ # 1.0.5
263
+
264
+ - Thanks to [phlegx](https://github.com/phlegx), support for timeout http connection through configurations.
265
+
266
+ # 1.0.4
267
+
268
+ - Fix issue #7: nil img was returned when an exception is raised. Now skipping nil images in results.
269
+ - Thanks to [phlegx](https://github.com/phlegx), support for SSL and User Agent customization through configurations.
270
+
271
+ # 1.0.3
272
+
273
+ - Fix issue #5: URL was incorrect in case of HTTP redirections.
274
+
275
+ # 1.0.2
276
+
277
+ - Feature: User can now set options at runtime by passing valid options to ```generate``` method
278
+ - Bug fix when doing ```rails g link_thumbnailer:install``` by explicitly specifying the scope of Rails
279
+
280
+ # 1.0.1
281
+
282
+ - Refactor LinkThumbnailer#generate method to have a cleaner code
283
+
284
+ # 1.0.0
285
+
286
+ - Update readme
287
+ - Add PreviewController for easy integration with user's app
288
+ - Add link_thumbnailer routes for easy integration with user's app
289
+ - Refactor some code
290
+ - Change 'to_a' method to 'to_hash' in object model
291
+
292
+ # 0.0.6
293
+
294
+ - Update readme
295
+ - Add `to_a` to WebImage class
296
+ - Refactor `to_json` for WebImage class
297
+ - Add specs corresponding
298
+
299
+ # 0.0.5
300
+
301
+ - Bug fix
302
+ - Remove `require 'rails'` from spec_helper.rb
303
+ - Remove rails dependencies (blank? method) in code
304
+ - Spec fix
305
+
306
+ # 0.0.4
307
+
308
+ - Add specs for almost all classes
309
+ - Add a method `to_json` for WebImage class to be able to get a usable array of images' attributes
310
+
311
+ # 0.0.3
312
+
313
+ - Add specs for LinkThumbnailer class
314
+ - Refactor config system, now using dedicated configuration class
315
+
316
+ # 0.0.2
317
+
318
+ - Added Rspec
319
+ - Bug fixes:
320
+ - Now checking if attribute is blank for LinkThumbnailer::Object.valid? method
321
+
322
+ # 0.0.1
323
+
324
+ - LinkThumbnailer::Object
325
+ - LinkThumbnailer::Doc
326
+ - LinkThumbnailer::DocParser
327
+ - LinkThumbnailer::Fetcher
328
+ - LinkThumbnailer::ImgComparator
329
+ - LinkThumbnailer::ImgParser
330
+ - LinkThumbnailer::ImgUrlFilter
331
+ - LinkThumbnailer::Opengraph
332
+ - LinkThumbnailer::WebImage
333
+ - LinkThumbnailer.configure
334
+ - LinkThumbnailer.generate
data/Gemfile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ # Specify your gem's dependencies in link_thumbnailer.gemspec
6
+ gemspec
7
+
8
+ group :development, :test do
9
+ gem 'rspec', '>= 2.14'
10
+ gem 'webmock', '>= 1.14'
11
+ gem 'pry', '>= 0.9'
12
+ end
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Pierre-Louis Gottfrois
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,210 @@
1
+ # LinkThumbnailer
2
+
3
+ [![Code Climate](https://codeclimate.com/github/gottfrois/link_thumbnailer.png)](https://codeclimate.com/github/gottfrois/link_thumbnailer)
4
+ [![Build Status](https://travis-ci.org/gottfrois/link_thumbnailer.png?branch=master)](https://travis-ci.org/gottfrois/link_thumbnailer)
5
+ [![Gem Version](https://badge.fury.io/rb/link_thumbnailer.svg)](http://badge.fury.io/rb/link_thumbnailer)
6
+ [![Dependency Status](https://gemnasium.com/gottfrois/link_thumbnailer.svg)](https://gemnasium.com/gottfrois/link_thumbnailer)
7
+
8
+ Ruby gem that generates image thumbnails from a given URL, ranks them, and gives you back an object containing images and website information. Works like Facebook's link previewer.
9
+
10
+ Demo Application is [here](http://link-thumbnailer-demo.herokuapp.com/) !
11
+ The source code of the Demo Application is hosted [here](https://github.com/gottfrois/link_thumbnailer_demo)!
12
+
13
+ **OpenSource** and **Free** API available [here](https://github.com/gottfrois/link_thumbnailer_api) !
14
+
15
+ ## Features
16
+
17
+ - Dead simple.
18
+ - Support [OpenGraph](http://ogp.me/) protocol.
19
+ - Find and sort images that best represent what the page is about.
20
+ - Find and rate the description that best represents what the page is about.
21
+ - Allow for custom class to sort the website descriptions yourself.
22
+ - Support image urls blacklisting (advertisements).
23
+ - Works with and without Rails.
24
+ - Fully customizable.
25
+ - Fully tested.
26
+
27
+ ## Installation
28
+
29
+ Add this line to your application's Gemfile:
30
+
31
+ ```ruby
32
+ gem 'link_thumbnailer'
33
+ ```
34
+
35
+ And then execute:
36
+
37
+ $ bundle
38
+
39
+ Or install it yourself as:
40
+
41
+ $ gem install link_thumbnailer
42
+
43
+ Run:
44
+
45
+ $ rails g link_thumbnailer:install
46
+
47
+ This will add `link_thumbnailer.rb` to `config/initializers/`.
48
+
49
+ ## Usage
50
+
51
+ Run `irb` and require the gem:
52
+
53
+ ```ruby
54
+ require 'link_thumbnailer'
55
+ ```
56
+
57
+ The gem handles regular websites as well as websites that use the [Opengraph](http://ogp.me/) protocol.
58
+
59
+ ```ruby
60
+ object = LinkThumbnailer.generate('http://stackoverflow.com')
61
+ => #<LinkThumbnailer::Models::Website:...>
62
+
63
+ object.title
64
+ => "Stack Overflow"
65
+
66
+ object.favicon
67
+ => "//cdn.sstatic.net/stackoverflow/img/favicon.ico?v=038622610830"
68
+
69
+ object.description
70
+ => "Q&A for professional and enthusiast programmers"
71
+
72
+ object.images.first.src.to_s
73
+ => "http://cdn.sstatic.net/stackoverflow/img/apple-touch-icon@2.png?v=fde65a5a78c6"
74
+ ```
75
+
76
+ LinkThumbnailer's `generate` method returns an instance of `LinkThumbnailer::Models::Website` that responds to `to_json` and `as_json` as you would expect:
77
+
78
+ ```ruby
79
+ object.to_json
80
+ => "{\"url\":\"http://stackoverflow.com\",\"title\":\"Stack Overflow\",\"description\":\"Q&A for professional and enthusiast programmers\",\"images\":[{\"src\":\"http://cdn.sstatic.net/stackoverflow/img/apple-touch-icon@2.png?v=fde65a5a78c6\",\"size\":[316,316],\"type\":\"png\"}]}"
81
+ ```
82
+
83
+
84
+ ## Configuration
85
+
86
+ LinkThumbnailer comes with default configuration values. You can change default value by overriding them in a rails initializer:
87
+
88
+ In `config/initializers/link_thumbnailer.rb`
89
+
90
+ ```ruby
91
+ LinkThumbnailer.configure do |config|
92
+ # Number of redirects allowed before raising an exception when trying to parse given url.
93
+ #
94
+ # config.redirect_limit = 3
95
+
96
+ # Set user agent
97
+ #
98
+ # config.user_agent = 'link_thumbnailer'
99
+
100
+ # Enable or disable SSL verification
101
+ #
102
+ # config.verify_ssl = true
103
+
104
+ # The amount of time in seconds to wait for a connection to be opened.
105
+ # If the HTTP object cannot open a connection in this many seconds,
106
+ # it raises a Net::OpenTimeout exception.
107
+ #
108
+ # See http://www.ruby-doc.org/stdlib-2.1.1/libdoc/net/http/rdoc/Net/HTTP.html#open_timeout
109
+ #
110
+ # config.http_open_timeout = 5
111
+
112
+ # List of blacklisted urls you want to skip when searching for images.
113
+ #
114
+ # config.blacklist_urls = [
115
+ # %r{^http://ad\.doubleclick\.net/},
116
+ # %r{^http://b\.scorecardresearch\.com/},
117
+ # %r{^http://pixel\.quantserve\.com/},
118
+ # %r{^http://s7\.addthis\.com/}
119
+ # ]
120
+
121
+ # List of attributes you want LinkThumbnailer to fetch on a website.
122
+ #
123
+ # config.attributes = [:title, :images, :description, :videos, :favicon]
124
+
125
+ # List of procedures used to rate the website description. Add your custom class
126
+ # here. See wiki for more details on how to build your own graders.
127
+ #
128
+ # config.graders = [
129
+ # ->(description) { ::LinkThumbnailer::Graders::Length.new(description) },
130
+ # ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :class) },
131
+ # ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :id) },
132
+ # ->(description) { ::LinkThumbnailer::Graders::Position.new(description, weight: 3) },
133
+ # ->(description) { ::LinkThumbnailer::Graders::LinkDensity.new(description) }
134
+ # ]
135
+
136
+ # Minimum description length for a website.
137
+ #
138
+ # config.description_min_length = 25
139
+
140
+ # Regex of words considered positive to rate website description.
141
+ #
142
+ # config.positive_regex = /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i
143
+
144
+ # Regex of words considered negative to rate website description.
145
+ #
146
+ # config.negative_regex = /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget|modal/i
147
+
148
+ # Number of images to fetch. Fetching too many images will be slow.
149
+ # Note that LinkThumbnailer will only sort fetched images between each other.
150
+ # Meaning that there could be a "better" image on the page.
151
+ #
152
+ # config.image_limit = 5
153
+
154
+ # Whether you want LinkThumbnailer to return image size and type or not.
155
+ # Setting this value to false will increase performance since, for each image, LinkThumbnailer
156
+ # does not have to fetch its size and type.
157
+ #
158
+ # config.image_stats = true
159
+ #
160
+ # Whether you want LinkThumbnailer to raise an exception if the Content-Type of the HTTP request
161
+ # is not an html or xml.
162
+ #
163
+ # config.raise_on_invalid_format = false
164
+ #
165
+ # Sets the number of concurrent http connections that can be opened to fetch image information such as size and type.
166
+ #
167
+ # config.max_concurrency = 20
168
+
169
+ # Sets the default encoding.
170
+ #
171
+ # config.encoding = 'utf-8'
172
+ end
173
+ ```
174
+
175
+ Or at runtime:
176
+
177
+ ```ruby
178
+ object = LinkThumbnailer.generate('http://stackoverflow.com', redirect_limit: 5, user_agent: 'foo')
179
+ ```
180
+
181
+ Note that runtime options will override default global configuration.
182
+
183
+ See [Configuration Options Explained](https://github.com/gottfrois/link_thumbnailer/wiki/Configuration-options-explained) for more details on each configuration option.
184
+
185
+ ## Exceptions
186
+
187
+ LinkThumbnailer defines a list of custom exceptions you may want to rescue in your code. All the following exceptions inherit from `LinkThumbnailer::Exceptions`:
188
+
189
+ * `RedirectLimit` -- raised when redirection threshold defined in config is reached
190
+ * `BadUriFormat` -- raised when url given is not a valid HTTP url
191
+ * `FormatNotSupported` -- raised when the `Content-Type` of the HTTP request is not supported (not `html`)
192
+
193
+ You can rescue from any LinkThumbnailer exceptions using the following code:
194
+
195
+ ```ruby
196
+ begin
197
+ LinkThumbnailer.generate('http://foo.com')
198
+ rescue LinkThumbnailer::Exceptions => e
199
+ # do something
200
+ end
201
+ ```
202
+
203
+ ## Contributing
204
+
205
+ 1. Fork it
206
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
207
+ 3. Run the specs (`bundle exec rspec spec`)
208
+ 4. Commit your changes (`git commit -am 'Added some feature'`)
209
+ 5. Push to the branch (`git push origin my-new-feature`)
210
+ 6. Create new Pull Request