link_thumbnailer 3.2.0 → 3.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +5 -5
  2. data/.ruby-version +1 -0
  3. data/.travis.yml +2 -4
  4. data/CHANGELOG.md +252 -75
  5. data/Gemfile +5 -3
  6. data/README.md +4 -0
  7. data/Rakefile +2 -0
  8. data/lib/generators/link_thumbnailer/install_generator.rb +2 -0
  9. data/lib/generators/templates/initializer.rb +15 -0
  10. data/lib/link_thumbnailer.rb +2 -0
  11. data/lib/link_thumbnailer/configuration.rb +74 -68
  12. data/lib/link_thumbnailer/exceptions.rb +3 -0
  13. data/lib/link_thumbnailer/grader.rb +2 -0
  14. data/lib/link_thumbnailer/graders/base.rb +2 -0
  15. data/lib/link_thumbnailer/graders/html_attribute.rb +2 -0
  16. data/lib/link_thumbnailer/graders/length.rb +2 -0
  17. data/lib/link_thumbnailer/graders/link_density.rb +2 -0
  18. data/lib/link_thumbnailer/graders/position.rb +2 -0
  19. data/lib/link_thumbnailer/image_comparator.rb +2 -0
  20. data/lib/link_thumbnailer/image_comparators/base.rb +2 -0
  21. data/lib/link_thumbnailer/image_comparators/size.rb +2 -0
  22. data/lib/link_thumbnailer/image_parser.rb +13 -1
  23. data/lib/link_thumbnailer/image_validator.rb +2 -0
  24. data/lib/link_thumbnailer/model.rb +20 -17
  25. data/lib/link_thumbnailer/models/description.rb +2 -0
  26. data/lib/link_thumbnailer/models/favicon.rb +2 -0
  27. data/lib/link_thumbnailer/models/image.rb +56 -54
  28. data/lib/link_thumbnailer/models/title.rb +2 -0
  29. data/lib/link_thumbnailer/models/video.rb +2 -0
  30. data/lib/link_thumbnailer/models/website.rb +54 -52
  31. data/lib/link_thumbnailer/page.rb +4 -1
  32. data/lib/link_thumbnailer/parser.rb +3 -1
  33. data/lib/link_thumbnailer/processor.rb +38 -5
  34. data/lib/link_thumbnailer/railtie.rb +2 -0
  35. data/lib/link_thumbnailer/response.rb +39 -0
  36. data/lib/link_thumbnailer/scraper.rb +62 -60
  37. data/lib/link_thumbnailer/scrapers/base.rb +69 -67
  38. data/lib/link_thumbnailer/scrapers/default/base.rb +2 -0
  39. data/lib/link_thumbnailer/scrapers/default/description.rb +2 -0
  40. data/lib/link_thumbnailer/scrapers/default/favicon.rb +16 -2
  41. data/lib/link_thumbnailer/scrapers/default/images.rb +5 -1
  42. data/lib/link_thumbnailer/scrapers/default/title.rb +2 -0
  43. data/lib/link_thumbnailer/scrapers/default/videos.rb +2 -0
  44. data/lib/link_thumbnailer/scrapers/opengraph/base.rb +2 -0
  45. data/lib/link_thumbnailer/scrapers/opengraph/description.rb +2 -0
  46. data/lib/link_thumbnailer/scrapers/opengraph/favicon.rb +2 -0
  47. data/lib/link_thumbnailer/scrapers/opengraph/image.rb +7 -1
  48. data/lib/link_thumbnailer/scrapers/opengraph/images.rb +2 -0
  49. data/lib/link_thumbnailer/scrapers/opengraph/title.rb +2 -0
  50. data/lib/link_thumbnailer/scrapers/opengraph/video.rb +2 -0
  51. data/lib/link_thumbnailer/scrapers/opengraph/videos.rb +2 -0
  52. data/lib/link_thumbnailer/uri.rb +20 -0
  53. data/lib/link_thumbnailer/version.rb +3 -1
  54. data/lib/link_thumbnailer/video_parser.rb +3 -1
  55. data/link_thumbnailer.gemspec +8 -6
  56. data/spec/configuration_spec.rb +4 -2
  57. data/spec/fixture_spec.rb +21 -0
  58. data/spec/fixtures/default_with_few_favicons.html +15 -0
  59. data/spec/fixtures/google_shift_jis.html +6 -0
  60. data/spec/fixtures/google_utf8.html +6 -0
  61. data/spec/fixtures/google_utf8_no_meta_charset.html +6 -0
  62. data/spec/fixtures/with_related_path_in_href.html +13 -0
  63. data/spec/fixtures/with_root_path_in_href.html +13 -0
  64. data/spec/grader_spec.rb +3 -1
  65. data/spec/graders/base_spec.rb +2 -0
  66. data/spec/graders/html_attribute_spec.rb +9 -7
  67. data/spec/graders/length_spec.rb +10 -6
  68. data/spec/graders/link_density_spec.rb +4 -2
  69. data/spec/graders/position_spec.rb +8 -6
  70. data/spec/image_comparators/size_spec.rb +2 -0
  71. data/spec/image_validator_spec.rb +3 -1
  72. data/spec/model_spec.rb +2 -0
  73. data/spec/models/description_spec.rb +3 -1
  74. data/spec/models/favicon_spec.rb +2 -0
  75. data/spec/models/image_spec.rb +6 -4
  76. data/spec/models/title_spec.rb +2 -0
  77. data/spec/models/video_spec.rb +7 -5
  78. data/spec/models/website_spec.rb +5 -3
  79. data/spec/page_spec.rb +2 -0
  80. data/spec/processor_spec.rb +74 -23
  81. data/spec/response_spec.rb +84 -0
  82. data/spec/scraper_spec.rb +6 -4
  83. data/spec/scrapers/base_spec.rb +6 -4
  84. data/spec/scrapers/opengraph/base_spec.rb +8 -6
  85. data/spec/spec_helper.rb +2 -0
  86. data/spec/uri_spec.rb +44 -0
  87. data/spec/video_parser_spec.rb +15 -13
  88. metadata +37 -19
data/Gemfile CHANGED
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
 
3
5
  # Specify your gem's dependencies in link_thumbnailer.gemspec
4
6
  gemspec
5
7
 
6
8
  group :development, :test do
7
- gem 'rspec', '~> 2.14'
8
- gem 'webmock', '~> 1.14'
9
- gem 'pry', '~> 0.9'
9
+ gem 'rspec', '>= 2.14'
10
+ gem 'webmock', '>= 1.14'
11
+ gem 'pry', '>= 0.9'
10
12
  end
data/README.md CHANGED
@@ -165,6 +165,10 @@ LinkThumbnailer.configure do |config|
165
165
  # Sets the number of concurrent HTTP connections that can be opened to fetch image information such as size and type.
166
166
  #
167
167
  # config.max_concurrency = 20
168
+
169
+ # Sets the default encoding.
170
+ #
171
+ # config.encoding = 'utf-8'
168
172
  end
169
173
  ```
170
174
 
data/Rakefile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'bundler/gem_tasks'
2
4
  require 'rspec/core/rake_task'
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module LinkThumbnailer
2
4
  module Generators
3
5
  class InstallGenerator < ::Rails::Generators::Base
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Use this hook to configure LinkThumbnailer behaviors.
2
4
  LinkThumbnailer.configure do |config|
3
5
  # Numbers of redirects before raising an exception when trying to parse given url.
@@ -33,6 +35,11 @@ LinkThumbnailer.configure do |config|
33
35
  #
34
36
  # config.attributes = [:title, :images, :description, :videos, :favicon]
35
37
 
38
+ # Preferred favicon size. If the website doesn't provide that size, the first favicon is returned.
39
+ # Value should be like '32x32' or '16x16'. Default value is nil.
40
+ #
41
+ # config.favicon_size = nil
42
+
36
43
  # List of procedures used to rate the website description. Add your custom class
37
44
  # here. See wiki for more details on how to build your own graders.
38
45
  #
@@ -80,4 +87,12 @@ LinkThumbnailer.configure do |config|
80
87
  # Defines the strategies to use to scrape the website. See the [Open Graph Protocol](http://ogp.me/) for more information.
81
88
  #
82
89
  # config.scrapers = [:opengraph, :default]
90
+
91
+ # Limit for download size in bytes. When using ActiveSupport, you can also use values like 10.megabytes
92
+ #
93
+ # config.download_size_limit = 10 * 1024 * 1024
94
+
95
+ # Sets the default encoding.
96
+ #
97
+ # config.encoding = 'utf-8'
83
98
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'json'
2
4
  require 'link_thumbnailer/version'
3
5
  require 'link_thumbnailer/configuration'
@@ -1,68 +1,74 @@
1
- module LinkThumbnailer
2
-
3
- # Access point for the gem configurations.
4
- #
5
- # @return [LinkThumbnailer::Configuration] a configuration instance.
6
- def self.config
7
- @config ||= Configuration.new
8
- end
9
-
10
- # Configure hook used in the gem initializer. Convinient way to set all the
11
- # gem configurations.
12
- #
13
- # @example inside config/initializers/link_thumbnaler.rb
14
- # LinkThumbnailer.configure do |config|
15
- # config.user_agent = 'link_thumbnailer'
16
- # end
17
- #
18
- # @return [void]
19
- def self.configure
20
- yield config if block_given?
21
- end
22
-
23
- class Configuration
24
-
25
- attr_accessor :redirect_limit, :blacklist_urls, :user_agent,
26
- :verify_ssl, :http_open_timeout, :http_read_timeout, :attributes,
27
- :graders, :description_min_length, :positive_regex, :negative_regex,
28
- :image_limit, :image_stats, :raise_on_invalid_format, :max_concurrency,
29
- :scrapers
30
-
31
- alias_method :http_timeout, :http_open_timeout
32
- alias_method :http_timeout=, :http_open_timeout=
33
-
34
- # Create a new instance.
35
- #
36
- # @return [LinkThumbnailer::Configuration]
37
- def initialize
38
- @redirect_limit = 3
39
- @user_agent = 'link_thumbnailer'
40
- @verify_ssl = true
41
- @http_open_timeout = 5
42
- @http_read_timeout = 5
43
- @blacklist_urls = [
44
- %r{^http://ad\.doubleclick\.net/},
45
- %r{^http://b\.scorecardresearch\.com/},
46
- %r{^http://pixel\.quantserve\.com/},
47
- %r{^http://s7\.addthis\.com/}
48
- ]
49
- @attributes = [:title, :images, :description, :videos, :favicon]
50
- @graders = [
51
- ->(description) { ::LinkThumbnailer::Graders::Length.new(description) },
52
- ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :class) },
53
- ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :id) },
54
- ->(description) { ::LinkThumbnailer::Graders::Position.new(description, weigth: 3) },
55
- ->(description) { ::LinkThumbnailer::Graders::LinkDensity.new(description) },
56
- ]
57
- @description_min_length = 50
58
- @positive_regex = /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i
59
- @negative_regex = /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget|modal/i
60
- @image_limit = 5
61
- @image_stats = true
62
- @raise_on_invalid_format = false
63
- @max_concurrency = 20
64
- @scrapers = [:opengraph, :default]
65
- end
66
-
67
- end
68
- end
1
+ # frozen_string_literal: true
2
+
3
+ module LinkThumbnailer
4
+
5
+ # Access point for the gem configurations.
6
+ #
7
+ # @return [LinkThumbnailer::Configuration] a configuration instance.
8
+ def self.config
9
+ @config ||= Configuration.new
10
+ end
11
+
12
+ # Configure hook used in the gem initializer. Convenient way to set all the
13
+ # gem configurations.
14
+ #
15
+ # @example inside config/initializers/link_thumbnailer.rb
16
+ # LinkThumbnailer.configure do |config|
17
+ # config.user_agent = 'link_thumbnailer'
18
+ # end
19
+ #
20
+ # @return [void]
21
+ def self.configure
22
+ yield config if block_given?
23
+ end
24
+
25
+ class Configuration
26
+
27
+ attr_accessor :redirect_limit, :blacklist_urls, :user_agent,
28
+ :verify_ssl, :http_open_timeout, :http_read_timeout, :attributes,
29
+ :graders, :description_min_length, :positive_regex, :negative_regex,
30
+ :image_limit, :image_stats, :raise_on_invalid_format, :max_concurrency,
31
+ :scrapers, :http_override_headers, :download_size_limit, :encoding,
32
+ :favicon_size
33
+
34
+ alias_method :http_timeout, :http_open_timeout
35
+ alias_method :http_timeout=, :http_open_timeout=
36
+
37
+ # Create a new instance.
38
+ #
39
+ # @return [LinkThumbnailer::Configuration]
40
+ def initialize
41
+ @redirect_limit = 3
42
+ @user_agent = 'link_thumbnailer'
43
+ @verify_ssl = true
44
+ @http_open_timeout = 5
45
+ @http_read_timeout = 5
46
+ @blacklist_urls = [
47
+ %r{^http://ad\.doubleclick\.net/},
48
+ %r{^http://b\.scorecardresearch\.com/},
49
+ %r{^http://pixel\.quantserve\.com/},
50
+ %r{^http://s7\.addthis\.com/}
51
+ ]
52
+ @attributes = [:title, :images, :description, :videos, :favicon]
53
+ @graders = [
54
+ ->(description) { ::LinkThumbnailer::Graders::Length.new(description) },
55
+ ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :class) },
56
+ ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :id) },
57
+ ->(description) { ::LinkThumbnailer::Graders::Position.new(description, weigth: 3) },
58
+ ->(description) { ::LinkThumbnailer::Graders::LinkDensity.new(description) },
59
+ ]
60
+ @description_min_length = 50
61
+ @positive_regex = /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i
62
+ @negative_regex = /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget|modal/i
63
+ @image_limit = 5
64
+ @image_stats = true
65
+ @raise_on_invalid_format = false
66
+ @max_concurrency = 20
67
+ @scrapers = [:opengraph, :default]
68
+ @http_override_headers = { 'Accept-Encoding' => 'none' }
69
+ @download_size_limit = 10 * 1024 * 1024
70
+ @encoding = 'utf-8'
71
+ end
72
+
73
+ end
74
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module LinkThumbnailer
2
4
  Exceptions = Class.new(StandardError)
3
5
  RedirectLimit = Class.new(Exceptions)
@@ -6,4 +8,5 @@ module LinkThumbnailer
6
8
  ScraperInvalid = Class.new(Exceptions)
7
9
  HTTPError = Class.new(Exceptions)
8
10
  SyntaxError = Class.new(Exceptions)
11
+ DownloadSizeLimit = Class.new(Exceptions)
9
12
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'delegate'
2
4
  require 'link_thumbnailer/graders/base'
3
5
  require 'link_thumbnailer/graders/length'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'delegate'
2
4
 
3
5
  module LinkThumbnailer
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module LinkThumbnailer
2
4
  module Graders
3
5
  class HtmlAttribute < ::LinkThumbnailer::Graders::Base
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module LinkThumbnailer
2
4
  module Graders
3
5
  class Length < ::LinkThumbnailer::Graders::Base
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module LinkThumbnailer
2
4
  module Graders
3
5
  class LinkDensity < ::LinkThumbnailer::Graders::Base
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module LinkThumbnailer
2
4
  module Graders
3
5
  class Position < ::LinkThumbnailer::Graders::Base
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'link_thumbnailer/image_comparators/base'
2
4
  require 'link_thumbnailer/image_comparators/size'
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module LinkThumbnailer
2
4
  module ImageComparators
3
5
  class Base
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module LinkThumbnailer
2
4
  module ImageComparators
3
5
  class Size < ::LinkThumbnailer::ImageComparators::Base
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'image_info'
2
4
 
3
5
  module LinkThumbnailer
@@ -6,7 +8,7 @@ module LinkThumbnailer
6
8
  attr_reader :images
7
9
 
8
10
  def initialize(urls)
9
- @images = perform? ? ::ImageInfo.from(urls, max_concurrency: max_concurrency) : Array(urls).map(&method(:build_default_image))
11
+ @images = perform? ? image_info(urls) : default_images(urls)
10
12
  end
11
13
 
12
14
  def size
@@ -19,6 +21,10 @@ module LinkThumbnailer
19
21
 
20
22
  private
21
23
 
24
+ def default_images(urls)
25
+ Array(urls).compact.map(&method(:build_default_image))
26
+ end
27
+
22
28
  def build_default_image(uri)
23
29
  NullImage.new(uri)
24
30
  end
@@ -31,6 +37,12 @@ module LinkThumbnailer
31
37
  ::LinkThumbnailer.page.config.max_concurrency
32
38
  end
33
39
 
40
+ def image_info(urls)
41
+ ::ImageInfo.from(urls, max_concurrency: max_concurrency)
42
+ rescue
43
+ default_images(urls)
44
+ end
45
+
34
46
  class NullImage
35
47
  attr_reader :uri
36
48
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'delegate'
2
4
 
3
5
  module LinkThumbnailer
@@ -1,17 +1,20 @@
1
- module LinkThumbnailer
2
- class Model
3
-
4
- def to_json(*args)
5
- as_json.to_json(*args)
6
- end
7
-
8
- private
9
-
10
- def sanitize(str)
11
- return unless str
12
-
13
- str.encode!("UTF-16", "UTF-8", invalid: :replace, undef: :replace, replace: "")
14
- str.encode!("UTF-8", "UTF-16").strip.gsub(/[\r\n\f]+/, "\n")
15
- end
16
- end
17
- end
1
+ # frozen_string_literal: true
2
+
3
+ module LinkThumbnailer
4
+ class Model
5
+
6
+ def to_json(*args)
7
+ as_json.to_json(*args)
8
+ end
9
+
10
+ private
11
+
12
+ def sanitize(str)
13
+ return unless str
14
+
15
+ str = str.encode("UTF-16", "UTF-8", invalid: :replace, undef: :replace, replace: "")
16
+ str = str.encode("UTF-8", "UTF-16").strip.gsub(/[\r\n\f]+/, "\n")
17
+ str
18
+ end
19
+ end
20
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'link_thumbnailer/model'
2
4
  require 'link_thumbnailer/grader'
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'link_thumbnailer/model'
2
4
 
3
5
  module LinkThumbnailer
@@ -1,54 +1,56 @@
1
- require 'link_thumbnailer/model'
2
- require 'link_thumbnailer/image_parser'
3
- require 'link_thumbnailer/image_comparator'
4
- require 'link_thumbnailer/image_validator'
5
-
6
- module LinkThumbnailer
7
- module Models
8
- class Image < ::LinkThumbnailer::Model
9
-
10
- attr_reader :src, :type, :size
11
-
12
- def initialize(src, size = nil, type = nil)
13
- @src = src
14
- @size = size || parser.size
15
- @type = type || parser.type
16
- end
17
-
18
- def to_s
19
- src.to_s
20
- end
21
-
22
- def <=>(other)
23
- comparator.call(other)
24
- end
25
-
26
- def valid?
27
- validator.call
28
- end
29
-
30
- def as_json(*)
31
- {
32
- src: src.to_s,
33
- size: size,
34
- type: type
35
- }
36
- end
37
-
38
- private
39
-
40
- def parser
41
- @parser ||= ::LinkThumbnailer::ImageParser.new(src)
42
- end
43
-
44
- def validator
45
- ::LinkThumbnailer::ImageValidator.new(self)
46
- end
47
-
48
- def comparator
49
- ::LinkThumbnailer::ImageComparator.new(self)
50
- end
51
-
52
- end
53
- end
54
- end
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/model'
4
+ require 'link_thumbnailer/image_parser'
5
+ require 'link_thumbnailer/image_comparator'
6
+ require 'link_thumbnailer/image_validator'
7
+
8
+ module LinkThumbnailer
9
+ module Models
10
+ class Image < ::LinkThumbnailer::Model
11
+
12
+ attr_reader :src, :type, :size
13
+
14
+ def initialize(src, size = nil, type = nil)
15
+ @src = src
16
+ @size = size || parser.size
17
+ @type = type || parser.type
18
+ end
19
+
20
+ def to_s
21
+ src.to_s
22
+ end
23
+
24
+ def <=>(other)
25
+ comparator.call(other)
26
+ end
27
+
28
+ def valid?
29
+ validator.call
30
+ end
31
+
32
+ def as_json(*)
33
+ {
34
+ src: src.to_s,
35
+ size: size,
36
+ type: type
37
+ }
38
+ end
39
+
40
+ private
41
+
42
+ def parser
43
+ @parser ||= ::LinkThumbnailer::ImageParser.new(src)
44
+ end
45
+
46
+ def validator
47
+ ::LinkThumbnailer::ImageValidator.new(self)
48
+ end
49
+
50
+ def comparator
51
+ ::LinkThumbnailer::ImageComparator.new(self)
52
+ end
53
+
54
+ end
55
+ end
56
+ end