link_thumbnailer 3.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +19 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +5 -0
  6. data/CHANGELOG.md +334 -0
  7. data/Gemfile +12 -0
  8. data/LICENSE.txt +22 -0
  9. data/README.md +210 -0
  10. data/Rakefile +9 -0
  11. data/lib/generators/link_thumbnailer/install_generator.rb +17 -0
  12. data/lib/generators/templates/initializer.rb +89 -0
  13. data/lib/link_thumbnailer.rb +38 -0
  14. data/lib/link_thumbnailer/configuration.rb +72 -0
  15. data/lib/link_thumbnailer/exceptions.rb +11 -0
  16. data/lib/link_thumbnailer/grader.rb +43 -0
  17. data/lib/link_thumbnailer/graders/base.rb +39 -0
  18. data/lib/link_thumbnailer/graders/html_attribute.rb +48 -0
  19. data/lib/link_thumbnailer/graders/length.rb +37 -0
  20. data/lib/link_thumbnailer/graders/link_density.rb +20 -0
  21. data/lib/link_thumbnailer/graders/position.rb +13 -0
  22. data/lib/link_thumbnailer/image_comparator.rb +26 -0
  23. data/lib/link_thumbnailer/image_comparators/base.rb +19 -0
  24. data/lib/link_thumbnailer/image_comparators/size.rb +13 -0
  25. data/lib/link_thumbnailer/image_parser.rb +62 -0
  26. data/lib/link_thumbnailer/image_validator.rb +32 -0
  27. data/lib/link_thumbnailer/model.rb +20 -0
  28. data/lib/link_thumbnailer/models/description.rb +37 -0
  29. data/lib/link_thumbnailer/models/favicon.rb +27 -0
  30. data/lib/link_thumbnailer/models/image.rb +56 -0
  31. data/lib/link_thumbnailer/models/title.rb +22 -0
  32. data/lib/link_thumbnailer/models/video.rb +44 -0
  33. data/lib/link_thumbnailer/models/website.rb +54 -0
  34. data/lib/link_thumbnailer/page.rb +43 -0
  35. data/lib/link_thumbnailer/parser.rb +15 -0
  36. data/lib/link_thumbnailer/processor.rb +128 -0
  37. data/lib/link_thumbnailer/railtie.rb +6 -0
  38. data/lib/link_thumbnailer/response.rb +39 -0
  39. data/lib/link_thumbnailer/scraper.rb +62 -0
  40. data/lib/link_thumbnailer/scrapers/base.rb +69 -0
  41. data/lib/link_thumbnailer/scrapers/default/base.rb +12 -0
  42. data/lib/link_thumbnailer/scrapers/default/description.rb +49 -0
  43. data/lib/link_thumbnailer/scrapers/default/favicon.rb +38 -0
  44. data/lib/link_thumbnailer/scrapers/default/images.rb +78 -0
  45. data/lib/link_thumbnailer/scrapers/default/title.rb +27 -0
  46. data/lib/link_thumbnailer/scrapers/default/videos.rb +18 -0
  47. data/lib/link_thumbnailer/scrapers/opengraph/base.rb +45 -0
  48. data/lib/link_thumbnailer/scrapers/opengraph/description.rb +12 -0
  49. data/lib/link_thumbnailer/scrapers/opengraph/favicon.rb +17 -0
  50. data/lib/link_thumbnailer/scrapers/opengraph/image.rb +107 -0
  51. data/lib/link_thumbnailer/scrapers/opengraph/images.rb +18 -0
  52. data/lib/link_thumbnailer/scrapers/opengraph/title.rb +12 -0
  53. data/lib/link_thumbnailer/scrapers/opengraph/video.rb +115 -0
  54. data/lib/link_thumbnailer/scrapers/opengraph/videos.rb +18 -0
  55. data/lib/link_thumbnailer/uri.rb +20 -0
  56. data/lib/link_thumbnailer/version.rb +5 -0
  57. data/lib/link_thumbnailer/video_parser.rb +47 -0
  58. data/link_thumbnailer.gemspec +29 -0
  59. data/spec/configuration_spec.rb +61 -0
  60. data/spec/fixture_spec.rb +114 -0
  61. data/spec/fixtures/bar.png +2907 -0
  62. data/spec/fixtures/default_from_body.html +13 -0
  63. data/spec/fixtures/default_from_meta.html +12 -0
  64. data/spec/fixtures/foo.png +0 -0
  65. data/spec/fixtures/google_shift_jis.html +6 -0
  66. data/spec/fixtures/google_utf8.html +6 -0
  67. data/spec/fixtures/og_not_valid_example.html +12 -0
  68. data/spec/fixtures/og_valid_example.html +18 -0
  69. data/spec/fixtures/og_valid_multi_image_example.html +13 -0
  70. data/spec/fixtures/og_valid_multi_video_example.html +13 -0
  71. data/spec/grader_spec.rb +27 -0
  72. data/spec/graders/base_spec.rb +14 -0
  73. data/spec/graders/html_attribute_spec.rb +50 -0
  74. data/spec/graders/length_spec.rb +93 -0
  75. data/spec/graders/link_density_spec.rb +52 -0
  76. data/spec/graders/position_spec.rb +49 -0
  77. data/spec/image_comparators/size_spec.rb +58 -0
  78. data/spec/image_validator_spec.rb +37 -0
  79. data/spec/model_spec.rb +27 -0
  80. data/spec/models/description_spec.rb +66 -0
  81. data/spec/models/favicon_spec.rb +12 -0
  82. data/spec/models/image_spec.rb +95 -0
  83. data/spec/models/title_spec.rb +26 -0
  84. data/spec/models/video_spec.rb +49 -0
  85. data/spec/models/website_spec.rb +51 -0
  86. data/spec/page_spec.rb +28 -0
  87. data/spec/processor_spec.rb +410 -0
  88. data/spec/response_spec.rb +62 -0
  89. data/spec/scraper_spec.rb +70 -0
  90. data/spec/scrapers/base_spec.rb +69 -0
  91. data/spec/scrapers/opengraph/base_spec.rb +96 -0
  92. data/spec/spec_helper.rb +11 -0
  93. data/spec/uri_spec.rb +44 -0
  94. data/spec/video_parser_spec.rb +148 -0
  95. metadata +271 -0
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/image_comparators/base'
4
+ require 'link_thumbnailer/image_comparators/size'
5
+
6
module LinkThumbnailer
  # Compares a reference image against another image. The actual comparison
  # is delegated to ImageComparators::Size.
  class ImageComparator
    # The reference image handed to the constructor.
    attr_reader :image

    # @param image the reference image, later passed to the size comparator
    def initialize(image)
      @image = image
    end

    # Compares +other+ with the reference image.
    #
    # @param other the image to compare against
    # @return the result of ImageComparators::Size#call for +other+
    def call(other)
      ::LinkThumbnailer::ImageComparators::Size.new(image).call(other)
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module LinkThumbnailer
  module ImageComparators
    # Abstract parent class for image comparators. It stores the reference
    # image and leaves the comparison itself to subclasses.
    class Base
      # The reference image handed to the constructor.
      attr_reader :image

      # @param image the reference image that subclasses compare against
      def initialize(image)
        @image = image
      end

      # Abstract comparison hook; subclasses must override it.
      #
      # The argument is optional and ignored so that both the historical
      # zero-argument form and the one-argument form used by the comparators
      # end up in NotImplementedError rather than ArgumentError.
      #
      # @param _other the image to compare with (unused here)
      # @raise [NotImplementedError] always
      def call(_other = nil)
        raise NotImplementedError, "#{self.class} must implement #call"
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
module LinkThumbnailer
  module ImageComparators
    # Comparator that ranks images by the square of their smallest dimension.
    class Size < ::LinkThumbnailer::ImageComparators::Base
      # Compares +other+ with the reference image. The operands are taken
      # other-first, so an +other+ whose smallest dimension is larger than the
      # reference image's yields 1.
      #
      # @param other an object whose +size+ responds to +min+
      # @return the result of +<=>+ on the two squared values
      def call(other)
        # A nil minimum (e.g. an empty size array) counts as 0 through to_i.
        candidate = other.size.min.to_i**2
        reference = image.size.min.to_i**2
        candidate <=> reference
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'image_info'
4
+
5
module LinkThumbnailer
  # Resolves size and type information for one or more image URLs.
  #
  # When the current page's config enables +image_stats+ the URLs are handed
  # to ImageInfo; otherwise (or when ImageInfo raises) every URL is wrapped in
  # a NullImage placeholder.
  class ImageParser
    # The resolved image objects, one per non-nil URL.
    attr_reader :images

    # @param urls a single URL, an array of URLs, or nil
    def initialize(urls)
      @images = perform? ? image_info(urls) : default_images(urls)
    end

    # @return the size reported by the first image, or [0, 0] when there is
    #   no image at all
    def size
      primary_image.size
    end

    # @return the type reported by the first image, or nil when there is no
    #   image at all
    def type
      primary_image.type
    end

    private

    # First resolved image. Falls back to a NullImage so that #size and #type
    # do not raise NoMethodError on nil when no URL was supplied.
    def primary_image
      images.first || NullImage.new(nil)
    end

    # Wraps every non-nil URL in a NullImage placeholder.
    def default_images(urls)
      Array(urls).compact.map { |uri| build_default_image(uri) }
    end

    def build_default_image(uri)
      NullImage.new(uri)
    end

    # Whether image statistics should be looked up, per the page config.
    def perform?
      ::LinkThumbnailer.page.config.image_stats
    end

    def max_concurrency
      ::LinkThumbnailer.page.config.max_concurrency
    end

    # Asks ImageInfo for the images. Best effort: any StandardError falls
    # back to the placeholder images.
    def image_info(urls)
      ::ImageInfo.from(urls, max_concurrency: max_concurrency)
    rescue
      default_images(urls)
    end

    # Placeholder used when no real image information is available.
    class NullImage
      attr_reader :uri

      def initialize(uri)
        @uri = uri
      end

      # @return [Array<Integer>] always [0, 0]
      def size
        [0, 0]
      end

      # @return [nil] the type is unknown
      def type
        nil
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'delegate'
4
+
5
module LinkThumbnailer
  # Checks an image's source against the blacklist of the current page's
  # config. Unknown methods are delegated to that config object.
  class ImageValidator < ::SimpleDelegator
    attr_reader :config, :image

    # @param image an object responding to +src+
    def initialize(image)
      @image = image
      @config = ::LinkThumbnailer.page.config

      super(@config)
    end

    # @return [Boolean] false when the image has a src whose string form
    #   matches any blacklist entry, true otherwise
    def call
      blacklist_urls.none? { |pattern| image.src && image.src.to_s[pattern] }
    end

    private

    def blacklist_urls
      config.blacklist_urls
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module LinkThumbnailer
  # Common parent of the model classes: JSON serialisation plus a text
  # sanitising helper.
  class Model
    # Serialises the model by converting the result of +as_json+ to JSON.
    # +as_json+ is not defined in this class; it has to be supplied elsewhere.
    #
    # @param args forwarded untouched to +to_json+
    def to_json(*args)
      as_json.to_json(*args)
    end

    private

    # Returns a cleaned copy of +str+ (the argument itself is not modified):
    # bytes that are invalid or undefined during a UTF-8 -> UTF-16 round trip
    # are dropped, surrounding whitespace is stripped, and each run of
    # CR / LF / FF characters collapses to a single "\n".
    #
    # @param str [String, nil]
    # @return [String, nil] nil when +str+ is nil or false
    def sanitize(str)
      return unless str

      str.dup
         .encode!("UTF-16", "UTF-8", invalid: :replace, undef: :replace, replace: "")
         .encode!("UTF-8", "UTF-16")
         .strip
         .gsub(/[\r\n\f]+/, "\n")
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/model'
4
+ require 'link_thumbnailer/grader'
5
+
6
module LinkThumbnailer
  module Models
    # A candidate description: its source node, sanitised text, position among
    # the candidates and a probability computed by LinkThumbnailer::Grader.
    class Description < ::LinkThumbnailer::Model
      attr_reader :node, :text, :position, :candidates_number
      attr_accessor :probability

      # @param node the node the text came from
      # @param text the raw text; stored after passing through +sanitize+
      # @param position position of this candidate (defaults to 1)
      # @param candidates_number total number of candidates (defaults to 1)
      def initialize(node, text, position = 1, candidates_number = 1)
        @node, @position, @candidates_number = node, position, candidates_number
        @text = sanitize(text)
        # Graded last: the grader receives this object with the fields above set.
        @probability = ::LinkThumbnailer::Grader.new(self).call
      end

      # @return the sanitised text
      def to_s
        text
      end

      # Orders descriptions by their probability.
      def <=>(other)
        probability <=> other.probability
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/model'
4
+
5
module LinkThumbnailer
  module Models
    # Thin wrapper around a favicon URI.
    class Favicon < ::LinkThumbnailer::Model
      attr_reader :uri

      # @param uri the favicon location
      def initialize(uri)
        @uri = uri
      end

      # @return [String] the URI in string form
      def to_s
        uri.to_s
      end

      # @return [Hash] a hash with the single key +:src+ holding #to_s
      def as_json(*)
        { src: to_s }
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/model'
4
+ require 'link_thumbnailer/image_parser'
5
+ require 'link_thumbnailer/image_comparator'
6
+ require 'link_thumbnailer/image_validator'
7
+
8
module LinkThumbnailer
  module Models
    # An image found for a page, with its source, size and type.
    class Image < ::LinkThumbnailer::Model
      attr_reader :src, :type, :size

      # @param src the image location
      # @param size explicit size; when nil (or false) it is read from the parser
      # @param type explicit type; when nil (or false) it is read from the parser
      def initialize(src, size = nil, type = nil)
        @src  = src
        @size = size || parser.size
        @type = type || parser.type
      end

      # @return [String] the source in string form
      def to_s
        src.to_s
      end

      # Ordering is decided by LinkThumbnailer::ImageComparator.
      def <=>(other)
        ::LinkThumbnailer::ImageComparator.new(self).call(other)
      end

      # Validity is decided by LinkThumbnailer::ImageValidator.
      def valid?
        ::LinkThumbnailer::ImageValidator.new(self).call
      end

      # @return [Hash] the keys +:src+ (as a string), +:size+ and +:type+
      def as_json(*)
        { src: src.to_s, size: size, type: type }
      end

      private

      # Memoised so size and type lookups share one ImageParser instance.
      def parser
        @parser ||= ::LinkThumbnailer::ImageParser.new(src)
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/model'
4
+
5
module LinkThumbnailer
  module Models
    # A page title: the source node and its sanitised text.
    class Title < ::LinkThumbnailer::Model
      attr_reader :node, :text

      # @param node the node the title came from; its +text+ is used when no
      #   explicit text is given
      # @param text optional explicit title text
      def initialize(node, text = nil)
        @node = node
        raw_text = text || node.text
        @text = sanitize(raw_text)
      end

      # @return the sanitised title text
      def to_s
        text
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/model'
4
+ require 'link_thumbnailer/video_parser'
5
+
6
module LinkThumbnailer
  module Models
    # A video found for a page. Every attribute except +src+ (and an explicit
    # +size+) is read from a LinkThumbnailer::VideoParser built for this video.
    class Video < ::LinkThumbnailer::Model
      attr_reader :src, :size, :duration, :provider, :id, :embed_code

      # @param src the video location
      # @param size explicit size; when nil (or false) it is read from the parser
      def initialize(src, size = nil)
        @src = src
        # The parser receives this object itself, so +src+ is assigned first.
        video_parser = parser
        @id         = video_parser.id
        @size       = size || video_parser.size
        @duration   = video_parser.duration
        @provider   = video_parser.provider
        @embed_code = video_parser.embed_code
      end

      # @return [String] the source in string form
      def to_s
        src.to_s
      end

      # @return [Hash] the keys +:id+, +:src+ (as a string), +:size+,
      #   +:duration+, +:provider+ and +:embed_code+
      def as_json(*)
        {
          id:         id,
          src:        src.to_s,
          size:       size,
          duration:   duration,
          provider:   provider,
          embed_code: embed_code
        }
      end

      private

      # Memoised VideoParser for this video.
      def parser
        @parser ||= ::LinkThumbnailer::VideoParser.new(self)
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/model'
4
+
5
module LinkThumbnailer
  module Models
    # Aggregate of everything collected for a page: url, title, description,
    # favicon, images and videos.
    class Website < ::LinkThumbnailer::Model
      attr_accessor :url, :title, :description, :images, :videos, :favicon

      def initialize
        @images = []
        @videos = []
      end

      # Singular alias for #videos=.
      def video=(video)
        self.videos = video
      end

      # Appends the given video(s) to the collection; existing entries are kept.
      #
      # @param videos a single video, an array of videos, or nil
      def videos=(videos)
        Array(videos).each { |video| @videos << video }
      end

      # Singular alias for #images=.
      def image=(image)
        self.images = image
      end

      # Appends the given image(s) to the collection, skipping any whose
      # +valid?+ is falsy; existing entries are kept.
      #
      # @param images a single image, an array of images, or nil
      def images=(images)
        Array(images).each { |image| @images << image if image.valid? }
      end

      # @return [Array] the stored images, sorted in place on every call
      def images
        @images.sort!
      end

      # @return [Hash] the keys +:url+ (as a string), +:favicon+, +:title+,
      #   +:description+, +:images+ and +:videos+ (the last two mapped through
      #   +as_json+)
      def as_json(*)
        {
          url:         url.to_s,
          favicon:     favicon,
          title:       title,
          description: description,
          images:      images.map(&:as_json),
          videos:      videos.map(&:as_json)
        }
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/response'
4
+ require 'link_thumbnailer/processor'
5
+ require 'link_thumbnailer/scraper'
6
+
7
module LinkThumbnailer
  # Ties together fetching a URL (Processor) and scraping the fetched source
  # (Scraper), using a per-page copy of the global configuration.
  class Page
    attr_reader :url, :options, :source

    # @param url the address to process
    # @param options [Hash] per-page configuration overrides; each key must
    #   name a public setter on the configuration object
    # @raise [NoMethodError] when an option has no matching public setter
    def initialize(url, options = {})
      @url = url
      @options = options

      set_options
    end

    # Runs the processor on the URL, stores its result in +source+, then runs
    # the scraper.
    #
    # @return the result of the scraper's +call+
    def generate
      @source = processor.call(url)
      scraper.call
    end

    # @return a memoised +dup+ of the global configuration, so per-page
    #   overrides are not written to the global object
    def config
      @config ||= ::LinkThumbnailer.config.dup
    end

    private

    # Applies every option through the matching setter. public_send is used
    # rather than send so that caller-supplied keys cannot reach private
    # methods of the configuration object.
    def set_options
      options.each { |key, value| config.public_send("#{key}=", value) }
    end

    def processor
      @processor ||= ::LinkThumbnailer::Processor.new
    end

    # Built lazily from the fetched source and the processor's +url+.
    def scraper
      @scraper ||= ::LinkThumbnailer::Scraper.new(source, processor.url)
    end
  end
end