link_thumbnailer 3.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +19 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +5 -0
  6. data/CHANGELOG.md +334 -0
  7. data/Gemfile +12 -0
  8. data/LICENSE.txt +22 -0
  9. data/README.md +210 -0
  10. data/Rakefile +9 -0
  11. data/lib/generators/link_thumbnailer/install_generator.rb +17 -0
  12. data/lib/generators/templates/initializer.rb +89 -0
  13. data/lib/link_thumbnailer.rb +38 -0
  14. data/lib/link_thumbnailer/configuration.rb +72 -0
  15. data/lib/link_thumbnailer/exceptions.rb +11 -0
  16. data/lib/link_thumbnailer/grader.rb +43 -0
  17. data/lib/link_thumbnailer/graders/base.rb +39 -0
  18. data/lib/link_thumbnailer/graders/html_attribute.rb +48 -0
  19. data/lib/link_thumbnailer/graders/length.rb +37 -0
  20. data/lib/link_thumbnailer/graders/link_density.rb +20 -0
  21. data/lib/link_thumbnailer/graders/position.rb +13 -0
  22. data/lib/link_thumbnailer/image_comparator.rb +26 -0
  23. data/lib/link_thumbnailer/image_comparators/base.rb +19 -0
  24. data/lib/link_thumbnailer/image_comparators/size.rb +13 -0
  25. data/lib/link_thumbnailer/image_parser.rb +62 -0
  26. data/lib/link_thumbnailer/image_validator.rb +32 -0
  27. data/lib/link_thumbnailer/model.rb +20 -0
  28. data/lib/link_thumbnailer/models/description.rb +37 -0
  29. data/lib/link_thumbnailer/models/favicon.rb +27 -0
  30. data/lib/link_thumbnailer/models/image.rb +56 -0
  31. data/lib/link_thumbnailer/models/title.rb +22 -0
  32. data/lib/link_thumbnailer/models/video.rb +44 -0
  33. data/lib/link_thumbnailer/models/website.rb +54 -0
  34. data/lib/link_thumbnailer/page.rb +43 -0
  35. data/lib/link_thumbnailer/parser.rb +15 -0
  36. data/lib/link_thumbnailer/processor.rb +128 -0
  37. data/lib/link_thumbnailer/railtie.rb +6 -0
  38. data/lib/link_thumbnailer/response.rb +39 -0
  39. data/lib/link_thumbnailer/scraper.rb +62 -0
  40. data/lib/link_thumbnailer/scrapers/base.rb +69 -0
  41. data/lib/link_thumbnailer/scrapers/default/base.rb +12 -0
  42. data/lib/link_thumbnailer/scrapers/default/description.rb +49 -0
  43. data/lib/link_thumbnailer/scrapers/default/favicon.rb +38 -0
  44. data/lib/link_thumbnailer/scrapers/default/images.rb +78 -0
  45. data/lib/link_thumbnailer/scrapers/default/title.rb +27 -0
  46. data/lib/link_thumbnailer/scrapers/default/videos.rb +18 -0
  47. data/lib/link_thumbnailer/scrapers/opengraph/base.rb +45 -0
  48. data/lib/link_thumbnailer/scrapers/opengraph/description.rb +12 -0
  49. data/lib/link_thumbnailer/scrapers/opengraph/favicon.rb +17 -0
  50. data/lib/link_thumbnailer/scrapers/opengraph/image.rb +107 -0
  51. data/lib/link_thumbnailer/scrapers/opengraph/images.rb +18 -0
  52. data/lib/link_thumbnailer/scrapers/opengraph/title.rb +12 -0
  53. data/lib/link_thumbnailer/scrapers/opengraph/video.rb +115 -0
  54. data/lib/link_thumbnailer/scrapers/opengraph/videos.rb +18 -0
  55. data/lib/link_thumbnailer/uri.rb +20 -0
  56. data/lib/link_thumbnailer/version.rb +5 -0
  57. data/lib/link_thumbnailer/video_parser.rb +47 -0
  58. data/link_thumbnailer.gemspec +29 -0
  59. data/spec/configuration_spec.rb +61 -0
  60. data/spec/fixture_spec.rb +114 -0
  61. data/spec/fixtures/bar.png +2907 -0
  62. data/spec/fixtures/default_from_body.html +13 -0
  63. data/spec/fixtures/default_from_meta.html +12 -0
  64. data/spec/fixtures/foo.png +0 -0
  65. data/spec/fixtures/google_shift_jis.html +6 -0
  66. data/spec/fixtures/google_utf8.html +6 -0
  67. data/spec/fixtures/og_not_valid_example.html +12 -0
  68. data/spec/fixtures/og_valid_example.html +18 -0
  69. data/spec/fixtures/og_valid_multi_image_example.html +13 -0
  70. data/spec/fixtures/og_valid_multi_video_example.html +13 -0
  71. data/spec/grader_spec.rb +27 -0
  72. data/spec/graders/base_spec.rb +14 -0
  73. data/spec/graders/html_attribute_spec.rb +50 -0
  74. data/spec/graders/length_spec.rb +93 -0
  75. data/spec/graders/link_density_spec.rb +52 -0
  76. data/spec/graders/position_spec.rb +49 -0
  77. data/spec/image_comparators/size_spec.rb +58 -0
  78. data/spec/image_validator_spec.rb +37 -0
  79. data/spec/model_spec.rb +27 -0
  80. data/spec/models/description_spec.rb +66 -0
  81. data/spec/models/favicon_spec.rb +12 -0
  82. data/spec/models/image_spec.rb +95 -0
  83. data/spec/models/title_spec.rb +26 -0
  84. data/spec/models/video_spec.rb +49 -0
  85. data/spec/models/website_spec.rb +51 -0
  86. data/spec/page_spec.rb +28 -0
  87. data/spec/processor_spec.rb +410 -0
  88. data/spec/response_spec.rb +62 -0
  89. data/spec/scraper_spec.rb +70 -0
  90. data/spec/scrapers/base_spec.rb +69 -0
  91. data/spec/scrapers/opengraph/base_spec.rb +96 -0
  92. data/spec/spec_helper.rb +11 -0
  93. data/spec/uri_spec.rb +44 -0
  94. data/spec/video_parser_spec.rb +148 -0
  95. metadata +271 -0
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/image_comparators/base'
4
+ require 'link_thumbnailer/image_comparators/size'
5
+
6
module LinkThumbnailer
  # Compares a reference image against another image.
  #
  # The comparison itself is performed by
  # ::LinkThumbnailer::ImageComparators::Size; this class only wires the
  # reference image into that strategy.
  class ImageComparator
    # The reference image handed to the comparison strategy.
    attr_reader :image

    # @param image the reference image
    def initialize(image)
      @image = image
    end

    # Compares +other+ against the reference image.
    #
    # @param other the image to compare with
    # @return whatever the size comparator returns for +other+
    def call(other)
      ::LinkThumbnailer::ImageComparators::Size.new(image).call(other)
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module LinkThumbnailer
  module ImageComparators
    # Abstract parent for image comparison strategies.
    #
    # Subclasses receive the reference image through the constructor and
    # must override #call.
    class Base
      # The reference image the strategy compares against.
      attr_reader :image

      # @param image the reference image
      def initialize(image)
        @image = image
      end

      # Placeholder for the comparison; concrete strategies override it.
      #
      # Any arguments are accepted and ignored so that invoking an
      # unimplemented strategy the same way as a concrete one (with the
      # other image as argument) reports NotImplementedError rather than
      # an arity error.
      #
      # @raise [NotImplementedError] always
      def call(*)
        raise NotImplementedError, "#{self.class} must implement #call"
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
module LinkThumbnailer
  module ImageComparators
    # Comparison strategy based on the smallest dimension of each image.
    class Size < ::LinkThumbnailer::ImageComparators::Base
      # Compares the squared smallest dimension of +other+ with that of the
      # reference image. +other+ is on the left-hand side of <=>, so the
      # result is positive when +other+ has the larger value.
      #
      # @param other an object whose #size responds to #min
      # @return [Integer] -1, 0 or 1
      def call(other)
        other_score = other.size.min.to_i**2
        own_score = image.size.min.to_i**2

        other_score <=> own_score
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'image_info'
4
+
5
module LinkThumbnailer
  # Resolves image information (size and type) for one or more image URLs.
  #
  # When the current page configuration enables +image_stats+, the lookup
  # is delegated to ::ImageInfo; otherwise (or when that lookup raises) a
  # NullImage placeholder is built for every non-nil URL.
  class ImageParser
    # The resolved images, one entry per URL that could be represented.
    attr_reader :images

    # @param urls a single URL or a collection of URLs (nil is tolerated)
    def initialize(urls)
      @images = perform? ? image_info(urls) : default_images(urls)
    end

    # Size of the first resolved image.
    #
    # @return the first image's size, or [0, 0] when no image was resolved
    def size
      primary_image.size
    end

    # Type of the first resolved image.
    #
    # @return the first image's type, or nil when no image was resolved
    def type
      primary_image.type
    end

    private

    # First resolved image. Falls back to an empty NullImage so that #size
    # and #type do not fail with NoMethodError when +images+ is empty
    # (for example when nil was given as URL).
    def primary_image
      images.first || NullImage.new(nil)
    end

    # Builds one NullImage per non-nil URL.
    def default_images(urls)
      Array(urls).compact.map { |uri| build_default_image(uri) }
    end

    def build_default_image(uri)
      NullImage.new(uri)
    end

    # Whether image statistics should be fetched, per the page configuration.
    def perform?
      ::LinkThumbnailer.page.config.image_stats
    end

    def max_concurrency
      ::LinkThumbnailer.page.config.max_concurrency
    end

    # Fetches image information through ::ImageInfo. This is a deliberate
    # best-effort: any StandardError falls back to placeholder images.
    def image_info(urls)
      ::ImageInfo.from(urls, max_concurrency: max_concurrency)
    rescue StandardError
      default_images(urls)
    end

    # Placeholder used when no real image information is available.
    class NullImage
      attr_reader :uri

      def initialize(uri)
        @uri = uri
      end

      # @return [Array<Integer>] always [0, 0]
      def size
        [0, 0]
      end

      # @return [nil] a placeholder has no known type
      def type
        nil
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'delegate'
4
+
5
module LinkThumbnailer
  # Decides whether an image is acceptable according to the blacklist of
  # the current page configuration. Unknown messages are delegated to that
  # configuration object.
  class ImageValidator < ::SimpleDelegator
    attr_reader :config, :image

    # @param image an object responding to #src
    def initialize(image)
      @config = ::LinkThumbnailer.page.config
      @image = image

      super(config)
    end

    # Checks the image source against every blacklist entry.
    #
    # Each entry is looked up in the source string with String#[], so an
    # entry matches when it is found anywhere in the source. An image
    # without a source is never rejected.
    #
    # @return [Boolean] false when any blacklist entry matches, true otherwise
    def call
      blacklist_urls.none? { |entry| image.src && image.src.to_s[entry] }
    end

    private

    def blacklist_urls
      config.blacklist_urls
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module LinkThumbnailer
  # Common parent of the model classes: JSON serialization on top of
  # #as_json (which this class does not define itself) and a text
  # sanitizing helper.
  class Model
    # Serializes the result of #as_json.
    #
    # @param args forwarded to #to_json of the #as_json result
    def to_json(*args)
      as_json.to_json(*args)
    end

    private

    # Cleans up a piece of text.
    #
    # The bytes are read as UTF-8 and round-tripped through UTF-16 with
    # invalid or undefined sequences dropped; the result is stripped and
    # every run of CR / LF / form-feed characters becomes a single "\n".
    # The argument itself is left untouched.
    #
    # @param str [String, nil]
    # @return [String, nil] the cleaned text, or nil when +str+ is nil/false
    def sanitize(str)
      return unless str

      utf16 = str.dup.encode!("UTF-16", "UTF-8", invalid: :replace, undef: :replace, replace: "")
      utf8 = utf16.encode!("UTF-8", "UTF-16")

      utf8.strip.gsub(/[\r\n\f]+/, "\n")
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/model'
4
+ require 'link_thumbnailer/grader'
5
+
6
module LinkThumbnailer
  module Models
    # A description candidate together with the probability assigned to it
    # by ::LinkThumbnailer::Grader.
    class Description < ::LinkThumbnailer::Model
      attr_reader :node, :text, :position, :candidates_number
      attr_accessor :probability

      # @param node the node the text was taken from
      # @param text the raw description text; stored sanitized
      # @param position defaults to 1
      # @param candidates_number defaults to 1
      def initialize(node, text, position = 1, candidates_number = 1)
        @node = node
        @text = sanitize(text)
        @position = position
        @candidates_number = candidates_number
        # Graded last: the grader receives this instance, so the other
        # attributes are assigned before it runs.
        self.probability = compute_probability
      end

      # @return the sanitized text
      def to_s
        text
      end

      # Orders descriptions by probability.
      def <=>(other)
        probability <=> other.probability
      end

      private

      # Asks the grader for the probability of this description.
      def compute_probability
        ::LinkThumbnailer::Grader.new(self).call
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/model'
4
+
5
module LinkThumbnailer
  module Models
    # Wraps the URI of a favicon.
    class Favicon < ::LinkThumbnailer::Model
      attr_reader :uri

      # @param uri the favicon location
      def initialize(uri)
        @uri = uri
      end

      # @return [String] the URI converted with #to_s
      def to_s
        uri.to_s
      end

      # @return [Hash] the string form of the favicon under the :src key
      def as_json(*)
        { src: to_s }
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/model'
4
+ require 'link_thumbnailer/image_parser'
5
+ require 'link_thumbnailer/image_comparator'
6
+ require 'link_thumbnailer/image_validator'
7
+
8
module LinkThumbnailer
  module Models
    # An image with its source, size and type.
    #
    # Size and type are taken from the constructor arguments when given and
    # from ::LinkThumbnailer::ImageParser otherwise.
    class Image < ::LinkThumbnailer::Model
      attr_reader :src, :type, :size

      # @param src the image source
      # @param size optional; looked up through the parser when nil/false
      # @param type optional; looked up through the parser when nil/false
      def initialize(src, size = nil, type = nil)
        @src = src
        @size = size || parser.size
        @type = type || parser.type
      end

      # @return [String] the source converted with #to_s
      def to_s
        src.to_s
      end

      # Delegates ordering to ::LinkThumbnailer::ImageComparator.
      def <=>(other)
        comparator.call(other)
      end

      # Delegates validation to ::LinkThumbnailer::ImageValidator.
      def valid?
        validator.call
      end

      # @return [Hash] :src (as string), :size and :type
      def as_json(*)
        { src: src.to_s, size: size, type: type }
      end

      private

      # Parser for this image's source, built once on first use.
      def parser
        @parser ||= ::LinkThumbnailer::ImageParser.new(src)
      end

      # A fresh validator is built on every call.
      def validator
        ::LinkThumbnailer::ImageValidator.new(self)
      end

      # A fresh comparator is built on every call.
      def comparator
        ::LinkThumbnailer::ImageComparator.new(self)
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/model'
4
+
5
module LinkThumbnailer
  module Models
    # A page title, stored as sanitized text.
    class Title < ::LinkThumbnailer::Model
      attr_reader :node, :text

      # @param node the node the title comes from; its #text is used when
      #   no explicit text is given
      # @param text optional explicit title text
      def initialize(node, text = nil)
        @node = node
        raw_text = text || node.text
        @text = sanitize(raw_text)
      end

      # @return the sanitized text
      def to_s
        text
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/model'
4
+ require 'link_thumbnailer/video_parser'
5
+
6
module LinkThumbnailer
  module Models
    # A video whose details (id, size, duration, provider, embed code) are
    # read from ::LinkThumbnailer::VideoParser.
    class Video < ::LinkThumbnailer::Model
      attr_reader :src, :size, :duration, :provider, :id, :embed_code

      # @param src the video source
      # @param size optional; the parser's size is used when nil/false
      def initialize(src, size = nil)
        @src = src

        # The parser is built from this instance, so +src+ is set first.
        details = parser
        @id = details.id
        @size = size || details.size
        @duration = details.duration
        @provider = details.provider
        @embed_code = details.embed_code
      end

      # @return [String] the source converted with #to_s
      def to_s
        src.to_s
      end

      # @return [Hash] :id, :src (as string), :size, :duration, :provider
      #   and :embed_code
      def as_json(*)
        {
          id: id,
          src: src.to_s,
          size: size,
          duration: duration,
          provider: provider,
          embed_code: embed_code
        }
      end

      private

      # Parser for this video, built once on first use.
      def parser
        @parser ||= ::LinkThumbnailer::VideoParser.new(self)
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/model'
4
+
5
module LinkThumbnailer
  module Models
    # Aggregates everything collected for a website: url, favicon, title,
    # description, images and videos.
    class Website < ::LinkThumbnailer::Model
      attr_accessor :url, :title, :description, :favicon
      attr_reader :videos

      def initialize
        @images = []
        @videos = []
      end

      # Singular alias of #videos=.
      def video=(video)
        self.videos = video
      end

      # Appends the given video(s) to the ones already stored; nothing is
      # replaced.
      def videos=(videos)
        Array(videos).each { |video| @videos << video }
      end

      # Singular alias of #images=.
      def image=(image)
        self.images = image
      end

      # Appends the given image(s) to the ones already stored, skipping any
      # image whose #valid? is falsy.
      def images=(images)
        Array(images).each { |image| @images << image if image.valid? }
      end

      # @return [Array] the stored images, sorted in place on every read
      def images
        @images.sort!
      end

      # @return [Hash] :url (as string), :favicon, :title, :description,
      #   plus the #as_json form of every image and video
      def as_json(*)
        {
          url: url.to_s,
          favicon: favicon,
          title: title,
          description: description,
          images: images.map(&:as_json),
          videos: videos.map(&:as_json)
        }
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'link_thumbnailer/response'
4
+ require 'link_thumbnailer/processor'
5
+ require 'link_thumbnailer/scraper'
6
+
7
module LinkThumbnailer
  # Represents one URL to process, together with a per-page copy of the
  # global configuration.
  class Page
    attr_reader :url, :options, :source

    # @param url the URL to process
    # @param options [Hash] configuration overrides; every key must match a
    #   public writer (+key=+) on the configuration object
    # @raise [NoMethodError] when an option has no matching public writer
    def initialize(url, options = {})
      @url = url
      @options = options

      set_options
    end

    # Runs the processor on the URL, stores its result in #source, then
    # runs the scraper.
    #
    # NOTE(review): the scraper is memoized, so a second call on the same
    # instance reuses the scraper built from the first source.
    #
    # @return whatever the scraper's #call returns
    def generate
      @source = processor.call(url)
      scraper.call
    end

    # Per-page configuration: a (shallow) +dup+ of the global
    # configuration, created on first access.
    def config
      @config ||= ::LinkThumbnailer.config.dup
    end

    private

    # Applies every option to the per-page configuration. +public_send+ is
    # used so that caller-supplied keys can only reach public writers.
    def set_options
      options.each { |key, value| config.public_send("#{key}=", value) }
    end

    def processor
      @processor ||= ::LinkThumbnailer::Processor.new
    end

    def scraper
      @scraper ||= ::LinkThumbnailer::Scraper.new(source, processor.url)
    end
  end
end