link_thumbnailer 3.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/CHANGELOG.md +334 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +210 -0
- data/Rakefile +9 -0
- data/lib/generators/link_thumbnailer/install_generator.rb +17 -0
- data/lib/generators/templates/initializer.rb +89 -0
- data/lib/link_thumbnailer.rb +38 -0
- data/lib/link_thumbnailer/configuration.rb +72 -0
- data/lib/link_thumbnailer/exceptions.rb +11 -0
- data/lib/link_thumbnailer/grader.rb +43 -0
- data/lib/link_thumbnailer/graders/base.rb +39 -0
- data/lib/link_thumbnailer/graders/html_attribute.rb +48 -0
- data/lib/link_thumbnailer/graders/length.rb +37 -0
- data/lib/link_thumbnailer/graders/link_density.rb +20 -0
- data/lib/link_thumbnailer/graders/position.rb +13 -0
- data/lib/link_thumbnailer/image_comparator.rb +26 -0
- data/lib/link_thumbnailer/image_comparators/base.rb +19 -0
- data/lib/link_thumbnailer/image_comparators/size.rb +13 -0
- data/lib/link_thumbnailer/image_parser.rb +62 -0
- data/lib/link_thumbnailer/image_validator.rb +32 -0
- data/lib/link_thumbnailer/model.rb +20 -0
- data/lib/link_thumbnailer/models/description.rb +37 -0
- data/lib/link_thumbnailer/models/favicon.rb +27 -0
- data/lib/link_thumbnailer/models/image.rb +56 -0
- data/lib/link_thumbnailer/models/title.rb +22 -0
- data/lib/link_thumbnailer/models/video.rb +44 -0
- data/lib/link_thumbnailer/models/website.rb +54 -0
- data/lib/link_thumbnailer/page.rb +43 -0
- data/lib/link_thumbnailer/parser.rb +15 -0
- data/lib/link_thumbnailer/processor.rb +128 -0
- data/lib/link_thumbnailer/railtie.rb +6 -0
- data/lib/link_thumbnailer/response.rb +39 -0
- data/lib/link_thumbnailer/scraper.rb +62 -0
- data/lib/link_thumbnailer/scrapers/base.rb +69 -0
- data/lib/link_thumbnailer/scrapers/default/base.rb +12 -0
- data/lib/link_thumbnailer/scrapers/default/description.rb +49 -0
- data/lib/link_thumbnailer/scrapers/default/favicon.rb +38 -0
- data/lib/link_thumbnailer/scrapers/default/images.rb +78 -0
- data/lib/link_thumbnailer/scrapers/default/title.rb +27 -0
- data/lib/link_thumbnailer/scrapers/default/videos.rb +18 -0
- data/lib/link_thumbnailer/scrapers/opengraph/base.rb +45 -0
- data/lib/link_thumbnailer/scrapers/opengraph/description.rb +12 -0
- data/lib/link_thumbnailer/scrapers/opengraph/favicon.rb +17 -0
- data/lib/link_thumbnailer/scrapers/opengraph/image.rb +107 -0
- data/lib/link_thumbnailer/scrapers/opengraph/images.rb +18 -0
- data/lib/link_thumbnailer/scrapers/opengraph/title.rb +12 -0
- data/lib/link_thumbnailer/scrapers/opengraph/video.rb +115 -0
- data/lib/link_thumbnailer/scrapers/opengraph/videos.rb +18 -0
- data/lib/link_thumbnailer/uri.rb +20 -0
- data/lib/link_thumbnailer/version.rb +5 -0
- data/lib/link_thumbnailer/video_parser.rb +47 -0
- data/link_thumbnailer.gemspec +29 -0
- data/spec/configuration_spec.rb +61 -0
- data/spec/fixture_spec.rb +114 -0
- data/spec/fixtures/bar.png +2907 -0
- data/spec/fixtures/default_from_body.html +13 -0
- data/spec/fixtures/default_from_meta.html +12 -0
- data/spec/fixtures/foo.png +0 -0
- data/spec/fixtures/google_shift_jis.html +6 -0
- data/spec/fixtures/google_utf8.html +6 -0
- data/spec/fixtures/og_not_valid_example.html +12 -0
- data/spec/fixtures/og_valid_example.html +18 -0
- data/spec/fixtures/og_valid_multi_image_example.html +13 -0
- data/spec/fixtures/og_valid_multi_video_example.html +13 -0
- data/spec/grader_spec.rb +27 -0
- data/spec/graders/base_spec.rb +14 -0
- data/spec/graders/html_attribute_spec.rb +50 -0
- data/spec/graders/length_spec.rb +93 -0
- data/spec/graders/link_density_spec.rb +52 -0
- data/spec/graders/position_spec.rb +49 -0
- data/spec/image_comparators/size_spec.rb +58 -0
- data/spec/image_validator_spec.rb +37 -0
- data/spec/model_spec.rb +27 -0
- data/spec/models/description_spec.rb +66 -0
- data/spec/models/favicon_spec.rb +12 -0
- data/spec/models/image_spec.rb +95 -0
- data/spec/models/title_spec.rb +26 -0
- data/spec/models/video_spec.rb +49 -0
- data/spec/models/website_spec.rb +51 -0
- data/spec/page_spec.rb +28 -0
- data/spec/processor_spec.rb +410 -0
- data/spec/response_spec.rb +62 -0
- data/spec/scraper_spec.rb +70 -0
- data/spec/scrapers/base_spec.rb +69 -0
- data/spec/scrapers/opengraph/base_spec.rb +96 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/uri_spec.rb +44 -0
- data/spec/video_parser_spec.rb +148 -0
- metadata +271 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/image_comparators/base'
|
|
4
|
+
require 'link_thumbnailer/image_comparators/size'
|
|
5
|
+
|
|
6
|
+
module LinkThumbnailer
  # Compares the wrapped image with another one by delegating to the
  # size-based comparator.
  class ImageComparator
    # The image every comparison starts from.
    attr_reader :image

    # image - the object handed to the size comparator on each comparison.
    def initialize(image)
      @image = image
    end

    # Passes +other+ to the size comparator and returns its result untouched.
    def call(other)
      comparator = size_comparator
      comparator.call(other)
    end

    private

    # Builds a new size comparator around #image on every call (no memoisation).
    def size_comparator
      comparator_class = ::LinkThumbnailer::ImageComparators::Size
      comparator_class.new(image)
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkThumbnailer
  module ImageComparators
    # Abstract parent for image comparators: stores the reference image and
    # leaves the actual comparison (#call) to subclasses.
    class Base
      # The image the comparison is made from.
      attr_reader :image

      # image - the reference image kept for later comparison.
      def initialize(image)
        @image = image
      end

      # Abstract comparison hook; subclasses must override it.
      #
      # Comparators are invoked with the other image as a single argument, so
      # the abstract method accepts (and ignores) it. That way a subclass that
      # forgot to override #call fails with NotImplementedError instead of a
      # misleading ArgumentError. Calling without an argument still works.
      #
      # Raises NotImplementedError always.
      def call(_other = nil)
        raise NotImplementedError
      end
    end
  end
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'image_info'
|
|
4
|
+
|
|
5
|
+
module LinkThumbnailer
  # Resolves size and type information for one or more image URLs.
  #
  # When the page configuration enables +image_stats+ the lookup goes through
  # ::ImageInfo; otherwise (or when that lookup raises) every URL is wrapped
  # in a NullImage placeholder.
  class ImageParser
    # The list of image-info objects (or NullImage placeholders).
    attr_reader :images

    # urls - a single URL, an array of URLs, or nil.
    def initialize(urls)
      @images = perform? ? image_info(urls) : default_images(urls)
    end

    # Size of the first image; [0, 0] when there is no image at all.
    def size
      first_image.size
    end

    # Type of the first image; nil when there is no image at all.
    def type
      first_image.type
    end

    private

    # First parsed image, or a NullImage placeholder when the list is empty
    # (for instance when +urls+ was nil), so #size and #type never hit nil.
    def first_image
      images.first || NullImage.new(nil)
    end

    # Wraps each non-nil URL in a NullImage.
    def default_images(urls)
      Array(urls).compact.map { |uri| build_default_image(uri) }
    end

    def build_default_image(uri)
      NullImage.new(uri)
    end

    # Whether image statistics should be fetched, per the page configuration.
    def perform?
      ::LinkThumbnailer.page.config.image_stats
    end

    def max_concurrency
      ::LinkThumbnailer.page.config.max_concurrency
    end

    # Best effort: any StandardError raised by the ImageInfo lookup falls back
    # to placeholders instead of failing the whole parse.
    def image_info(urls)
      ::ImageInfo.from(urls, max_concurrency: max_concurrency)
    rescue
      default_images(urls)
    end

    # Placeholder used when no real image information is available.
    class NullImage
      attr_reader :uri

      def initialize(uri)
        @uri = uri
      end

      # Always reports a zero size.
      def size
        [0, 0]
      end

      # No type is known for a placeholder; returns nil.
      def type
      end
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'delegate'
|
|
4
|
+
|
|
5
|
+
module LinkThumbnailer
  # Decides whether an image may be kept, based on the blacklist found in the
  # current page configuration. Unknown messages are delegated to that
  # configuration object (SimpleDelegator).
  class ImageValidator < ::SimpleDelegator
    attr_reader :config, :image

    # image - object exposing #src; checked against the blacklist in #call.
    def initialize(image)
      page_config = ::LinkThumbnailer.page.config
      @config = page_config
      @image = image

      super(page_config)
    end

    # Returns false as soon as the image source matches one blacklist entry
    # (String#[] lookup, so entries may be substrings or regexps); true
    # otherwise, including when the image has no source.
    def call
      blacklist_urls.none? { |pattern| image.src && image.src.to_s[pattern] }
    end

    private

    # Blacklist entries taken from the page configuration.
    def blacklist_urls
      config.blacklist_urls
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkThumbnailer
  # Common parent of the model classes: JSON serialisation plus a text
  # clean-up helper.
  class Model
    # Serialises the value returned by #as_json, forwarding +args+ to its
    # #to_json. #as_json itself is not defined in this class.
    def to_json(*args)
      json_ready = as_json
      json_ready.to_json(*args)
    end

    private

    # Returns a cleaned copy of +str+ (nil when +str+ is nil/false):
    # invalid or undefined bytes are dropped by a UTF-8 -> UTF-16 -> UTF-8
    # round trip, surrounding whitespace is stripped, and every run of
    # CR / LF / FF characters collapses to a single "\n".
    # The argument itself is never modified.
    def sanitize(str)
      return nil unless str

      utf16 = str.dup.encode!("UTF-16", "UTF-8", invalid: :replace, undef: :replace, replace: "")
      utf8 = utf16.encode!("UTF-8", "UTF-16")
      utf8.strip.gsub(/[\r\n\f]+/, "\n")
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/model'
|
|
4
|
+
require 'link_thumbnailer/grader'
|
|
5
|
+
|
|
6
|
+
module LinkThumbnailer
  module Models
    # A candidate description: the source node, its sanitized text, where it
    # ranks among the candidates, and the probability computed for it.
    class Description < ::LinkThumbnailer::Model
      attr_reader :node, :text, :position, :candidates_number
      attr_accessor :probability

      # node              - the node the text was taken from.
      # text              - raw text; stored after sanitizing.
      # position          - position among the candidates (default 1).
      # candidates_number - total number of candidates (default 1).
      #
      # The probability is computed last, once every other attribute is set,
      # because the grader receives this instance.
      def initialize(node, text, position = 1, candidates_number = 1)
        @node = node
        @text = sanitize(text)
        @position = position
        @candidates_number = candidates_number
        @probability = compute_probability
      end

      # The sanitized text.
      def to_s
        text
      end

      # Orders descriptions by their probability.
      def <=>(other)
        mine = probability
        theirs = other.probability
        mine <=> theirs
      end

      private

      # Result of running ::LinkThumbnailer::Grader on this description.
      def compute_probability
        grader = ::LinkThumbnailer::Grader.new(self)
        grader.call
      end
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/model'
|
|
4
|
+
|
|
5
|
+
module LinkThumbnailer
  module Models
    # Holds the URI of a favicon.
    class Favicon < ::LinkThumbnailer::Model
      attr_reader :uri

      # uri - the favicon location; stored as given.
      def initialize(uri)
        @uri = uri
      end

      # String form of the URI.
      def to_s
        uri.to_s
      end

      # Hash representation with the string form of the URI under :src.
      def as_json(*)
        { src: to_s }
      end
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/model'
|
|
4
|
+
require 'link_thumbnailer/image_parser'
|
|
5
|
+
require 'link_thumbnailer/image_comparator'
|
|
6
|
+
require 'link_thumbnailer/image_validator'
|
|
7
|
+
|
|
8
|
+
module LinkThumbnailer
  module Models
    # An image candidate: its source plus size and type, which are looked up
    # through ImageParser when not supplied by the caller.
    class Image < ::LinkThumbnailer::Model
      attr_reader :src, :type, :size

      # src  - image location.
      # size - optional size; falls back to the parser's value when nil/false.
      # type - optional type; falls back to the parser's value when nil/false.
      def initialize(src, size = nil, type = nil)
        @src = src
        @size = size ? size : parser.size
        @type = type ? type : parser.type
      end

      # String form of the source.
      def to_s
        src.to_s
      end

      # Comparison result produced by ImageComparator for this image and +other+.
      def <=>(other)
        comparator.call(other)
      end

      # Whether ImageValidator accepts this image.
      def valid?
        validator.call
      end

      # Hash representation: stringified source, size and type.
      def as_json(*)
        { src: src.to_s, size: size, type: type }
      end

      private

      # Memoised parser, so size and type share a single lookup.
      def parser
        @parser ||= ::LinkThumbnailer::ImageParser.new(src)
      end

      # A new validator per call.
      def validator
        ::LinkThumbnailer::ImageValidator.new(self)
      end

      # A new comparator per call.
      def comparator
        ::LinkThumbnailer::ImageComparator.new(self)
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/model'
|
|
4
|
+
|
|
5
|
+
module LinkThumbnailer
  module Models
    # A page title: the node it came from and its sanitized text.
    class Title < ::LinkThumbnailer::Model
      attr_reader :node, :text

      # node - the source node; its #text is used when +text+ is not given.
      # text - optional explicit text that takes precedence over the node's.
      def initialize(node, text = nil)
        @node = node
        raw_text = text || node.text
        @text = sanitize(raw_text)
      end

      # The sanitized title text.
      def to_s
        text
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/model'
|
|
4
|
+
require 'link_thumbnailer/video_parser'
|
|
5
|
+
|
|
6
|
+
module LinkThumbnailer
  module Models
    # A video candidate. Everything except the source (and an optional
    # explicit size) is read from VideoParser.
    class Video < ::LinkThumbnailer::Model
      attr_reader :src, :size, :duration, :provider, :id, :embed_code

      # src  - video location; set first because the parser is built from
      #        this instance.
      # size - optional size; the parser's value is used when nil/false.
      def initialize(src, size = nil)
        @src = src
        @id = parser.id
        @size = size ? size : parser.size
        @duration = parser.duration
        @provider = parser.provider
        @embed_code = parser.embed_code
      end

      # String form of the source.
      def to_s
        src.to_s
      end

      # Hash representation of every attribute, with the source stringified.
      def as_json(*)
        {
          id: id, src: src.to_s, size: size,
          duration: duration, provider: provider, embed_code: embed_code
        }
      end

      private

      # Memoised parser shared by all attribute lookups.
      def parser
        @parser ||= ::LinkThumbnailer::VideoParser.new(self)
      end
    end
  end
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/model'
|
|
4
|
+
|
|
5
|
+
module LinkThumbnailer
  module Models
    # Aggregate of everything collected for a page: url, title, description,
    # favicon, images and videos.
    class Website < ::LinkThumbnailer::Model
      attr_accessor :url, :title, :description, :favicon
      # Reader only: the writer is the accumulating #videos= below.
      attr_reader :videos

      def initialize
        @videos = []
        @images = []
      end

      # Singular alias of #videos=.
      def video=(video)
        self.videos = video
      end

      # Appends the given video(s) to the list; existing entries are kept.
      def videos=(videos)
        Array(videos).each { |video| @videos.push(video) }
      end

      # Singular alias of #images=.
      def image=(image)
        self.images = image
      end

      # Appends the given image(s), skipping those whose #valid? is falsy;
      # existing entries are kept.
      def images=(images)
        Array(images).each { |image| @images.push(image) if image.valid? }
      end

      # The collected images, sorted in place on every read.
      def images
        @images.sort!
      end

      # Hash representation; images and videos are converted through their
      # own #as_json, the other values are passed through as stored.
      def as_json(*)
        {
          url: url.to_s,
          favicon: favicon,
          title: title,
          description: description,
          images: images.map { |image| image.as_json },
          videos: videos.map { |video| video.as_json }
        }
      end
    end
  end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/response'
|
|
4
|
+
require 'link_thumbnailer/processor'
|
|
5
|
+
require 'link_thumbnailer/scraper'
|
|
6
|
+
|
|
7
|
+
module LinkThumbnailer
  # One thumbnailing run for a URL: holds a private copy of the configuration
  # (with per-call overrides applied), fetches the page source through the
  # processor and hands it to the scraper.
  class Page
    attr_reader :url, :options, :source

    # url     - the address to process.
    # options - Hash of configuration overrides; each key must name a public
    #           setter of the configuration object (default: {}).
    #
    # Raises NoMethodError when an option has no matching public setter.
    def initialize(url, options = {})
      @url = url
      @options = options

      set_options
    end

    # Runs the processor on #url, stores its result in #source and returns
    # the scraper's result.
    def generate
      @source = processor.call(url)
      scraper.call
    end

    # Per-page copy of the global configuration, so overrides applied here
    # are assigned to the copy rather than to ::LinkThumbnailer.config.
    def config
      @config ||= ::LinkThumbnailer.config.dup
    end

    private

    # Applies every option through the matching setter. public_send is used
    # so that caller-supplied keys can never reach private or protected
    # methods of the configuration object.
    # NOTE(review): assumes the configuration setters are public — confirm
    # against the Configuration class.
    def set_options
      options.each { |key, value| config.public_send("#{key}=", value) }
    end

    def processor
      @processor ||= ::LinkThumbnailer::Processor.new
    end

    # Built lazily after the source is available; uses the processor's url.
    def scraper
      @scraper ||= ::LinkThumbnailer::Scraper.new(source, processor.url)
    end
  end
end
|