link_thumbnailer 3.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/CHANGELOG.md +334 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +210 -0
- data/Rakefile +9 -0
- data/lib/generators/link_thumbnailer/install_generator.rb +17 -0
- data/lib/generators/templates/initializer.rb +89 -0
- data/lib/link_thumbnailer.rb +38 -0
- data/lib/link_thumbnailer/configuration.rb +72 -0
- data/lib/link_thumbnailer/exceptions.rb +11 -0
- data/lib/link_thumbnailer/grader.rb +43 -0
- data/lib/link_thumbnailer/graders/base.rb +39 -0
- data/lib/link_thumbnailer/graders/html_attribute.rb +48 -0
- data/lib/link_thumbnailer/graders/length.rb +37 -0
- data/lib/link_thumbnailer/graders/link_density.rb +20 -0
- data/lib/link_thumbnailer/graders/position.rb +13 -0
- data/lib/link_thumbnailer/image_comparator.rb +26 -0
- data/lib/link_thumbnailer/image_comparators/base.rb +19 -0
- data/lib/link_thumbnailer/image_comparators/size.rb +13 -0
- data/lib/link_thumbnailer/image_parser.rb +62 -0
- data/lib/link_thumbnailer/image_validator.rb +32 -0
- data/lib/link_thumbnailer/model.rb +20 -0
- data/lib/link_thumbnailer/models/description.rb +37 -0
- data/lib/link_thumbnailer/models/favicon.rb +27 -0
- data/lib/link_thumbnailer/models/image.rb +56 -0
- data/lib/link_thumbnailer/models/title.rb +22 -0
- data/lib/link_thumbnailer/models/video.rb +44 -0
- data/lib/link_thumbnailer/models/website.rb +54 -0
- data/lib/link_thumbnailer/page.rb +43 -0
- data/lib/link_thumbnailer/parser.rb +15 -0
- data/lib/link_thumbnailer/processor.rb +128 -0
- data/lib/link_thumbnailer/railtie.rb +6 -0
- data/lib/link_thumbnailer/response.rb +39 -0
- data/lib/link_thumbnailer/scraper.rb +62 -0
- data/lib/link_thumbnailer/scrapers/base.rb +69 -0
- data/lib/link_thumbnailer/scrapers/default/base.rb +12 -0
- data/lib/link_thumbnailer/scrapers/default/description.rb +49 -0
- data/lib/link_thumbnailer/scrapers/default/favicon.rb +38 -0
- data/lib/link_thumbnailer/scrapers/default/images.rb +78 -0
- data/lib/link_thumbnailer/scrapers/default/title.rb +27 -0
- data/lib/link_thumbnailer/scrapers/default/videos.rb +18 -0
- data/lib/link_thumbnailer/scrapers/opengraph/base.rb +45 -0
- data/lib/link_thumbnailer/scrapers/opengraph/description.rb +12 -0
- data/lib/link_thumbnailer/scrapers/opengraph/favicon.rb +17 -0
- data/lib/link_thumbnailer/scrapers/opengraph/image.rb +107 -0
- data/lib/link_thumbnailer/scrapers/opengraph/images.rb +18 -0
- data/lib/link_thumbnailer/scrapers/opengraph/title.rb +12 -0
- data/lib/link_thumbnailer/scrapers/opengraph/video.rb +115 -0
- data/lib/link_thumbnailer/scrapers/opengraph/videos.rb +18 -0
- data/lib/link_thumbnailer/uri.rb +20 -0
- data/lib/link_thumbnailer/version.rb +5 -0
- data/lib/link_thumbnailer/video_parser.rb +47 -0
- data/link_thumbnailer.gemspec +29 -0
- data/spec/configuration_spec.rb +61 -0
- data/spec/fixture_spec.rb +114 -0
- data/spec/fixtures/bar.png +2907 -0
- data/spec/fixtures/default_from_body.html +13 -0
- data/spec/fixtures/default_from_meta.html +12 -0
- data/spec/fixtures/foo.png +0 -0
- data/spec/fixtures/google_shift_jis.html +6 -0
- data/spec/fixtures/google_utf8.html +6 -0
- data/spec/fixtures/og_not_valid_example.html +12 -0
- data/spec/fixtures/og_valid_example.html +18 -0
- data/spec/fixtures/og_valid_multi_image_example.html +13 -0
- data/spec/fixtures/og_valid_multi_video_example.html +13 -0
- data/spec/grader_spec.rb +27 -0
- data/spec/graders/base_spec.rb +14 -0
- data/spec/graders/html_attribute_spec.rb +50 -0
- data/spec/graders/length_spec.rb +93 -0
- data/spec/graders/link_density_spec.rb +52 -0
- data/spec/graders/position_spec.rb +49 -0
- data/spec/image_comparators/size_spec.rb +58 -0
- data/spec/image_validator_spec.rb +37 -0
- data/spec/model_spec.rb +27 -0
- data/spec/models/description_spec.rb +66 -0
- data/spec/models/favicon_spec.rb +12 -0
- data/spec/models/image_spec.rb +95 -0
- data/spec/models/title_spec.rb +26 -0
- data/spec/models/video_spec.rb +49 -0
- data/spec/models/website_spec.rb +51 -0
- data/spec/page_spec.rb +28 -0
- data/spec/processor_spec.rb +410 -0
- data/spec/response_spec.rb +62 -0
- data/spec/scraper_spec.rb +70 -0
- data/spec/scrapers/base_spec.rb +69 -0
- data/spec/scrapers/opengraph/base_spec.rb +96 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/uri_spec.rb +44 -0
- data/spec/video_parser_spec.rb +148 -0
- metadata +271 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/scrapers/default/base'
|
|
4
|
+
|
|
5
|
+
module LinkThumbnailer
  module Scrapers
    module Default
      # Default (non-OpenGraph) scraper for the page title.
      #
      # `document`, `attribute_name` and `modelize` are not defined here;
      # presumably they come from the Default::Base ancestry - confirm there.
      class Title < ::LinkThumbnailer::Scrapers::Default::Base

        # @return [String] string form of the model built from the matched nodes.
        def value
          title = model
          title.to_s
        end

        private

        # Wraps the matched nodes in a model via the inherited `modelize`.
        def model
          matched = node
          modelize(matched)
        end

        # Queries the document with `attribute_name` used as a CSS selector.
        def node
          selector = attribute_name
          document.css(selector)
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/scrapers/default/base'
|
|
4
|
+
require 'link_thumbnailer/models/video'
|
|
5
|
+
|
|
6
|
+
module LinkThumbnailer
  module Scrapers
    module Default
      # Default (non-OpenGraph) scraper for videos.
      class Videos < ::LinkThumbnailer::Scrapers::Default::Base

        # This scraper never produces a value.
        #
        # @return [nil] always (an empty method body evaluates to nil).
        def value; end

      end
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/scrapers/base'
|
|
4
|
+
|
|
5
|
+
module LinkThumbnailer
  module Scrapers
    module Opengraph
      # Shared behaviour for scrapers that read `og:*` meta tags.
      #
      # `document`, `attribute_name`, `modelize` and `meta_xpath` are not
      # defined here; presumably they are inherited from Scrapers::Base.
      class Base < ::LinkThumbnailer::Scrapers::Base

        # @return [Boolean] true when at least one `<meta>` element carries a
        #   `name` or `property` attribute starting with `og:`.
        def applicable?
          meta.any? { |tag| opengraph_node?(tag) }
        end

        # @return [String] string form of the model; an empty string when no
        #   matching node exists (the model is then nil and `nil.to_s` is '').
        def value
          model.to_s
        end

        private

        # Builds the model from the matched node and its `content` attribute.
        # Returns nil when there is no matching node.
        def model
          found = node
          return unless found

          modelize(found, found.attributes['content'].to_s)
        end

        # Looks the attribute up with the default key first, then retries with
        # `key: :name`. A successful lookup is cached in @node; a miss is
        # looked up again on the next call.
        def node
          return @node if @node

          @node = meta_xpath(attribute: attribute) ||
                  meta_xpath(attribute: attribute, key: :name)
        end

        # OpenGraph attribute name, e.g. `og:title` for attribute_name `title`.
        def attribute
          "og:#{attribute_name}"
        end

        # True when the node's `name` or `property` attribute starts with `og:`.
        def opengraph_node?(node)
          %w[name property].any? do |key|
            node.attribute(key).to_s.start_with?('og:')
          end
        end

        # All `<meta>` elements of the document.
        def meta
          document.css('meta')
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/scrapers/opengraph/base'
|
|
4
|
+
|
|
5
|
+
module LinkThumbnailer
  module Scrapers
    module Opengraph
      # OpenGraph scraper for the favicon.
      class Favicon < ::LinkThumbnailer::Scrapers::Opengraph::Base

        # This scraper never produces a value.
        #
        # @return [nil] always (an empty method body evaluates to nil).
        def value; end

      end
    end
  end
end
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/scrapers/opengraph/base'
|
|
4
|
+
require 'link_thumbnailer/uri'
|
|
5
|
+
|
|
6
|
+
module LinkThumbnailer
  module Scrapers
    module Opengraph
      # Collects image models from the `og:image` and `og:image:url` meta tags.
      class Image < ::LinkThumbnailer::Scrapers::Opengraph::Base

        # @return [Array] models built from `og:image` tags followed by the
        #   models built from `og:image:url` tags.
        def value
          ::LinkThumbnailer::Scrapers::Opengraph::Image::Base.new(document, website).value +
            ::LinkThumbnailer::Scrapers::Opengraph::Image::Url.new(document, website).value
        end

        # NOTE: `private` only affects methods defined later in this class
        # body; the nested classes below remain publicly reachable constants.
        private

        # Handles `og:image` attributes.
        class Base < ::LinkThumbnailer::Scrapers::Opengraph::Base

          # @return [Array] one image model per valid matching meta tag.
          def value
            model
          end

          # Builds one model per matching meta tag. Tags whose `content` is
          # rejected by LinkThumbnailer::URI#valid? are dropped.
          def model
            nodes.map do |n|
              uri = LinkThumbnailer::URI.new(n.attributes['content'])
              modelize(n, uri.to_s) if uri.valid?
            end.compact
          end

          # Builds an image model from the URL text and the declared size.
          # The `node` argument is accepted but not used here.
          def modelize(node, text = nil)
            model_class.new(text, size)
          end

          def model_class
            ::LinkThumbnailer::Models::Image
          end

          # Meta tags matching `attribute` with the default key; when none are
          # found the lookup is retried with `key: :name`.
          def nodes
            matches = meta_xpaths(attribute: attribute)
            matches.empty? ? meta_xpaths(attribute: attribute, key: :name) : matches
          end

          def attribute
            'og:image'
          end

          # Declared `[width, height]` as integers, or nil when either
          # dimension tag is missing. A fresh array is returned on each call
          # so models never share one array object.
          def size
            [width.to_i, height.to_i] if width && height
          end

          # Content of the `og:image:width` tag, or nil when absent.
          # Memoised: the value is the same for every node of this document,
          # so the scraper is built and the document queried only once.
          # `defined?` is used so that a nil result is cached as well.
          def width
            return @width if defined?(@width)

            @width = ::LinkThumbnailer::Scrapers::Opengraph::Image::Width.new(document).value
          end

          # Content of the `og:image:height` tag, or nil when absent.
          # Memoised for the same reason as #width.
          def height
            return @height if defined?(@height)

            @height = ::LinkThumbnailer::Scrapers::Opengraph::Image::Height.new(document).value
          end

        end

        # Handles `og:image:url` attributes.
        class Url < ::LinkThumbnailer::Scrapers::Opengraph::Image::Base

          private

          def attribute
            'og:image:url'
          end

        end

        # Handles `og:image:width` attributes.
        class Width < ::LinkThumbnailer::Scrapers::Opengraph::Base

          # @return [String, nil] the tag's `content`, or nil when no tag matches.
          def value
            node.attributes['content'].to_s if node
          end

          private

          def attribute
            'og:image:width'
          end

        end

        # Handles `og:image:height` attributes.
        class Height < ::LinkThumbnailer::Scrapers::Opengraph::Base

          # @return [String, nil] the tag's `content`, or nil when no tag matches.
          def value
            node.attributes['content'].to_s if node
          end

          private

          def attribute
            'og:image:height'
          end

        end

      end
    end
  end
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/scrapers/opengraph/base'
|
|
4
|
+
require 'link_thumbnailer/scrapers/opengraph/image'
|
|
5
|
+
|
|
6
|
+
module LinkThumbnailer
  module Scrapers
    module Opengraph
      # Plural entry point that delegates to the singular Image scraper.
      class Images < ::LinkThumbnailer::Scrapers::Opengraph::Base

        # Runs the Image scraper against the same document and website.
        # The `attribute_name` argument is ignored; the delegate is always
        # called with 'image'.
        def call(attribute_name)
          delegate = ::LinkThumbnailer::Scrapers::Opengraph::Image.new(document, website)
          delegate.call('image')
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/scrapers/opengraph/base'
|
|
4
|
+
|
|
5
|
+
module LinkThumbnailer
  module Scrapers
    module Opengraph
      # Collects video models from the `og:video` and `og:video:url` meta tags.
      class Video < ::LinkThumbnailer::Scrapers::Opengraph::Base

        # @return [Array] models built by Video::Base followed by the models
        #   built by Video::Url.
        def value
          ::LinkThumbnailer::Scrapers::Opengraph::Video::Base.new(document, website).value +
            ::LinkThumbnailer::Scrapers::Opengraph::Video::Url.new(document, website).value
        end

        # NOTE: `private` only affects methods defined later in this class
        # body; the nested classes below remain publicly reachable constants.
        private

        # Handles `og:video` attributes.
        class Base < ::LinkThumbnailer::Scrapers::Opengraph::Base

          # @return [Array] one video model per matching meta tag.
          def value
            model
          end

          private

          # Builds one model per matching meta tag from its `content` attribute.
          def model
            nodes.map { |n| modelize(n, n.attributes['content'].to_s) }
          end

          # Builds a video model from the URL text and the declared size.
          # The `node` argument is accepted but not used here.
          def modelize(node, text = nil)
            model_class.new(text, size)
          end

          def model_class
            ::LinkThumbnailer::Models::Video
          end

          # Meta tags matching `attribute` with the default key; when none are
          # found the lookup is retried with `key: :name`.
          def nodes
            matches = meta_xpaths(attribute: attribute)
            matches.empty? ? meta_xpaths(attribute: attribute, key: :name) : matches
          end

          def attribute
            return 'og:url' if vimeo?
            'og:video'
          end

          # Vimeo uses a SWF file for its og:video property which doesn't
          # provide any metadata for the VideoInfo gem downstream. Using
          # og:url means VideoInfo is passed a webpage URL with metadata
          # it can parse.
          def vimeo?
            website.url.host =~ /vimeo/
          end

          # Declared `[width, height]` as integers, or nil when either
          # dimension tag is missing. A fresh array is returned on each call
          # so models never share one array object.
          def size
            [width.to_i, height.to_i] if width && height
          end

          # Content of the `og:video:width` tag, or nil when absent.
          # Memoised: the value is the same for every node of this document,
          # so the scraper is built and the document queried only once.
          # `defined?` is used so that a nil result is cached as well.
          def width
            return @width if defined?(@width)

            @width = ::LinkThumbnailer::Scrapers::Opengraph::Video::Width.new(document).value
          end

          # Content of the `og:video:height` tag, or nil when absent.
          # Memoised for the same reason as #width.
          def height
            return @height if defined?(@height)

            @height = ::LinkThumbnailer::Scrapers::Opengraph::Video::Height.new(document).value
          end

        end

        # Handles `og:video:url` attributes.
        class Url < ::LinkThumbnailer::Scrapers::Opengraph::Video::Base

          private

          # NOTE(review): the result of `super` is discarded, so this always
          # returns 'og:video:url' (also for Vimeo). The call looks
          # vestigial - confirm before removing it.
          def attribute
            super
            'og:video:url'
          end

        end

        # Handles `og:video:width` attributes.
        class Width < ::LinkThumbnailer::Scrapers::Opengraph::Base

          # @return [String, nil] the tag's `content`, or nil when no tag matches.
          def value
            node.attributes['content'].to_s if node
          end

          private

          def attribute
            'og:video:width'
          end

        end

        # Handles `og:video:height` attributes.
        class Height < ::LinkThumbnailer::Scrapers::Opengraph::Base

          # @return [String, nil] the tag's `content`, or nil when no tag matches.
          def value
            node.attributes['content'].to_s if node
          end

          private

          def attribute
            'og:video:height'
          end

        end

      end
    end
  end
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'link_thumbnailer/scrapers/opengraph/base'
|
|
4
|
+
require 'link_thumbnailer/scrapers/opengraph/video'
|
|
5
|
+
|
|
6
|
+
module LinkThumbnailer
  module Scrapers
    module Opengraph
      # Plural entry point that delegates to the singular Video scraper.
      class Videos < ::LinkThumbnailer::Scrapers::Opengraph::Base

        # Runs the Video scraper against the same document and website.
        # The `attribute_name` argument is ignored; the delegate is always
        # called with 'video'.
        def call(attribute_name)
          delegate = ::LinkThumbnailer::Scrapers::Opengraph::Video.new(document, website)
          delegate.call('video')
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkThumbnailer
|
|
4
|
+
class URI
|
|
5
|
+
|
|
6
|
+
attr_reader :attribute
|
|
7
|
+
|
|
8
|
+
def initialize(uri)
|
|
9
|
+
@attribute = uri.to_s
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def valid?
|
|
13
|
+
!!(attribute =~ ::URI::regexp)
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def to_s
|
|
17
|
+
attribute
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'video_info'
|
|
4
|
+
|
|
5
|
+
module LinkThumbnailer
  # Wraps a VideoInfo instance built from a video's `src` and exposes
  # best-effort accessors that fall back to a default when lookup fails.
  class VideoParser

    # @return [VideoInfo, nil] nil when VideoInfo rejected the URL.
    attr_reader :parser

    # @param video [#src] object whose `src` is duplicated, stringified and
    #   handed to VideoInfo. A VideoInfo::UrlError leaves the parser nil.
    def initialize(video)
      @parser = begin
        ::VideoInfo.new(video.src.dup.to_s)
      rescue ::VideoInfo::UrlError
        nil
      end
    end

    # @return [Object, nil] the parser's video id; nil on NoMethodError
    #   (for instance when the parser itself is nil).
    def id
      parser.video_id
    rescue NoMethodError
      nil
    end

    # @return [Array] `[width, height]` from the parser; `[]` on failure.
    def size
      lookup([]) { [parser.width, parser.height] }
    end

    # @return [Object, nil] the parser's duration; nil on failure.
    def duration
      lookup { parser.duration }
    end

    # @return [Object, nil] the parser's provider; nil on failure.
    def provider
      lookup { parser.provider }
    end

    # @return [Object, nil] the parser's embed code; nil on failure.
    def embed_code
      lookup { parser.embed_code }
    end

    private

    # Yields and returns the block's result. A NoMethodError or an
    # OpenURI::HTTPError raised by the block yields `fallback` instead.
    def lookup(fallback = nil)
      yield
    rescue NoMethodError, ::OpenURI::HTTPError
      fallback
    end

  end
end
|