link_thumbnailer 1.1.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/.travis.yml +1 -1
- data/CHANGELOG.md +117 -104
- data/Gemfile +1 -1
- data/{LICENSE → LICENSE.txt} +21 -21
- data/README.md +153 -184
- data/lib/generators/link_thumbnailer/install_generator.rb +0 -4
- data/lib/generators/templates/initializer.rb +63 -41
- data/lib/link_thumbnailer/configuration.rb +52 -10
- data/lib/link_thumbnailer/exceptions.rb +6 -0
- data/lib/link_thumbnailer/grader.rb +37 -0
- data/lib/link_thumbnailer/graders/base.rb +32 -0
- data/lib/link_thumbnailer/graders/html_attribute.rb +49 -0
- data/lib/link_thumbnailer/graders/length.rb +19 -0
- data/lib/link_thumbnailer/graders/link_density.rb +21 -0
- data/lib/link_thumbnailer/graders/position.rb +11 -0
- data/lib/link_thumbnailer/image_comparator.rb +24 -0
- data/lib/link_thumbnailer/image_comparators/base.rb +17 -0
- data/lib/link_thumbnailer/image_comparators/size.rb +11 -0
- data/lib/link_thumbnailer/image_parser.rb +18 -0
- data/lib/link_thumbnailer/image_parsers/size.rb +15 -0
- data/lib/link_thumbnailer/image_parsers/type.rb +15 -0
- data/lib/link_thumbnailer/image_validator.rb +30 -0
- data/lib/link_thumbnailer/model.rb +16 -0
- data/lib/link_thumbnailer/models/description.rb +34 -0
- data/lib/link_thumbnailer/models/image.rb +54 -0
- data/lib/link_thumbnailer/models/title.rb +20 -0
- data/lib/link_thumbnailer/models/website.rb +39 -0
- data/lib/link_thumbnailer/page.rb +40 -0
- data/lib/link_thumbnailer/parser.rb +13 -0
- data/lib/link_thumbnailer/processor.rb +94 -0
- data/lib/link_thumbnailer/railtie.rb +9 -9
- data/lib/link_thumbnailer/scraper.rb +64 -0
- data/lib/link_thumbnailer/scrapers/base.rb +63 -0
- data/lib/link_thumbnailer/scrapers/default/base.rb +10 -0
- data/lib/link_thumbnailer/scrapers/default/description.rb +47 -0
- data/lib/link_thumbnailer/scrapers/default/images.rb +64 -0
- data/lib/link_thumbnailer/scrapers/default/title.rb +25 -0
- data/lib/link_thumbnailer/scrapers/opengraph/base.rb +43 -0
- data/lib/link_thumbnailer/scrapers/opengraph/description.rb +10 -0
- data/lib/link_thumbnailer/scrapers/opengraph/image.rb +30 -0
- data/lib/link_thumbnailer/scrapers/opengraph/images.rb +16 -0
- data/lib/link_thumbnailer/scrapers/opengraph/title.rb +10 -0
- data/lib/link_thumbnailer/version.rb +3 -3
- data/lib/link_thumbnailer.rb +36 -119
- data/link_thumbnailer.gemspec +26 -28
- data/spec/configuration_spec.rb +51 -0
- data/spec/examples/empty_og_image_example.html +9 -0
- data/spec/fixture_spec.rb +88 -0
- data/spec/fixtures/bar.png +2907 -0
- data/spec/fixtures/default_from_body.html +12 -0
- data/spec/fixtures/default_from_meta.html +11 -0
- data/spec/{examples → fixtures}/example.html +53 -53
- data/spec/fixtures/foo.png +0 -0
- data/spec/fixtures/og_not_valid_example.html +12 -0
- data/spec/fixtures/og_valid_example.html +12 -0
- data/spec/fixtures/og_valid_multi_image_example.html +13 -0
- data/spec/grader_spec.rb +24 -0
- data/spec/graders/base_spec.rb +12 -0
- data/spec/graders/html_attribute_spec.rb +48 -0
- data/spec/graders/length_spec.rb +81 -0
- data/spec/graders/link_density_spec.rb +22 -0
- data/spec/image_comparators/size_spec.rb +39 -0
- data/spec/image_parsers/size_spec.rb +34 -0
- data/spec/image_parsers/type_spec.rb +34 -0
- data/spec/image_validator_spec.rb +35 -0
- data/spec/model_spec.rb +17 -0
- data/spec/models/description_spec.rb +64 -0
- data/spec/models/image_spec.rb +71 -0
- data/spec/models/title_spec.rb +24 -0
- data/spec/models/website_spec.rb +49 -0
- data/spec/page_spec.rb +26 -0
- data/spec/processor_spec.rb +349 -0
- data/spec/scraper_spec.rb +95 -0
- data/spec/scrapers/base_spec.rb +67 -0
- data/spec/scrapers/opengraph/base_spec.rb +94 -0
- data/spec/spec_helper.rb +15 -13
- metadata +126 -120
- data/app/controllers/link_thumbnailer/application_controller.rb +0 -4
- data/app/controllers/link_thumbnailer/previews_controller.rb +0 -11
- data/lib/link_thumbnailer/doc.rb +0 -65
- data/lib/link_thumbnailer/doc_parser.rb +0 -15
- data/lib/link_thumbnailer/engine.rb +0 -4
- data/lib/link_thumbnailer/fetcher.rb +0 -34
- data/lib/link_thumbnailer/img_comparator.rb +0 -17
- data/lib/link_thumbnailer/img_parser.rb +0 -41
- data/lib/link_thumbnailer/img_url_filter.rb +0 -13
- data/lib/link_thumbnailer/object.rb +0 -41
- data/lib/link_thumbnailer/opengraph.rb +0 -20
- data/lib/link_thumbnailer/rails/routes/mapper.rb +0 -30
- data/lib/link_thumbnailer/rails/routes/mapping.rb +0 -33
- data/lib/link_thumbnailer/rails/routes.rb +0 -47
- data/lib/link_thumbnailer/web_image.rb +0 -19
- data/spec/doc_parser_spec.rb +0 -25
- data/spec/doc_spec.rb +0 -23
- data/spec/examples/empty_example.html +0 -11
- data/spec/examples/og_example.html +0 -12
- data/spec/fetcher_spec.rb +0 -97
- data/spec/img_comparator_spec.rb +0 -16
- data/spec/img_url_filter_spec.rb +0 -31
- data/spec/link_thumbnailer_spec.rb +0 -205
- data/spec/object_spec.rb +0 -130
- data/spec/opengraph_spec.rb +0 -7
- data/spec/web_image_spec.rb +0 -57
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
require 'link_thumbnailer/processor'
|
|
2
|
+
require 'link_thumbnailer/scraper'
|
|
3
|
+
|
|
4
|
+
module LinkThumbnailer
  # Represents one page to preview. It fetches the page body through a
  # Processor and hands that body to a Scraper.
  class Page

    attr_reader :url, :options, :source

    # @param url [String] address of the page to preview
    # @param options [Hash] per-page configuration overrides; every key must
    #   name a public setter (`key=`) on the configuration object
    # @raise [NoMethodError] when an option has no matching public setter
    def initialize(url, options = {})
      @url     = url
      @options = options

      set_options
    end

    # Downloads the page (the body is kept in #source) and scrapes it.
    #
    # @return the value returned by the scraper's #call
    def generate
      @source = processor.call(url)
      scraper.call
    end

    # Page-local configuration: a (shallow) copy of the global configuration,
    # so that per-page options do not overwrite the global object's settings.
    def config
      @config ||= ::LinkThumbnailer.config.dup
    end

    private

    # Applies each option through the matching public setter. `public_send`
    # is used on purpose: option keys come from the caller and must not be
    # able to reach private methods of the configuration object.
    def set_options
      options.each { |key, value| config.public_send("#{key}=", value) }
    end

    def processor
      @processor ||= ::LinkThumbnailer::Processor.new
    end

    # Built lazily: it needs #source, which only exists once #generate has
    # run the processor.
    def scraper
      @scraper ||= ::LinkThumbnailer::Scraper.new(source, processor.url)
    end

  end
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
require 'delegate'
|
|
2
|
+
require 'net/http/persistent'
|
|
3
|
+
|
|
4
|
+
module LinkThumbnailer
  # Downloads the body of a page over HTTP(S), following redirects up to the
  # configured limit. Unknown methods are delegated to the configuration
  # object (SimpleDelegator).
  class Processor < ::SimpleDelegator

    attr_reader :url, :config, :http, :redirect_count

    def initialize
      @config = ::LinkThumbnailer.page.config
      @http   = ::Net::HTTP::Persistent.new

      super(config)
    end

    # Fetches +url+ and returns the response body.
    #
    # @param url [String, URI] address to fetch
    # @param redirect_count [Integer] number of redirects already followed
    # @return [String] body of the final successful response
    # @raise [LinkThumbnailer::RedirectLimit] when redirect_count exceeds
    #   the configured redirect limit
    # @raise [LinkThumbnailer::BadUriFormat] when url is not an HTTP(S) URI
    # @raise whatever Net::HTTPResponse#error! raises for other responses
    def call(url = '', redirect_count = 0)
      self.url        = url
      @redirect_count = redirect_count

      raise ::LinkThumbnailer::RedirectLimit if too_many_redirections?

      with_valid_url do
        set_http_headers
        set_http_options
        perform_request
      end
    end

    private

    # Yields only when the current url is an HTTP(S) URI; #call always
    # passes a block.
    def with_valid_url
      raise ::LinkThumbnailer::BadUriFormat unless valid_url_format?

      yield
    end

    def set_http_headers
      http.headers['User-Agent'] = user_agent
    end

    def set_http_options
      http.verify_mode  = ::OpenSSL::SSL::VERIFY_NONE unless ssl_required?
      http.open_timeout = http_timeout
      # Bound the read phase as well, otherwise a server that accepts the
      # connection but never answers is not covered by http_timeout.
      http.read_timeout = http_timeout
    end

    def perform_request
      response = http.request(url)

      case response
      when ::Net::HTTPSuccess
        response.body
      when ::Net::HTTPRedirection
        location = response['location']
        # A 3xx answer without a Location header cannot be followed.
        response.error! if location.nil? || location.empty?

        call(resolve_relative_url(location), redirect_count + 1)
      else
        response.error!
      end
    end

    # Resolves a Location header value against the current url. Absolute
    # locations come back unchanged; relative ones keep the current scheme,
    # host and port.
    def resolve_relative_url(location)
      build_absolute_url_for(location)
    end

    def build_absolute_url_for(relative_url)
      ::URI.join(url.to_s, relative_url.to_s)
    end

    def redirect_limit
      config.redirect_limit
    end

    def user_agent
      config.user_agent
    end

    def http_timeout
      config.http_timeout
    end

    def ssl_required?
      config.verify_ssl
    end

    def too_many_redirections?
      redirect_count > redirect_limit
    end

    def valid_url_format?
      url.is_a?(::URI::HTTP)
    end

    # Private writer: normalises strings (and URI objects) to a URI.
    def url=(url)
      @url = URI(url)
    end

  end
end
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
module LinkThumbnailer
|
|
2
|
-
class Railtie < ::Rails::Railtie
|
|
3
|
-
|
|
4
|
-
initializer 'link_thumbnailer.routes' do
|
|
5
|
-
LinkThumbnailer::Rails::Routes.install!
|
|
6
|
-
end
|
|
7
|
-
|
|
8
|
-
end
|
|
9
|
-
end
|
|
1
|
+
module LinkThumbnailer
  # Rails integration hook: registers one initializer that installs the
  # gem's routes helper.
  class Railtie < ::Rails::Railtie
    # NOTE(review): this version's file list shows
    # lib/link_thumbnailer/rails/routes.rb as removed — confirm that
    # LinkThumbnailer::Rails::Routes is still defined somewhere.
    initializer('link_thumbnailer.routes') { LinkThumbnailer::Rails::Routes.install! }
  end
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
require 'delegate'
|
|
2
|
+
require 'active_support/core_ext/object/blank'
|
|
3
|
+
require 'active_support/inflector'
|
|
4
|
+
|
|
5
|
+
require 'link_thumbnailer/parser'
|
|
6
|
+
require 'link_thumbnailer/models/website'
|
|
7
|
+
require 'link_thumbnailer/scrapers/default/title'
|
|
8
|
+
require 'link_thumbnailer/scrapers/opengraph/title'
|
|
9
|
+
require 'link_thumbnailer/scrapers/default/description'
|
|
10
|
+
require 'link_thumbnailer/scrapers/opengraph/description'
|
|
11
|
+
require 'link_thumbnailer/scrapers/default/images'
|
|
12
|
+
require 'link_thumbnailer/scrapers/opengraph/images'
|
|
13
|
+
|
|
14
|
+
module LinkThumbnailer
  # Fills a Models::Website from a page source by running, for every
  # configured attribute, the Opengraph scraper first and the Default
  # scraper second.
  class Scraper < ::SimpleDelegator

    attr_reader :document, :source, :url, :config, :website

    # @param source the raw page body handed to the parser
    # @param url the page address, copied onto the website model
    def initialize(source, url)
      @source   = source
      @url      = url
      @config   = ::LinkThumbnailer.page.config
      @document = parser.call(source)
      @website  = ::LinkThumbnailer::Models::Website.new
      @website.url = url

      super(config)
    end

    # Scrapes every attribute listed in the configuration.
    #
    # @return the populated website model
    # @raise [LinkThumbnailer::ScraperInvalid] when no scraper class exists
    #   for an attribute
    def call
      config.attributes.each { |name| scrape_attribute(name) }

      website
    end

    private

    # Tries each scraper namespace in order and stops as soon as the
    # website holds a non-blank value for +name+.
    def scrape_attribute(name)
      scrapers.each do |namespace|
        candidate = scraper_class(namespace, name).new(document)
        candidate.call(website, name.to_s) if candidate.applicable?

        break unless website.send(name).blank?
      end
    end

    # Scraper namespaces, highest priority first.
    def scrapers
      %w[
        ::LinkThumbnailer::Scrapers::Opengraph
        ::LinkThumbnailer::Scrapers::Default
      ]
    end

    # Resolves e.g. (prefix, :title) to the constant "<prefix>::Title".
    def scraper_class(prefix, name)
      name = name.to_s.camelize
      "#{prefix}::#{name}".constantize
    rescue NameError
      raise ::LinkThumbnailer::ScraperInvalid, "scraper named '#{prefix}::#{name}' does not exists."
    end

    def parser
      ::LinkThumbnailer::Parser.new
    end

  end
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
require 'delegate'
|
|
2
|
+
require 'link_thumbnailer/models/title'
|
|
3
|
+
require 'link_thumbnailer/models/description'
|
|
4
|
+
require 'link_thumbnailer/models/image'
|
|
5
|
+
|
|
6
|
+
module LinkThumbnailer
  module Scrapers
    # Common behaviour of attribute scrapers. A subclass supplies the
    # private #value; #call stores that value on the website model.
    class Base < ::SimpleDelegator

      attr_reader :config, :document, :website, :attribute_name

      # @param document the parsed page; it must answer #xpath for the meta
      #   helpers below
      def initialize(document)
        @config   = ::LinkThumbnailer.page.config
        @document = document

        super(config)
      end

      # Assigns the scraped value to website.<attribute_name>.
      #
      # @return the website that was passed in
      def call(website, attribute_name)
        @website        = website
        @attribute_name = attribute_name

        website.send("#{attribute_name}=", value)
        website
      end

      # Whether this scraper can be used on the document; always true here.
      def applicable?
        true
      end

      private

      # Subclass hook returning the value to store.
      def value
        raise 'must implement'
      end

      # First <meta> node matching +options+ (see #meta_xpaths), or nil.
      def meta_xpath(options = {})
        meta_xpaths(options).first
      end

      # <meta> nodes whose :key attribute (default `property`), lower-cased
      # through XPath translate(), equals :attribute (default: the current
      # attribute name) and which carry the :value attribute (default
      # `content`).
      def meta_xpaths(options = {})
        key_attr   = options.fetch(:key, :property)
        value_attr = options.fetch(:value, :content)
        wanted     = options.fetch(:attribute, attribute_name)
        lowercase  = abc

        document.xpath(
          "//meta[translate(@#{key_attr},'#{lowercase.upcase}','#{lowercase}') = '#{wanted}' and @#{value_attr}]"
        )
      end

      # Lower-case alphabet used for the case-insensitive XPath comparison.
      def abc
        'abcdefghijklmnopqrstuvwxyz'
      end

      # Model constant named after the attribute, e.g. 'title' -> Models::Title.
      def model_class
        "::LinkThumbnailer::Models::#{attribute_name.to_s.camelize}".constantize
      end

      def modelize(node, text = nil)
        model_class.new(node, text)
      end

    end
  end
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
require 'link_thumbnailer/scrapers/default/base'
|
|
2
|
+
|
|
3
|
+
module LinkThumbnailer
  module Scrapers
    module Default
      # Description scraper: prefers <meta name="description" content="...">
      # and otherwise falls back to a paragraph taken from the page body.
      class Description < ::LinkThumbnailer::Scrapers::Default::Base

        private

        # @return [String, nil] the description text, or nil when neither
        #   the meta tag nor the body yields a model
        def value
          # Evaluate each source once: building the body model walks and
          # sorts every candidate node, so it should not run twice.
          model = model_from_meta || model_from_body
          model.to_s if model
        end

        def model_from_meta
          node = node_from_meta
          modelize(node, node.attributes['content'].value) if node
        end

        # Builds one model per candidate node (with its 1-based position)
        # and keeps the last one after sorting.
        # NOTE(review): presumably the description model defines <=> from
        # its grade — confirm in models/description.rb.
        def model_from_body
          nodes_from_body.each_with_index.map { |node, i| modelize(node, node.text, i + 1) }.sort.last
        end

        def node_from_meta
          @node_from_meta ||= meta_xpath(key: :name)
        end

        def nodes_from_body
          candidates.select { |node| valid_paragraph?(node) }
        end

        # Filtering hook; currently accepts every candidate.
        def valid_paragraph?(node)
          true
        end

        # Body nodes considered as description text: <p> and <td> elements.
        def candidates
          document.css('p,td')
        end

        def modelize(node, text, i = 1)
          model_class.new(node, text, i)
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
require 'link_thumbnailer/scrapers/default/base'
|
|
2
|
+
require 'link_thumbnailer/models/image'
|
|
3
|
+
|
|
4
|
+
module LinkThumbnailer
  module Scrapers
    module Default
      # Image scraper: collects the `src` of every <img> in the document,
      # makes each one absolute and wraps it in an image model.
      class Images < ::LinkThumbnailer::Scrapers::Default::Base

        private

        # @return [Array] image models for at most config.image_limit URLs
        def value
          limit = config.image_limit

          abs_urls.each_with_index
                  .take_while { |_, index| index < limit }
                  .map { |uri, _| modelize(uri) }
        end

        # Raw `src` attribute values; <img> tags without `src` are ignored.
        def urls
          document.search('//img').map { |i| i['src'] }.compact
        end

        # Absolute URIs of the images. Unparsable `src` values are dropped
        # here so that they neither reach the image model as nil nor use up
        # a slot of the image limit.
        def abs_urls
          resolved = urls.map do |url|
            uri = validate_url(url)
            next unless uri

            needs_prefix?(uri) ? prefix_uri(uri) : uri
          end

          resolved.compact
        end

        # @return [URI, nil] the parsed URI, or nil when +url+ is invalid
        def validate_url(url)
          URI(url)
        rescue URI::InvalidURIError
          nil
        end

        # Anything that did not parse as an HTTP(S) URI is resolved against
        # the prefix URL.
        def needs_prefix?(uri)
          !uri.is_a?(URI::HTTP)
        end

        def prefix_uri(uri)
          URI.join(prefix_url, uri)
        end

        # The document's <base href> when present, otherwise the page URL.
        def prefix_url
          base_href || website.url
        end

        def base_href
          base = document.at('//head/base')
          base['href'] if base
        end

        def model_class
          ::LinkThumbnailer::Models::Image
        end

        def modelize(uri)
          model_class.new(uri)
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
require 'link_thumbnailer/scrapers/default/base'
|
|
2
|
+
|
|
3
|
+
module LinkThumbnailer
  module Scrapers
    module Default
      # Title scraper: selects the document nodes matching the attribute
      # name used as a CSS selector and returns the string form of the
      # model built from them.
      class Title < ::LinkThumbnailer::Scrapers::Default::Base

        private

        # @return [String] string form of the title model
        def value
          title_model = model
          title_model.to_s
        end

        def model
          selected = node
          modelize(selected)
        end

        # Result of the CSS query for the attribute name.
        def node
          selector = attribute_name
          document.css(selector)
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
require 'link_thumbnailer/scrapers/base'
|
|
2
|
+
|
|
3
|
+
module LinkThumbnailer
  module Scrapers
    module Opengraph
      # Shared behaviour of the Open Graph scrapers: reads the `content` of
      # the <meta> tag whose `property` (or, failing that, `name`) is
      # "og:<attribute>".
      class Base < ::LinkThumbnailer::Scrapers::Base

        # True when at least one <meta> tag of the document has a `name` or
        # `property` starting with "og:".
        def applicable?
          meta.any? { |tag| opengraph_node?(tag) }
        end

        private

        # @return [String] string form of the model ("" when no node matched,
        #   because nil.to_s is "")
        def value
          model.to_s
        end

        def model
          return unless node

          modelize(node, node.attributes['content'].to_s)
        end

        # Matching <meta> node, looked up by `property` first and by `name`
        # second; a found node is memoized.
        def node
          @node ||= begin
            by_property = meta_xpath(attribute: attribute)
            by_property || meta_xpath(attribute: attribute, key: :name)
          end
        end

        # Open Graph name of the scraped attribute, e.g. "og:title".
        def attribute
          "og:#{attribute_name}"
        end

        def opengraph_node?(node)
          %w[name property].any? { |attr| node.attribute(attr).to_s.start_with?('og:') }
        end

        def meta
          document.css('meta')
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
require 'link_thumbnailer/scrapers/opengraph/base'
|
|
2
|
+
|
|
3
|
+
module LinkThumbnailer
  module Scrapers
    module Opengraph
      # Open Graph image scraper: builds one model per matching "og:image"
      # <meta> tag.
      class Image < ::LinkThumbnailer::Scrapers::Opengraph::Base

        private

        # @return [Array] one image model per matching <meta> node
        def value
          model
        end

        def model
          nodes.map do |meta_node|
            modelize(meta_node, meta_node.attributes['content'].to_s)
          end
        end

        # Only the text (the tag's `content`) is passed to the model; the
        # node itself is not used.
        def modelize(node, text = nil)
          model_class.new(text)
        end

        # Nodes matched by `property`; when there are none, nodes matched by
        # `name` instead.
        def nodes
          by_property = meta_xpaths(attribute: attribute)
          return by_property unless by_property.empty?

          meta_xpaths(attribute: attribute, key: :name)
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
require 'link_thumbnailer/scrapers/opengraph/base'
|
|
2
|
+
require 'link_thumbnailer/scrapers/opengraph/image'
|
|
3
|
+
|
|
4
|
+
module LinkThumbnailer
  module Scrapers
    module Opengraph
      # Scraper for the `images` attribute: hands the work to the Open Graph
      # Image scraper, always under the attribute name 'image'.
      class Images < ::LinkThumbnailer::Scrapers::Opengraph::Base

        # @param website the model to populate
        # @param attribute_name ignored; 'image' is always used
        # @return the result of the Image scraper's #call
        def call(website, attribute_name)
          image_scraper = ::LinkThumbnailer::Scrapers::Opengraph::Image.new(document)
          image_scraper.call(website, 'image')
        end

      end
    end
  end
end
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
module LinkThumbnailer
|
|
2
|
-
VERSION = '1.1.2'
|
|
3
|
-
end
|
|
1
|
+
module LinkThumbnailer
  # Version string of the gem. Frozen so the constant cannot be mutated in
  # place.
  VERSION = '2.0.0'.freeze
end
|