link_thumbnailer 3.3.1 → 3.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/Gemfile +5 -3
- data/Rakefile +2 -0
- data/lib/generators/link_thumbnailer/install_generator.rb +2 -0
- data/lib/generators/templates/initializer.rb +2 -0
- data/lib/link_thumbnailer.rb +2 -0
- data/lib/link_thumbnailer/configuration.rb +72 -70
- data/lib/link_thumbnailer/exceptions.rb +2 -0
- data/lib/link_thumbnailer/grader.rb +2 -0
- data/lib/link_thumbnailer/graders/base.rb +2 -0
- data/lib/link_thumbnailer/graders/html_attribute.rb +2 -0
- data/lib/link_thumbnailer/graders/length.rb +2 -0
- data/lib/link_thumbnailer/graders/link_density.rb +2 -0
- data/lib/link_thumbnailer/graders/position.rb +2 -0
- data/lib/link_thumbnailer/image_comparator.rb +2 -0
- data/lib/link_thumbnailer/image_comparators/base.rb +2 -0
- data/lib/link_thumbnailer/image_comparators/size.rb +2 -0
- data/lib/link_thumbnailer/image_parser.rb +13 -1
- data/lib/link_thumbnailer/image_validator.rb +2 -0
- data/lib/link_thumbnailer/model.rb +20 -17
- data/lib/link_thumbnailer/models/description.rb +2 -0
- data/lib/link_thumbnailer/models/favicon.rb +2 -0
- data/lib/link_thumbnailer/models/image.rb +56 -54
- data/lib/link_thumbnailer/models/title.rb +2 -0
- data/lib/link_thumbnailer/models/video.rb +2 -0
- data/lib/link_thumbnailer/models/website.rb +54 -52
- data/lib/link_thumbnailer/page.rb +2 -0
- data/lib/link_thumbnailer/parser.rb +2 -0
- data/lib/link_thumbnailer/processor.rb +2 -0
- data/lib/link_thumbnailer/railtie.rb +2 -0
- data/lib/link_thumbnailer/response.rb +2 -0
- data/lib/link_thumbnailer/scraper.rb +62 -60
- data/lib/link_thumbnailer/scrapers/base.rb +69 -67
- data/lib/link_thumbnailer/scrapers/default/base.rb +2 -0
- data/lib/link_thumbnailer/scrapers/default/description.rb +2 -0
- data/lib/link_thumbnailer/scrapers/default/favicon.rb +2 -0
- data/lib/link_thumbnailer/scrapers/default/images.rb +5 -1
- data/lib/link_thumbnailer/scrapers/default/title.rb +2 -0
- data/lib/link_thumbnailer/scrapers/default/videos.rb +2 -0
- data/lib/link_thumbnailer/scrapers/opengraph/base.rb +2 -0
- data/lib/link_thumbnailer/scrapers/opengraph/description.rb +2 -0
- data/lib/link_thumbnailer/scrapers/opengraph/favicon.rb +2 -0
- data/lib/link_thumbnailer/scrapers/opengraph/image.rb +2 -0
- data/lib/link_thumbnailer/scrapers/opengraph/images.rb +2 -0
- data/lib/link_thumbnailer/scrapers/opengraph/title.rb +2 -0
- data/lib/link_thumbnailer/scrapers/opengraph/video.rb +2 -0
- data/lib/link_thumbnailer/scrapers/opengraph/videos.rb +2 -0
- data/lib/link_thumbnailer/uri.rb +2 -0
- data/lib/link_thumbnailer/version.rb +3 -1
- data/lib/link_thumbnailer/video_parser.rb +3 -1
- data/link_thumbnailer.gemspec +7 -5
- data/spec/configuration_spec.rb +2 -0
- data/spec/fixture_spec.rb +2 -0
- data/spec/grader_spec.rb +2 -0
- data/spec/graders/base_spec.rb +2 -0
- data/spec/graders/html_attribute_spec.rb +2 -0
- data/spec/graders/length_spec.rb +2 -0
- data/spec/graders/link_density_spec.rb +2 -0
- data/spec/graders/position_spec.rb +2 -0
- data/spec/image_comparators/size_spec.rb +2 -0
- data/spec/image_validator_spec.rb +2 -0
- data/spec/model_spec.rb +2 -0
- data/spec/models/description_spec.rb +2 -0
- data/spec/models/favicon_spec.rb +2 -0
- data/spec/models/image_spec.rb +2 -0
- data/spec/models/title_spec.rb +2 -0
- data/spec/models/video_spec.rb +2 -0
- data/spec/models/website_spec.rb +2 -0
- data/spec/page_spec.rb +2 -0
- data/spec/processor_spec.rb +2 -0
- data/spec/response_spec.rb +2 -0
- data/spec/scraper_spec.rb +2 -0
- data/spec/scrapers/base_spec.rb +2 -0
- data/spec/scrapers/opengraph/base_spec.rb +2 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/uri_spec.rb +2 -0
- data/spec/video_parser_spec.rb +2 -0
- metadata +23 -23
@@ -1,52 +1,54 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'link_thumbnailer/model'
|
4
|
+
|
5
|
+
module LinkThumbnailer
|
6
|
+
module Models
|
7
|
+
class Website < ::LinkThumbnailer::Model
|
8
|
+
|
9
|
+
attr_accessor :url, :title, :description, :images, :videos, :favicon
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
@images = []
|
13
|
+
@videos = []
|
14
|
+
end
|
15
|
+
|
16
|
+
def video=(video)
|
17
|
+
self.videos = video
|
18
|
+
end
|
19
|
+
|
20
|
+
def videos=(videos)
|
21
|
+
Array(videos).each do |video|
|
22
|
+
@videos << video
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def image=(image)
|
27
|
+
self.images = image
|
28
|
+
end
|
29
|
+
|
30
|
+
def images=(images)
|
31
|
+
Array(images).each do |image|
|
32
|
+
next unless image.valid?
|
33
|
+
@images << image
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def images
|
38
|
+
@images.sort!
|
39
|
+
end
|
40
|
+
|
41
|
+
def as_json(*)
|
42
|
+
{
|
43
|
+
url: url.to_s,
|
44
|
+
favicon: favicon,
|
45
|
+
title: title,
|
46
|
+
description: description,
|
47
|
+
images: images.map(&:as_json),
|
48
|
+
videos: videos.map(&:as_json)
|
49
|
+
}
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -1,60 +1,62 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require '
|
4
|
-
|
5
|
-
require '
|
6
|
-
|
7
|
-
require 'link_thumbnailer/
|
8
|
-
require 'link_thumbnailer/
|
9
|
-
require 'link_thumbnailer/scrapers/default/
|
10
|
-
require 'link_thumbnailer/scrapers/opengraph/
|
11
|
-
require 'link_thumbnailer/scrapers/default/
|
12
|
-
require 'link_thumbnailer/scrapers/opengraph/
|
13
|
-
require 'link_thumbnailer/scrapers/default/
|
14
|
-
require 'link_thumbnailer/scrapers/opengraph/
|
15
|
-
require 'link_thumbnailer/scrapers/default/
|
16
|
-
require 'link_thumbnailer/scrapers/opengraph/
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
@
|
27
|
-
@
|
28
|
-
@
|
29
|
-
@
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
"
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'delegate'
|
4
|
+
require 'active_support/core_ext/object/blank'
|
5
|
+
require 'active_support/inflector'
|
6
|
+
|
7
|
+
require 'link_thumbnailer/parser'
|
8
|
+
require 'link_thumbnailer/models/website'
|
9
|
+
require 'link_thumbnailer/scrapers/default/title'
|
10
|
+
require 'link_thumbnailer/scrapers/opengraph/title'
|
11
|
+
require 'link_thumbnailer/scrapers/default/description'
|
12
|
+
require 'link_thumbnailer/scrapers/opengraph/description'
|
13
|
+
require 'link_thumbnailer/scrapers/default/images'
|
14
|
+
require 'link_thumbnailer/scrapers/opengraph/images'
|
15
|
+
require 'link_thumbnailer/scrapers/default/videos'
|
16
|
+
require 'link_thumbnailer/scrapers/opengraph/videos'
|
17
|
+
require 'link_thumbnailer/scrapers/default/favicon'
|
18
|
+
require 'link_thumbnailer/scrapers/opengraph/favicon'
|
19
|
+
|
20
|
+
module LinkThumbnailer
|
21
|
+
class Scraper < ::SimpleDelegator
|
22
|
+
|
23
|
+
attr_reader :document, :source, :url, :config, :website
|
24
|
+
|
25
|
+
def initialize(source, url)
|
26
|
+
@source = source
|
27
|
+
@url = url
|
28
|
+
@config = ::LinkThumbnailer.page.config
|
29
|
+
@document = parser.call(source)
|
30
|
+
@website = ::LinkThumbnailer::Models::Website.new
|
31
|
+
@website.url = url
|
32
|
+
|
33
|
+
super(config)
|
34
|
+
end
|
35
|
+
|
36
|
+
def call
|
37
|
+
config.attributes.each do |name|
|
38
|
+
config.scrapers.each do |scraper_prefix|
|
39
|
+
scraper_class(scraper_prefix, name).new(document, website).call(name.to_s)
|
40
|
+
break unless website.send(name).blank?
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
website
|
45
|
+
end
|
46
|
+
|
47
|
+
private
|
48
|
+
|
49
|
+
def scraper_class(prefix, name)
|
50
|
+
prefix = "::LinkThumbnailer::Scrapers::#{prefix.to_s.camelize}"
|
51
|
+
name = name.to_s.camelize
|
52
|
+
"#{prefix}::#{name}".constantize
|
53
|
+
rescue NameError
|
54
|
+
raise ::LinkThumbnailer::ScraperInvalid, "scraper named '#{prefix}::#{name}' does not exists."
|
55
|
+
end
|
56
|
+
|
57
|
+
def parser
|
58
|
+
::LinkThumbnailer::Parser.new
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
62
|
+
end
|
@@ -1,67 +1,69 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require '
|
4
|
-
require 'link_thumbnailer/models/
|
5
|
-
require 'link_thumbnailer/models/
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
@
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'delegate'
|
4
|
+
require 'link_thumbnailer/models/title'
|
5
|
+
require 'link_thumbnailer/models/description'
|
6
|
+
require 'link_thumbnailer/models/image'
|
7
|
+
require 'link_thumbnailer/models/video'
|
8
|
+
|
9
|
+
module LinkThumbnailer
|
10
|
+
module Scrapers
|
11
|
+
class Base < ::SimpleDelegator
|
12
|
+
|
13
|
+
attr_reader :config, :document, :website, :attribute_name
|
14
|
+
|
15
|
+
def initialize(document, website = nil)
|
16
|
+
@config = ::LinkThumbnailer.page.config
|
17
|
+
@document = document
|
18
|
+
@website = website
|
19
|
+
|
20
|
+
super(config)
|
21
|
+
end
|
22
|
+
|
23
|
+
def call(attribute_name)
|
24
|
+
return false unless website.present?
|
25
|
+
return false unless applicable?
|
26
|
+
|
27
|
+
@attribute_name = attribute_name
|
28
|
+
|
29
|
+
website.send("#{attribute_name}=", value)
|
30
|
+
website
|
31
|
+
end
|
32
|
+
|
33
|
+
def applicable?
|
34
|
+
true
|
35
|
+
end
|
36
|
+
|
37
|
+
def value
|
38
|
+
fail NotImplementedError
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def meta_xpath(options = {})
|
44
|
+
meta_xpaths(options).first
|
45
|
+
end
|
46
|
+
|
47
|
+
def meta_xpaths(options = {})
|
48
|
+
key = options.fetch(:key, :property)
|
49
|
+
value = options.fetch(:value, :content)
|
50
|
+
attribute = options.fetch(:attribute, attribute_name)
|
51
|
+
|
52
|
+
document.xpath("//meta[translate(@#{key},'#{abc.upcase}','#{abc}') = '#{attribute}' and string-length(@#{value}) > 0]")
|
53
|
+
end
|
54
|
+
|
55
|
+
def abc
|
56
|
+
'abcdefghijklmnopqrstuvwxyz'
|
57
|
+
end
|
58
|
+
|
59
|
+
def model_class
|
60
|
+
"::LinkThumbnailer::Models::#{attribute_name.to_s.camelize}".constantize
|
61
|
+
end
|
62
|
+
|
63
|
+
def modelize(node, text = nil)
|
64
|
+
model_class.new(node, text)
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'link_thumbnailer/scrapers/default/base'
|
2
4
|
require 'link_thumbnailer/models/image'
|
3
5
|
|
@@ -57,7 +59,9 @@ module LinkThumbnailer
|
|
57
59
|
|
58
60
|
def base_href
|
59
61
|
base = document.at('//head/base')
|
60
|
-
base['href'] if base
|
62
|
+
base['href'] if base && ::URI.parse(base['href']).host
|
63
|
+
rescue ::URI::InvalidURIError
|
64
|
+
nil
|
61
65
|
end
|
62
66
|
|
63
67
|
def model_class
|