rcarvalho-link_thumbnailer 1.0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +19 -0
- data/.rspec +2 -0
- data/.travis.yml +6 -0
- data/CHANGELOG.md +91 -0
- data/Gemfile +12 -0
- data/LICENSE +22 -0
- data/README.md +184 -0
- data/Rakefile +7 -0
- data/app/controllers/link_thumbnailer/application_controller.rb +4 -0
- data/app/controllers/link_thumbnailer/previews_controller.rb +11 -0
- data/lib/generators/link_thumbnailer/install_generator.rb +19 -0
- data/lib/generators/templates/initializer.rb +41 -0
- data/lib/link_thumbnailer.rb +96 -0
- data/lib/link_thumbnailer/configuration.rb +6 -0
- data/lib/link_thumbnailer/doc.rb +65 -0
- data/lib/link_thumbnailer/doc_parser.rb +15 -0
- data/lib/link_thumbnailer/engine.rb +9 -0
- data/lib/link_thumbnailer/fetcher.rb +34 -0
- data/lib/link_thumbnailer/img_comparator.rb +18 -0
- data/lib/link_thumbnailer/img_parser.rb +46 -0
- data/lib/link_thumbnailer/img_url_filter.rb +13 -0
- data/lib/link_thumbnailer/object.rb +41 -0
- data/lib/link_thumbnailer/opengraph.rb +20 -0
- data/lib/link_thumbnailer/rails/routes.rb +47 -0
- data/lib/link_thumbnailer/rails/routes/mapper.rb +30 -0
- data/lib/link_thumbnailer/rails/routes/mapping.rb +33 -0
- data/lib/link_thumbnailer/version.rb +3 -0
- data/lib/link_thumbnailer/web_image.rb +18 -0
- data/link_thumbnailer.gemspec +28 -0
- data/spec/doc_parser_spec.rb +25 -0
- data/spec/doc_spec.rb +23 -0
- data/spec/examples/empty_example.html +11 -0
- data/spec/examples/example.html +363 -0
- data/spec/examples/og_example.html +12 -0
- data/spec/fetcher_spec.rb +97 -0
- data/spec/img_comparator_spec.rb +16 -0
- data/spec/img_url_filter_spec.rb +31 -0
- data/spec/link_thumbnailer_spec.rb +205 -0
- data/spec/object_spec.rb +130 -0
- data/spec/opengraph_spec.rb +7 -0
- data/spec/spec_helper.rb +13 -0
- data/spec/web_image_spec.rb +57 -0
- metadata +245 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module LinkThumbnailer

  # Page-inspection helpers meant to be mixed into a parsed HTML document.
  #
  # The host object must respond to +at+, +search+, +css+ and +xpath+
  # (presumably a Nokogiri document -- confirm against the code that extends
  # objects with this module).
  module Doc

    # URL the document was loaded from. Used as the last-resort base when
    # resolving relative image sources.
    attr_accessor :source_url

    # Returns the +href+ of the <base> element found under <head>, or nil when
    # the document has no such element.
    def doc_base_href
      base = at('//head/base')
      base['href'] if base
    end

    # Returns the +src+ value of every <img> element, leaving out images
    # without a +src+ attribute.
    def img_srcs
      search('//img').map { |img| img['src'] }.compact
    end

    # Resolves every image source of the document to an absolute URI.
    #
    # base_url - optional base used for relative sources. When nil, the
    #            document's <base href> is used, then +source_url+.
    #
    # Returns an Array of URI objects in document order. A source is skipped
    # when it cannot be parsed as a URI, or when it is relative and cannot be
    # resolved (no base available, or the base itself is relative/invalid).
    def img_abs_urls(base_url = nil)
      base = base_url || doc_base_href || source_url

      img_srcs.each_with_object([]) do |src, urls|
        begin
          uri = URI(src)

          if uri.is_a?(URI::HTTP)
            urls << uri
          elsif base
            urls << URI.join(base, src)
          end
        rescue URI::Error
          # Covers both an unparsable source and a failed join; one bad image
          # must not abort the scan of the remaining ones.
          next
        end
      end
    end

    # Returns the text of the <title> element(s), stripped of surrounding
    # whitespace.
    def title
      css('title').text.strip
    end

    # Returns a description for the page, or nil when none is found.
    #
    # The content of a <meta name="description"> tag wins (the name is matched
    # case-insensitively). Otherwise the text of the first <p> under <body>
    # that has no +style+ attribute and no child element is used.
    def description
      meta = xpath("//meta[translate(@name,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz') = 'description' and @content]").first
      return meta.attributes['content'].value.strip if meta

      paragraph = css('body p').find do |node|
        !node.has_attribute?('style') && node.first_element_child.nil?
      end

      paragraph.text.strip if paragraph
    end

    # Returns the +href+ of the <link rel="canonical"> element (rel matched
    # case-insensitively), stripped, or nil when the document has none.
    def canonical_url
      link = xpath("//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz') = 'canonical' and @href]").first
      link.attributes['href'].value.strip if link
    end

  end

end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'net/http/persistent'
|
2
|
+
|
3
|
+
module LinkThumbnailer

  # Downloads the body of an HTTP(S) resource, following redirects up to
  # LinkThumbnailer.configuration.redirect_limit.
  class Fetcher

    # URI of the most recent request; updated on every redirect hop.
    attr_accessor :url

    # Fetches +url+ and returns the response body.
    #
    # url            - String or URI to request.
    # redirect_count - number of redirects already followed (used by the
    #                  recursive call; callers normally omit it).
    #
    # Returns the body on a success response, or nil when +url+ is not an
    # HTTP(S) URI.
    # Raises ArgumentError once more redirects than the configured limit have
    # been followed, and whatever +resp.error!+ raises for any other response
    # (including a redirect that carries no Location header).
    def fetch(url, redirect_count = 0)
      if redirect_count > LinkThumbnailer.configuration.redirect_limit
        raise ArgumentError, "too many redirects (#{redirect_count})"
      end

      self.url = url.is_a?(URI) ? url : URI(url)
      return unless self.url.is_a?(URI::HTTP)

      resp = build_client.request(self.url)

      case resp
      when Net::HTTPSuccess then resp.body
      when Net::HTTPRedirection then fetch(redirect_target(resp), redirect_count + 1)
      else resp.error!
      end
    end

    private

    # Builds the HTTP client configured from LinkThumbnailer.configuration.
    def build_client
      config = LinkThumbnailer.configuration

      http = Net::HTTP::Persistent.new('linkthumbnailer')
      http.headers['User-Agent'] = config.user_agent
      # NOTE(review): certificate verification is switched off entirely when
      # the configuration asks for it; keep verify_ssl enabled in production.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE unless config.verify_ssl
      http.open_timeout = config.http_timeout
      http
    end

    # Resolves the Location header of a redirect response against the URL
    # that was just requested, so absolute, host-relative and path-relative
    # locations all work and the port of the current URL is kept.
    def redirect_target(resp)
      location = resp['location']
      resp.error! if location.nil? || location.empty?

      URI.join(self.url, location)
    end

  end

end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module LinkThumbnailer

  # Ordering for image objects that respond to +rows+, +columns+ and
  # +number_colors+. Sorting with it puts the image with the largest shortest
  # side first; ties are broken by the higher number of colors.
  module ImgComparator

    # Compares +other+ against the receiver (note the reversed operands, which
    # is what makes the resulting sort order descending).
    #
    # Returns -1, 0 or 1, or nil when the underlying values are not comparable.
    def <=>(other)
      their_side = [other.rows, other.columns].min
      own_side = [rows, columns].min

      by_size = (their_side ** 2) <=> (own_side ** 2)
      return by_size unless by_size == 0

      other.number_colors <=> number_colors
    end

  end

end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'RMagick'
|
2
|
+
|
3
|
+
module LinkThumbnailer

  # Downloads candidate images, ranks them and returns the best ones.
  class ImgParser

    # fetcher        - object responding to +fetch(url)+ and returning the
    #                  raw image data.
    # img_url_filter - a single filter or a list of filters; each must respond
    #                  to +reject?(url)+. nil means "no filter".
    def initialize(fetcher, img_url_filter)
      @fetcher = fetcher
      @img_url_filters = [*img_url_filter]
    end

    # Ranks the images found at +img_urls+.
    #
    # URLs rejected by any filter are ignored. The remaining URLs are loaded
    # in order until LinkThumbnailer.configuration.limit images have been
    # read successfully; URLs that fail to load are skipped and do not count
    # towards the limit.
    #
    # The +img_urls+ array passed in is left untouched.
    #
    # Returns the first LinkThumbnailer.configuration.top images after sorting
    # them with LinkThumbnailer::ImgComparator.
    def parse(img_urls)
      candidates = img_urls.reject do |url|
        @img_url_filters.any? { |filter| filter.reject?(url) }
      end

      limit = LinkThumbnailer.configuration.limit
      imgs = []

      candidates.each do |url|
        break if imgs.size >= limit

        img = parse_one(url)
        next unless img

        imgs << img.extend(LinkThumbnailer::ImgComparator)
      end

      imgs.sort.first(LinkThumbnailer.configuration.top)
    end

    # Fetches +img_url+ and builds an image object from the downloaded data,
    # extended with LinkThumbnailer::WebImage and tagged with its source URL.
    #
    # Returns the image, or nil when fetching or decoding fails. Failures are
    # swallowed on purpose: image discovery is best-effort.
    def parse_one(img_url)
      img_data = @fetcher.fetch(img_url)
      img = Magick::ImageList.new.from_blob(img_data).extend(
        LinkThumbnailer::WebImage
      )
      img.source_url = img_url
      img
    rescue StandardError
      nil
    end

  end

end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'hashie'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
module LinkThumbnailer

  # Result container: a Hashie::Mash holding the attributes collected for a
  # page, including an +images+ list.
  class Object < Hashie::Mash

    # Resolves any otherwise-undefined method as a key lookup.
    #
    # A name ending in "?" returns true when the key (without the "?") holds a
    # non-nil value; any other name returns self[name].
    #
    # NOTE(review): writer-style calls ("name=") also land here and are looked
    # up as a key literally named "name=" instead of assigning -- confirm this
    # is intended. Hash-style assignment (object[:name] = value) is unaffected.
    def method_missing(method_name, *args, &block)
      key = method_name.to_s

      if key.end_with?('?')
        !self[key.chop].nil?
      else
        self[key]
      end
    end

    # Returns false for an object without any key. In strict mode
    # (LinkThumbnailer.configuration.strict) every attribute listed in
    # LinkThumbnailer.configuration.mandatory_attributes must also be present
    # and, when it responds to +empty?+, non-empty.
    def valid?
      return false if keys.empty?
      return true unless LinkThumbnailer.configuration.strict

      LinkThumbnailer.configuration.mandatory_attributes.none? do |attribute|
        value = self[attribute]
        value.nil? || (value.respond_to?(:empty?) && value.empty?)
      end
    end

    # Returns a plain Hash. When +images+ is set and holds no String, each
    # image is converted with its own +to_hash+; otherwise the inherited
    # conversion is returned unchanged (this includes the case where no
    # +images+ key has been set at all).
    def to_hash
      imgs = images
      return super if imgs.nil? || imgs.any? { |i| i.is_a?(String) }

      super.merge('images' => imgs.map(&:to_hash))
    end

    # Serializes +to_hash+ as JSON.
    #
    # Extra arguments (such as the generator state handed over when this
    # object is nested inside another structure being serialized) are
    # accepted so the call does not fail, and are ignored.
    def to_json(*_args)
      JSON.generate(to_hash)
    end

  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module LinkThumbnailer

  # Copies Open Graph meta data (<meta property="og:...">) into a result
  # object.
  class Opengraph

    # Reads every <meta> element of +doc+ whose +property+ starts with "og:"
    # (case-insensitive) and stores its +content+ in +object+ under the
    # property name without the prefix, dashes turned into underscores.
    #
    # Afterwards +object[:images]+ is reset to a list holding the og:image
    # value as { source_url: ... }, or to an empty list when there is none.
    #
    # Returns +object+.
    def self.parse(object, doc)
      doc.css('meta').each do |meta|
        property = meta.attribute('property')
        next unless property

        matched = property.to_s.match(/^og:(.+)$/i)
        next unless matched

        object[matched[1].gsub('-', '_')] = meta.attribute('content').to_s
      end

      # Assign first, append second: the entry is added to the list as stored
      # by +object+.
      object[:images] = []
      image = object[:image]
      object[:images] << { source_url: image } if image

      object
    end

  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'link_thumbnailer/rails/routes/mapping'
|
2
|
+
require 'link_thumbnailer/rails/routes/mapper'
|
3
|
+
|
4
|
+
module LinkThumbnailer
  module Rails

    # Draws the gem's routes into a host route set.
    class Routes

      # Routing helper mixed into the route mapper by +install!+.
      module Helper
        # Draws the gem's routes on the receiver. The optional block is
        # evaluated by Routes::Mapper to customise controllers, aliases and
        # skipped routes. +options+ is passed on to +generate_routes!+.
        def use_link_thumbnailer(options = {}, &block)
          builder = LinkThumbnailer::Rails::Routes.new(self, &block)
          builder.generate_routes!(options)
        end
      end

      # Makes +use_link_thumbnailer+ available on
      # ActionDispatch::Routing::Mapper.
      def self.install!
        ActionDispatch::Routing::Mapper.send(:include, LinkThumbnailer::Rails::Routes::Helper)
      end

      # The route set the routes are drawn on.
      attr_accessor :routes

      # routes  - object responding to +scope+ and +match+.
      # options - optional customisation block, kept for +generate_routes!+.
      def initialize(routes, &options)
        @routes = routes
        @options = options
      end

      # Builds the mapping from the customisation block and draws every route
      # that is not skipped inside a 'link' scope. The +options+ argument is
      # not used by this method.
      def generate_routes!(options)
        @mapping = Mapper.new.map(&@options)

        routes.scope('link', as: 'link') { map_route(:previews, :preview_routes) }
      end

      private

      # Calls the private drawing method +method+ with the mapping entry for
      # +name+, unless that route has been skipped.
      def map_route(name, method)
        return if @mapping.skipped?(name)

        send(method, @mapping[name])
      end

      # Draws the POST 'preview' route, JSON by default, on the configured
      # controller.
      def preview_routes(mapping)
        routes.scope(controller: mapping[:controllers]) do
          routes.match('preview', via: :post, action: :create, as: mapping[:as], defaults: { format: 'json' })
        end
      end

    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module LinkThumbnailer
  module Rails
    class Routes

      # Receiver of the routing customisation block: its methods record the
      # requested changes on a mapping object.
      class Mapper

        # mapping - object exposing +controllers+, +as+ and +skips=+;
        #           a fresh Mapping by default.
        def initialize(mapping = Mapping.new)
          @mapping = mapping
        end

        # Evaluates the given block (if any) in the context of this mapper and
        # returns the resulting mapping.
        def map(&block)
          instance_eval(&block) if block_given?

          @mapping
        end

        # Overrides controller names, e.g. controllers previews: 'my/previews'.
        def controllers(controller_names = {})
          @mapping.controllers.merge!(controller_names)
        end

        # Replaces the list of routes that must not be drawn.
        def skip_controllers(*controller_names)
          @mapping.skips = controller_names
        end

        # Overrides route aliases, e.g. as previews: :peek.
        def as(alias_names = {})
          @mapping.as.merge!(alias_names)
        end

      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module LinkThumbnailer
  module Rails
    class Routes

      # Routing settings: controller name and route alias per route group,
      # plus the list of groups that must not be drawn.
      class Mapping

        attr_accessor :controllers, :as, :skips

        # Starts with the defaults for the :previews group and nothing skipped.
        def initialize
          @controllers = { previews: 'link_thumbnailer/previews' }
          @as = { previews: :preview }
          @skips = []
        end

        # Returns the settings of the route group +routes+ as
        # { controllers: ..., as: ... }; unknown groups yield nil values.
        def [](routes)
          { controllers: @controllers[routes], as: @as[routes] }
        end

        # True when +controller+ is in the skip list.
        def skipped?(controller)
          @skips.include?(controller)
        end

      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module LinkThumbnailer

  # Mixin for image objects that remembers where the image came from and can
  # export a configurable set of attributes.
  module WebImage

    attr_accessor :source_url, :doc

    # Builds a Hash from the attributes named in
    # LinkThumbnailer.configuration.rmagick_attributes.
    #
    # Only attributes the image responds to are included; keys are symbols and
    # URI values are converted to Strings.
    def to_hash
      LinkThumbnailer.configuration.rmagick_attributes.each_with_object({}) do |attribute, hash|
        next unless respond_to?(attribute)

        value = send(attribute)
        hash[attribute.to_sym] = value.is_a?(URI) ? value.to_s : value
      end
    end

  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/link_thumbnailer/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for the rcarvalho-link_thumbnailer package.
Gem::Specification.new do |gem|
  gem.name          = "rcarvalho-link_thumbnailer"
  gem.version       = LinkThumbnailer::VERSION
  gem.authors       = ["Pierre-Louis Gottfrois"]
  gem.email         = ["pierrelouis.gottfrois@gmail.com"]
  gem.description   = %q{Ruby gem generating thumbnail images from a given URL.}
  gem.summary       = %q{Ruby gem ranking images from a given URL returning an object containing images and website informations.}
  gem.homepage      = "https://github.com/gottfrois/link_thumbnailer"

  # Package every file tracked by git. The listing is split on newlines so
  # that a tracked path containing spaces stays in one piece.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies.
  # NOTE(review): rake is declared as a runtime dependency; it looks like a
  # development-only tool here -- confirm before moving it.
  gem.add_dependency 'rake', '>= 0.9'
  gem.add_dependency 'nokogiri', '>= 1.5.5'
  gem.add_dependency 'hashie', '>= 1.2.0'
  gem.add_dependency 'net-http-persistent', '>= 2.7'
  gem.add_dependency 'rmagick', '>= 2.13.1'
  gem.add_dependency 'json', '>= 1.7.6'

  # Development-only dependencies.
  gem.add_development_dependency 'bundler', '>= 1.3'
  gem.add_development_dependency 'rspec', '>= 2.14'
  gem.add_development_dependency 'pry', '>= 0.9'
end
|