link_thumbnailer 3.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/CHANGELOG.md +334 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +210 -0
- data/Rakefile +9 -0
- data/lib/generators/link_thumbnailer/install_generator.rb +17 -0
- data/lib/generators/templates/initializer.rb +89 -0
- data/lib/link_thumbnailer.rb +38 -0
- data/lib/link_thumbnailer/configuration.rb +72 -0
- data/lib/link_thumbnailer/exceptions.rb +11 -0
- data/lib/link_thumbnailer/grader.rb +43 -0
- data/lib/link_thumbnailer/graders/base.rb +39 -0
- data/lib/link_thumbnailer/graders/html_attribute.rb +48 -0
- data/lib/link_thumbnailer/graders/length.rb +37 -0
- data/lib/link_thumbnailer/graders/link_density.rb +20 -0
- data/lib/link_thumbnailer/graders/position.rb +13 -0
- data/lib/link_thumbnailer/image_comparator.rb +26 -0
- data/lib/link_thumbnailer/image_comparators/base.rb +19 -0
- data/lib/link_thumbnailer/image_comparators/size.rb +13 -0
- data/lib/link_thumbnailer/image_parser.rb +62 -0
- data/lib/link_thumbnailer/image_validator.rb +32 -0
- data/lib/link_thumbnailer/model.rb +20 -0
- data/lib/link_thumbnailer/models/description.rb +37 -0
- data/lib/link_thumbnailer/models/favicon.rb +27 -0
- data/lib/link_thumbnailer/models/image.rb +56 -0
- data/lib/link_thumbnailer/models/title.rb +22 -0
- data/lib/link_thumbnailer/models/video.rb +44 -0
- data/lib/link_thumbnailer/models/website.rb +54 -0
- data/lib/link_thumbnailer/page.rb +43 -0
- data/lib/link_thumbnailer/parser.rb +15 -0
- data/lib/link_thumbnailer/processor.rb +128 -0
- data/lib/link_thumbnailer/railtie.rb +6 -0
- data/lib/link_thumbnailer/response.rb +39 -0
- data/lib/link_thumbnailer/scraper.rb +62 -0
- data/lib/link_thumbnailer/scrapers/base.rb +69 -0
- data/lib/link_thumbnailer/scrapers/default/base.rb +12 -0
- data/lib/link_thumbnailer/scrapers/default/description.rb +49 -0
- data/lib/link_thumbnailer/scrapers/default/favicon.rb +38 -0
- data/lib/link_thumbnailer/scrapers/default/images.rb +78 -0
- data/lib/link_thumbnailer/scrapers/default/title.rb +27 -0
- data/lib/link_thumbnailer/scrapers/default/videos.rb +18 -0
- data/lib/link_thumbnailer/scrapers/opengraph/base.rb +45 -0
- data/lib/link_thumbnailer/scrapers/opengraph/description.rb +12 -0
- data/lib/link_thumbnailer/scrapers/opengraph/favicon.rb +17 -0
- data/lib/link_thumbnailer/scrapers/opengraph/image.rb +107 -0
- data/lib/link_thumbnailer/scrapers/opengraph/images.rb +18 -0
- data/lib/link_thumbnailer/scrapers/opengraph/title.rb +12 -0
- data/lib/link_thumbnailer/scrapers/opengraph/video.rb +115 -0
- data/lib/link_thumbnailer/scrapers/opengraph/videos.rb +18 -0
- data/lib/link_thumbnailer/uri.rb +20 -0
- data/lib/link_thumbnailer/version.rb +5 -0
- data/lib/link_thumbnailer/video_parser.rb +47 -0
- data/link_thumbnailer.gemspec +29 -0
- data/spec/configuration_spec.rb +61 -0
- data/spec/fixture_spec.rb +114 -0
- data/spec/fixtures/bar.png +2907 -0
- data/spec/fixtures/default_from_body.html +13 -0
- data/spec/fixtures/default_from_meta.html +12 -0
- data/spec/fixtures/foo.png +0 -0
- data/spec/fixtures/google_shift_jis.html +6 -0
- data/spec/fixtures/google_utf8.html +6 -0
- data/spec/fixtures/og_not_valid_example.html +12 -0
- data/spec/fixtures/og_valid_example.html +18 -0
- data/spec/fixtures/og_valid_multi_image_example.html +13 -0
- data/spec/fixtures/og_valid_multi_video_example.html +13 -0
- data/spec/grader_spec.rb +27 -0
- data/spec/graders/base_spec.rb +14 -0
- data/spec/graders/html_attribute_spec.rb +50 -0
- data/spec/graders/length_spec.rb +93 -0
- data/spec/graders/link_density_spec.rb +52 -0
- data/spec/graders/position_spec.rb +49 -0
- data/spec/image_comparators/size_spec.rb +58 -0
- data/spec/image_validator_spec.rb +37 -0
- data/spec/model_spec.rb +27 -0
- data/spec/models/description_spec.rb +66 -0
- data/spec/models/favicon_spec.rb +12 -0
- data/spec/models/image_spec.rb +95 -0
- data/spec/models/title_spec.rb +26 -0
- data/spec/models/video_spec.rb +49 -0
- data/spec/models/website_spec.rb +51 -0
- data/spec/page_spec.rb +28 -0
- data/spec/processor_spec.rb +410 -0
- data/spec/response_spec.rb +62 -0
- data/spec/scraper_spec.rb +70 -0
- data/spec/scrapers/base_spec.rb +69 -0
- data/spec/scrapers/opengraph/base_spec.rb +96 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/uri_spec.rb +44 -0
- data/spec/video_parser_spec.rb +148 -0
- metadata +271 -0
data/Rakefile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkThumbnailer
  module Generators
    # Rails generator (subclass of Rails::Generators::Base) that writes the
    # LinkThumbnailer initializer into the application.
    class InstallGenerator < ::Rails::Generators::Base
      # Templates live in the `templates` directory located one level above
      # the directory containing this file.
      source_root File.expand_path('../templates', __dir__)

      desc 'Creates a LinkThumbnailer initializer for your application.'

      # Renders the `initializer.rb` template to
      # `config/initializers/link_thumbnailer.rb`.
      def copy_initializer
        template('initializer.rb', 'config/initializers/link_thumbnailer.rb')
      end
    end
  end
end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Use this hook to configure LinkThumbnailer behaviors.
|
|
4
|
+
LinkThumbnailer.configure do |config|
|
|
5
|
+
# Number of redirects to follow before raising an exception when trying to parse the given url.
|
|
6
|
+
#
|
|
7
|
+
# config.redirect_limit = 3
|
|
8
|
+
|
|
9
|
+
# Set user agent
|
|
10
|
+
#
|
|
11
|
+
# config.user_agent = 'link_thumbnailer'
|
|
12
|
+
|
|
13
|
+
# Enable or disable SSL verification
|
|
14
|
+
#
|
|
15
|
+
# config.verify_ssl = true
|
|
16
|
+
|
|
17
|
+
# The amount of time in seconds to wait for a connection to be opened.
|
|
18
|
+
# If the HTTP object cannot open a connection in this many seconds,
|
|
19
|
+
# it raises a Net::OpenTimeout exception.
|
|
20
|
+
#
|
|
21
|
+
# See http://www.ruby-doc.org/stdlib-2.1.1/libdoc/net/http/rdoc/Net/HTTP.html#open_timeout
|
|
22
|
+
#
|
|
23
|
+
# config.http_open_timeout = 5
|
|
24
|
+
|
|
25
|
+
# List of blacklisted urls you want to skip when searching for images.
|
|
26
|
+
#
|
|
27
|
+
# config.blacklist_urls = [
|
|
28
|
+
# %r{^http://ad\.doubleclick\.net/},
|
|
29
|
+
# %r{^http://b\.scorecardresearch\.com/},
|
|
30
|
+
# %r{^http://pixel\.quantserve\.com/},
|
|
31
|
+
# %r{^http://s7\.addthis\.com/}
|
|
32
|
+
# ]
|
|
33
|
+
|
|
34
|
+
# List of attributes you want LinkThumbnailer to fetch on a website.
|
|
35
|
+
#
|
|
36
|
+
# config.attributes = [:title, :images, :description, :videos, :favicon]
|
|
37
|
+
|
|
38
|
+
# List of procedures used to rate the website description. Add your custom class
|
|
39
|
+
# here. See wiki for more details on how to build your own graders.
|
|
40
|
+
#
|
|
41
|
+
# config.graders = [
|
|
42
|
+
# ->(description) { ::LinkThumbnailer::Graders::Length.new(description) },
|
|
43
|
+
# ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :class) },
|
|
44
|
+
# ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :id) },
|
|
45
|
+
# ->(description) { ::LinkThumbnailer::Graders::Position.new(description, weight: 3) },
|
|
46
|
+
# ->(description) { ::LinkThumbnailer::Graders::LinkDensity.new(description) }
|
|
47
|
+
# ]
|
|
48
|
+
|
|
49
|
+
# Minimum description length for a website.
|
|
50
|
+
#
|
|
51
|
+
# config.description_min_length = 50
|
|
52
|
+
|
|
53
|
+
# Regex of words considered positive to rate website description.
|
|
54
|
+
#
|
|
55
|
+
# config.positive_regex = /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i
|
|
56
|
+
|
|
57
|
+
# Regex of words considered negative to rate website description.
|
|
58
|
+
#
|
|
59
|
+
# config.negative_regex = /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget|modal/i
|
|
60
|
+
|
|
61
|
+
# Number of images to fetch. Fetching too many images will be slow.
|
|
62
|
+
# Note that LinkThumbnailer will only sort fetched images between each other.
|
|
63
|
+
# Meaning that there could be a "better" image on the page.
|
|
64
|
+
#
|
|
65
|
+
# config.image_limit = 5
|
|
66
|
+
|
|
67
|
+
# Whether you want LinkThumbnailer to return image size and type or not.
|
|
68
|
+
# Setting this value to false will increase performance since, for each image, LinkThumbnailer
|
|
69
|
+
# does not have to fetch its size and type.
|
|
70
|
+
#
|
|
71
|
+
# config.image_stats = true
|
|
72
|
+
|
|
73
|
+
# Whether you want LinkThumbnailer to raise an exception if the Content-Type of the HTTP request
|
|
74
|
+
# is not an html or xml.
|
|
75
|
+
#
|
|
76
|
+
# config.raise_on_invalid_format = false
|
|
77
|
+
|
|
78
|
+
# Sets the number of concurrent HTTP connections that can be opened to fetch image information such as size and type.
|
|
79
|
+
#
|
|
80
|
+
# config.max_concurrency = 20
|
|
81
|
+
|
|
82
|
+
# Defines the strategies used to scrape the website. See the [Open Graph Protocol](http://ogp.me/) for more information.
|
|
83
|
+
#
|
|
84
|
+
# config.scrapers = [:opengraph, :default]
|
|
85
|
+
|
|
86
|
+
# Sets the default encoding.
|
|
87
|
+
#
|
|
88
|
+
# config.encoding = 'utf-8'
|
|
89
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'link_thumbnailer/version'
|
|
5
|
+
require 'link_thumbnailer/configuration'
|
|
6
|
+
require 'link_thumbnailer/exceptions'
|
|
7
|
+
require 'link_thumbnailer/page'
|
|
8
|
+
|
|
9
|
+
module LinkThumbnailer
  class << self
    # The page built by the most recent call to {generate}; nil before the
    # first call. It is stored at module level and replaced on every call.
    attr_reader :page

    # Builds a page for the given url and runs its generation.
    #
    # @param url [String] address handed to LinkThumbnailer::Page
    # @param options [Hash] options forwarded untouched to LinkThumbnailer::Page
    # @return whatever LinkThumbnailer::Page#generate returns
    def generate(url, options = {})
      current_page = ::LinkThumbnailer::Page.new(url, options)
      @page = current_page
      current_page.generate
    end
  end
end
|
|
24
|
+
|
|
25
|
+
# Rails is optional: try to load it and ignore a LoadError.
begin
  require 'rails'
rescue LoadError
  # Rails could not be loaded; the check below reports it.
end

# Print a hint on stderr when the Rails constant is still undefined.
unless defined?(Rails)
  $stderr.puts <<-EOC
warning: no framework detected.

Your Gemfile might not be configured properly.
---- e.g. ----
Rails:
gem 'link_thumbnailer'

EOC
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkThumbnailer
  class << self
    # Access point for the gem configuration.
    #
    # The instance is created on first access and memoized on the module.
    #
    # @return [LinkThumbnailer::Configuration] the shared configuration instance.
    def config
      @config ||= Configuration.new
    end

    # Configuration hook used in the gem initializer. Convenient way to set
    # all the gem settings at once. Does nothing when no block is given.
    #
    # @example inside config/initializers/link_thumbnailer.rb
    #   LinkThumbnailer.configure do |config|
    #     config.user_agent = 'link_thumbnailer'
    #   end
    #
    # @yieldparam config [LinkThumbnailer::Configuration] the shared configuration
    # @return [void]
    def configure
      return unless block_given?

      yield(config)
    end
  end

  # Holds every setting of the gem together with its default value.
  class Configuration
    attr_accessor :redirect_limit, :blacklist_urls, :user_agent, :verify_ssl,
                  :http_open_timeout, :http_read_timeout, :attributes, :graders,
                  :description_min_length, :positive_regex, :negative_regex,
                  :image_limit, :image_stats, :raise_on_invalid_format,
                  :max_concurrency, :scrapers, :http_override_headers, :encoding

    # `http_timeout` / `http_timeout=` are aliases of the open-timeout accessors.
    alias_method :http_timeout, :http_open_timeout
    alias_method :http_timeout=, :http_open_timeout=

    # Create a new instance populated with the default settings.
    #
    # @return [LinkThumbnailer::Configuration]
    def initialize
      # HTTP settings
      @redirect_limit          = 3
      @user_agent              = 'link_thumbnailer'
      @verify_ssl              = true
      @http_open_timeout       = 5
      @http_read_timeout       = 5
      @http_override_headers   = { 'Accept-Encoding' => 'none' }
      @raise_on_invalid_format = false
      @max_concurrency         = 20
      @encoding                = 'utf-8'

      # What to extract and with which strategies
      @attributes = %i[title images description videos favicon]
      @scrapers   = %i[opengraph default]

      # Images
      @blacklist_urls = default_blacklist_urls
      @image_limit    = 5
      @image_stats    = true

      # Description rating
      @graders                = default_graders
      @description_min_length = 50
      @positive_regex = /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i
      @negative_regex = /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget|modal/i
    end

    private

    # URL patterns stored in `blacklist_urls` by default.
    def default_blacklist_urls
      [
        %r{^http://ad\.doubleclick\.net/},
        %r{^http://b\.scorecardresearch\.com/},
        %r{^http://pixel\.quantserve\.com/},
        %r{^http://s7\.addthis\.com/}
      ]
    end

    # Default grader factories. Each lambda receives a description and builds
    # one grader instance for it.
    def default_graders
      [
        ->(description) { ::LinkThumbnailer::Graders::Length.new(description) },
        ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :class) },
        ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :id) },
        # The `weigth` spelling is the option key that Graders::Base#weight reads.
        ->(description) { ::LinkThumbnailer::Graders::Position.new(description, weigth: 3) },
        ->(description) { ::LinkThumbnailer::Graders::LinkDensity.new(description) }
      ]
    end
  end
end
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkThumbnailer
  # Root of the gem's error hierarchy; every class below inherits from it.
  class Exceptions < StandardError; end

  class RedirectLimit < Exceptions; end

  class BadUriFormat < Exceptions; end

  class FormatNotSupported < Exceptions; end

  class ScraperInvalid < Exceptions; end

  class HTTPError < Exceptions; end

  # NOTE: inside this namespace the constant shadows Ruby's top-level ::SyntaxError.
  class SyntaxError < Exceptions; end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'delegate'
|
|
4
|
+
require 'link_thumbnailer/graders/base'
|
|
5
|
+
require 'link_thumbnailer/graders/length'
|
|
6
|
+
require 'link_thumbnailer/graders/html_attribute'
|
|
7
|
+
require 'link_thumbnailer/graders/link_density'
|
|
8
|
+
require 'link_thumbnailer/graders/position'
|
|
9
|
+
|
|
10
|
+
module LinkThumbnailer
  # Combines the scores of all configured graders for one description.
  # Delegates unknown messages to the configuration of the current page.
  class Grader < ::SimpleDelegator
    attr_reader :config, :description

    # @param description the description object handed to every grader
    def initialize(description)
      @description = description
      @config      = ::LinkThumbnailer.page.config

      super(@config)
    end

    # For the given description, multiplies together the probabilities
    # returned by each grader, every one raised to the power of its weight.
    #
    # @return [Float] the probability for the given description to be
    #   considered good (1.0 when no grader is configured)
    def call
      graders.reduce(1.0) do |probability, factory|
        grader = factory.call(description)
        probability * grader.call.to_f ** grader.weight
      end
    end

    private

    # Grader factories taken from the configuration.
    def graders
      config.graders
    end
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'delegate'
|
|
4
|
+
|
|
5
|
+
module LinkThumbnailer
  module Graders
    # Common behaviour of all description graders. Delegates unknown messages
    # to the configuration of the current page.
    class Base < ::SimpleDelegator
      attr_reader :config, :description, :options

      # @param description the description to grade; must respond to `node`
      #   and `text`
      # @param options [Hash] grader options. `:weight` sets the exponent
      #   returned by {#weight}; the misspelled `:weigth` is still accepted
      #   for backward compatibility.
      def initialize(description, options = {})
        @config      = ::LinkThumbnailer.page.config
        @description = description
        @options     = options

        super(config)
      end

      # Computes the grade. Must be implemented by subclasses.
      #
      # @raise [NotImplementedError] always, in this base class
      def call
        fail NotImplementedError
      end

      # Weight of this grader.
      #
      # Reads the correctly spelled `:weight` option first and falls back to
      # the historical `:weigth` key, so both spellings work.
      #
      # @return [Numeric] the configured weight, 1 when none was given
      def weight
        options.fetch(:weight) { options.fetch(:weigth, 1) }
      end

      private

      # Node of the description being graded.
      def node
        description.node
      end

      # Text of the description being graded.
      def text
        description.text
      end
    end
  end
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkThumbnailer
  module Graders
    # Grades a description from one HTML attribute of its node (for example
    # `class` or `id`), using the configured positive and negative regexes.
    class HtmlAttribute < ::LinkThumbnailer::Graders::Base
      attr_reader :attribute_name

      # @param description the description to grade
      # @param attribute_name [#to_sym] name of the node attribute to inspect
      # @param options [Hash] grader options forwarded to Graders::Base, so
      #   this grader can be weighted like the others
      def initialize(description, attribute_name, options = {})
        super(description, options)
        @attribute_name = attribute_name.to_sym
      end

      # A positive match wins over a negative one.
      #
      # @return [Float] 0.0 when the attribute matches only the negative
      #   regex; 1.0 otherwise (positive match, no match, or missing/empty
      #   attribute)
      def call
        return 1.0 if positive?

        negative? ? 0.0 : 1.0
      end

      private

      # Value of the inspected attribute on the description node.
      def attribute
        node[attribute_name]
      end

      # Truthy when the attribute is present and not empty.
      def attribute?
        attribute && !attribute.empty?
      end

      def negative?
        attribute? && attribute =~ negative_regex
      end

      def positive?
        attribute? && attribute =~ positive_regex
      end

      def negative_regex
        config.negative_regex
      end

      def positive_regex
        config.positive_regex
      end
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkThumbnailer
  module Graders
    # Grades a description by how close its text length is to an ideal length.
    class Length < ::LinkThumbnailer::Graders::Base
      # @return [Float] 0.0 when the text is shorter than
      #   `config.description_min_length`; otherwise the bell-curve value at
      #   the text length divided by its value at the ideal length, so a text
      #   of exactly the ideal length scores 1.0
      def call
        if too_short?
          0.0
        else
          y / get_gaussian_value_for(ideal_description_length)
        end
      end

      private

      # Bell curve centred on the ideal description length.
      #
      # @param length [Numeric] text length to evaluate
      # @return [Float]
      def get_gaussian_value_for(length)
        amplitude = Math.sqrt(2.0 * Math::PI ** 2)
        distance  = length - ideal_description_length
        exponent  = -(distance ** 2) / 2.0 * 0.005 ** 2

        amplitude * Math.exp(exponent)
      end

      # Length of the description text.
      def x
        text.length
      end

      # Curve value for the description text length.
      def y
        get_gaussian_value_for(x)
      end

      # Ideal length, taken from the `:ideal_description_length` option
      # (120 when not given), as a Float.
      def ideal_description_length
        options.fetch(:ideal_description_length, 120).to_f
      end

      def too_short?
        text.length < config.description_min_length
      end
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkThumbnailer
  module Graders
    # Grades a description by the number of links it contains relative to
    # the length of its text.
    class LinkDensity < ::LinkThumbnailer::Graders::Base
      # @return [Float] 0.0 for an empty text; otherwise 1.0 minus the number
      #   of non-empty links divided by the text length
      def call
        text_length = text.length
        return 0.0 if text_length.zero?

        ratio = links.size.to_f / text_length.to_f
        1.0 - ratio
      end

      private

      # Texts of the `a` elements found under the node, without nil or empty
      # entries.
      def links
        anchor_texts = node.css('a').map(&:text)
        anchor_texts.reject { |anchor_text| anchor_text.nil? || anchor_text.empty? }
      end
    end
  end
end
|