link_thumbnailer 2.6.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: aa4bcf1a567d87a3fd739645cc72dea9c04badb1
4
- data.tar.gz: b284daca927931ce058f024ec5425c0d3c2aadc2
3
+ metadata.gz: a6f72611485eeb14494ca2ecc6dc7c4a2c1d7723
4
+ data.tar.gz: 8968fef4fb3d981a568a6cd94b0de53cedd8833b
5
5
  SHA512:
6
- metadata.gz: d5c617733e8aca11eef6604eaab3d077077bc7dfaa8f533dcb6297ebb466a9d7fa6dfa29b73d04711d818cb6cde4c1d7f813f4350699402283c5836f9ec5a05d
7
- data.tar.gz: 841dd6b8ca5dc185a0e77b8931c6a5cfc9fe74340f35d528f80d7dbd1af3ede76933c6873d024c3b213361c5d94136469b5a6001f27b13e9c79917100f1f6f39
6
+ metadata.gz: 1aadd1793f33cec74721d013954627a552693d2a50fd1d1a7e630194b6d19fbe945c549797a9edb23bb2d1254c0b3dc77d6f737f41c32a4aa3c7bda3fea3f796
7
+ data.tar.gz: 65631f74a0d44eee946ce64cd3e4f55a1c0fbb55756f17855990c98efc5d161cf543dc34dc7f384414df1d78f469f1f39b8b1f30f15ad8c6b2a6ea07c2413003
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ # 3.0.0
2
+
3
+ - Improved description sorting.
4
+ - Refactored how graders work. More information [here](https://github.com/gottfrois/link_thumbnailer/wiki/How-to-build-your-own-Grader%3F)
5
+
1
6
  # 2.6.1
2
7
 
3
8
  - Fix remove useless dependency
@@ -68,4 +68,9 @@ LinkThumbnailer.configure do |config|
68
68
  # does not have to fetch its size and type.
69
69
  #
70
70
  # config.image_stats = true
71
+ #
72
+ # Whether you want LinkThumbnailer to raise an exception if the Content-Type of the HTTP request
73
+ # is not HTML or XML.
74
+ #
75
+ # config.raise_on_invalid_format = false
71
76
  end
@@ -51,9 +51,9 @@ module LinkThumbnailer
51
51
  ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :class) },
52
52
  ->(description) { ::LinkThumbnailer::Graders::HtmlAttribute.new(description, :id) },
53
53
  ->(description) { ::LinkThumbnailer::Graders::Position.new(description) },
54
- ->(description) { ::LinkThumbnailer::Graders::LinkDensity.new(description) }
54
+ ->(description) { ::LinkThumbnailer::Graders::LinkDensity.new(description) },
55
55
  ]
56
- @description_min_length = 25
56
+ @description_min_length = 50
57
57
  @positive_regex = /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i
58
58
  @negative_regex = /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget|modal/i
59
59
  @image_limit = 5
@@ -3,6 +3,7 @@ require 'link_thumbnailer/graders/base'
3
3
  require 'link_thumbnailer/graders/length'
4
4
  require 'link_thumbnailer/graders/html_attribute'
5
5
  require 'link_thumbnailer/graders/link_density'
6
+ require 'link_thumbnailer/graders/punctuation_density'
6
7
  require 'link_thumbnailer/graders/position'
7
8
 
8
9
  module LinkThumbnailer
@@ -17,14 +18,18 @@ module LinkThumbnailer
17
18
  super(config)
18
19
  end
19
20
 
21
+ # For the given description, multiplies together the probabilities returned by each grader.
22
+ #
23
+ # @return [Float] the probability for the given description to be considered good
20
24
  def call
21
- score = 0
25
+ probability = 1.0
26
+
22
27
  graders.each do |lambda|
23
28
  instance = lambda.call(description)
24
- score += instance.call(score)
29
+ probability *= instance.call.to_f
25
30
  end
26
31
 
27
- score
32
+ probability
28
33
  end
29
34
 
30
35
  private
@@ -13,7 +13,7 @@ module LinkThumbnailer
13
13
  super(config)
14
14
  end
15
15
 
16
- def call(current_score)
16
+ def call
17
17
  fail NotImplementedError
18
18
  end
19
19
 
@@ -24,7 +24,7 @@ module LinkThumbnailer
24
24
  end
25
25
 
26
26
  def text
27
- node.text
27
+ description.text
28
28
  end
29
29
 
30
30
  end
@@ -9,13 +9,10 @@ module LinkThumbnailer
9
9
  @attribute_name = attribute_name.to_sym
10
10
  end
11
11
 
12
- def call(current_score)
13
- return 0 unless attribute?
14
-
15
- score = 0
16
- score -= 25 if negative?
17
- score += 25 if positive?
18
- score
12
+ def call
13
+ return 1.0 if positive?
14
+ return 0.0 if negative?
15
+ 1.0
19
16
  end
20
17
 
21
18
  private
@@ -29,11 +26,11 @@ module LinkThumbnailer
29
26
  end
30
27
 
31
28
  def negative?
32
- attribute =~ negative_regex
29
+ attribute? && attribute =~ negative_regex
33
30
  end
34
31
 
35
32
  def positive?
36
- attribute =~ positive_regex
33
+ attribute? && attribute =~ positive_regex
37
34
  end
38
35
 
39
36
  def negative_regex
@@ -2,14 +2,34 @@ module LinkThumbnailer
2
2
  module Graders
3
3
  class Length < ::LinkThumbnailer::Graders::Base
4
4
 
5
- def call(current_score)
6
- return -Float::INFINITY if too_short?
5
+ def call
6
+ return 0.0 if too_short?
7
7
 
8
- [(text.length / 100).to_i, 3].min
8
+ y / ideal_description_gaussian_value
9
9
  end
10
10
 
11
11
  private
12
12
 
13
+ def get_gaussian_value_for(x)
14
+ Math.sqrt(2.0 * Math::PI ** 2) * Math.exp(-(x - ideal_description_length) ** 2 / 2.0 * 0.005 ** 2)
15
+ end
16
+
17
+ def x
18
+ text.length
19
+ end
20
+
21
+ def y
22
+ get_gaussian_value_for(x)
23
+ end
24
+
25
+ def ideal_description_length
26
+ 120.0
27
+ end
28
+
29
+ def ideal_description_gaussian_value
30
+ 4.442882938158366
31
+ end
32
+
13
33
  def too_short?
14
34
  text.length < config.description_min_length
15
35
  end
@@ -2,22 +2,13 @@ module LinkThumbnailer
2
2
  module Graders
3
3
  class LinkDensity < ::LinkThumbnailer::Graders::Base
4
4
 
5
- def call(current_score)
6
- return 0 if density_ratio == 0
7
- current_score *= density_ratio
5
+ def call
6
+ return 0.0 if text.length == 0
7
+ 1.0 - (links.count.to_f / text.length.to_f)
8
8
  end
9
9
 
10
10
  private
11
11
 
12
- def density
13
- return 0 if text.length == 0
14
- links.length / text.length.to_f
15
- end
16
-
17
- def density_ratio
18
- 1 - density
19
- end
20
-
21
12
  def links
22
13
  node.css('a').map(&:text).compact.reject(&:empty?)
23
14
  end
@@ -2,8 +2,8 @@ module LinkThumbnailer
2
2
  module Graders
3
3
  class Position < ::LinkThumbnailer::Graders::Base
4
4
 
5
- def call(current_score)
6
- 2.0 / description.position
5
+ def call
6
+ 1.0 - (description.position.to_f / description.candidates_number.to_f)
7
7
  end
8
8
 
9
9
  end
@@ -5,14 +5,15 @@ module LinkThumbnailer
5
5
  module Models
6
6
  class Description < ::LinkThumbnailer::Model
7
7
 
8
- attr_reader :node, :text, :position
9
- attr_accessor :score
10
-
11
- def initialize(node, text, position = 1)
12
- @node = node
13
- @text = sanitize(text)
14
- @position = position
15
- @score = compute_score
8
+ attr_reader :node, :text, :position, :candidates_number
9
+ attr_accessor :probability
10
+
11
+ def initialize(node, text, position = 1, candidates_number = 1)
12
+ @node = node
13
+ @text = sanitize(text)
14
+ @position = position
15
+ @candidates_number = candidates_number
16
+ @probability = compute_probability
16
17
  end
17
18
 
18
19
  def to_s
@@ -20,12 +21,12 @@ module LinkThumbnailer
20
21
  end
21
22
 
22
23
  def <=>(other)
23
- score <=> other.score
24
+ probability <=> other.probability
24
25
  end
25
26
 
26
27
  private
27
28
 
28
- def compute_score
29
+ def compute_probability
29
30
  ::LinkThumbnailer::Grader.new(self).call
30
31
  end
31
32
 
@@ -18,7 +18,7 @@ module LinkThumbnailer
18
18
  end
19
19
 
20
20
  def model_from_body
21
- nodes_from_body.each_with_index.map { |node, i| modelize(node, node.text, i + 1) }.sort.last
21
+ nodes_from_body.each_with_index.map { |node, i| modelize(node, node.text, i) }.sort.last
22
22
  end
23
23
 
24
24
  def node_from_meta
@@ -37,8 +37,8 @@ module LinkThumbnailer
37
37
  document.css('p,td')
38
38
  end
39
39
 
40
- def modelize(node, text, i = 1)
41
- model_class.new(node, text, i)
40
+ def modelize(node, text, i = 0)
41
+ model_class.new(node, text, i, nodes_from_body.count)
42
42
  end
43
43
 
44
44
  end
@@ -1,3 +1,3 @@
1
1
  module LinkThumbnailer
2
- VERSION = '2.6.1'
2
+ VERSION = '3.0.0'
3
3
  end
@@ -12,7 +12,7 @@ describe LinkThumbnailer::Configuration do
12
12
  it { expect(instance.blacklist_urls).to_not be_empty }
13
13
  it { expect(instance.attributes).to eq([:title, :images, :description, :videos, :favicon]) }
14
14
  it { expect(instance.graders).to_not be_empty }
15
- it { expect(instance.description_min_length).to eq(25) }
15
+ it { expect(instance.description_min_length).to eq(50) }
16
16
  it { expect(instance.positive_regex).to_not be_nil }
17
17
  it { expect(instance.negative_regex).to_not be_nil }
18
18
  it { expect(instance.image_limit).to eq(5) }
data/spec/grader_spec.rb CHANGED
@@ -7,8 +7,8 @@ describe LinkThumbnailer::Grader do
7
7
 
8
8
  describe '#call' do
9
9
 
10
- let(:score) { 10 }
11
- let(:grader) { double('grader', call: score) }
10
+ let(:probability) { 0.5 }
11
+ let(:grader) { double('grader', call: probability) }
12
12
  let(:lambda) { ->(_) { grader } }
13
13
  let(:graders) { [lambda, lambda] }
14
14
  let(:action) { instance.call }
@@ -17,7 +17,7 @@ describe LinkThumbnailer::Grader do
17
17
  instance.stub(:graders).and_return(graders)
18
18
  end
19
19
 
20
- it { expect(action).to eq(score * graders.size) }
20
+ it { expect(action).to eq(0.5 * 0.5) }
21
21
 
22
22
  end
23
23
 
@@ -2,11 +2,11 @@ require 'spec_helper'
2
2
 
3
3
  describe LinkThumbnailer::Graders::Base do
4
4
 
5
- let(:description) { double('description', node: node) }
5
+ let(:description) { double('description', text: 'foo', node: node) }
6
6
  let(:node) { double('node', text: 'foo') }
7
7
  let(:instance) { described_class.new(description) }
8
8
 
9
9
  it { expect(instance.send(:node)).to eq(description.node) }
10
- it { expect(instance.send(:text)).to eq(description.node.text) }
10
+ it { expect(instance.send(:text)).to eq(description.text) }
11
11
 
12
12
  end
@@ -7,7 +7,7 @@ describe LinkThumbnailer::Graders::HtmlAttribute do
7
7
 
8
8
  describe '#call' do
9
9
 
10
- let(:action) { instance.call(0) }
10
+ let(:action) { instance.call }
11
11
 
12
12
  context 'when current node does not match attribute' do
13
13
 
@@ -15,7 +15,7 @@ describe LinkThumbnailer::Graders::HtmlAttribute do
15
15
  instance.stub(:attribute?).and_return(false)
16
16
  end
17
17
 
18
- it { expect(action).to eq(0) }
18
+ it { expect(action).to eq(1.0) }
19
19
 
20
20
  end
21
21
 
@@ -27,7 +27,7 @@ describe LinkThumbnailer::Graders::HtmlAttribute do
27
27
  instance.stub(:positive?).and_return(false)
28
28
  end
29
29
 
30
- it { expect(action).to eq(-25) }
30
+ it { expect(action).to eq(0.0) }
31
31
 
32
32
  end
33
33
 
@@ -39,7 +39,7 @@ describe LinkThumbnailer::Graders::HtmlAttribute do
39
39
  instance.stub(:positive?).and_return(true)
40
40
  end
41
41
 
42
- it { expect(action).to eq(25) }
42
+ it { expect(action).to eq(1.0) }
43
43
 
44
44
  end
45
45
 
@@ -12,7 +12,7 @@ describe LinkThumbnailer::Graders::Length do
12
12
 
13
13
  describe '#call' do
14
14
 
15
- let(:action) { instance.call(0) }
15
+ let(:action) { instance.call }
16
16
 
17
17
  context 'when text is too short' do
18
18
 
@@ -20,7 +20,7 @@ describe LinkThumbnailer::Graders::Length do
20
20
  instance.stub(:too_short?).and_return(true)
21
21
  end
22
22
 
23
- it { expect(action).to eq(-Float::INFINITY) }
23
+ it { expect(action).to eq(0.0) }
24
24
 
25
25
  end
26
26
 
@@ -31,19 +31,27 @@ describe LinkThumbnailer::Graders::Length do
31
31
  instance.stub(:text).and_return(text)
32
32
  end
33
33
 
34
- context 'when text length is greater than 400' do
34
+ context 'when text length is 80' do
35
35
 
36
- let(:text) { 'f' * 400 }
36
+ let(:text) { 'f' * 80 }
37
37
 
38
- it { expect(action).to eq(3) }
38
+ it { expect(action).to eq(1.0) }
39
39
 
40
40
  end
41
41
 
42
- context 'when text length is less than 300' do
42
+ context 'when text length is 100' do
43
43
 
44
- let(:text) { 'f' * 299 }
44
+ let(:text) { 'f' * 100 }
45
45
 
46
- it { expect(action).to eq(2) }
46
+ it { expect(action).to be < 1.0 }
47
+
48
+ end
49
+
50
+ context 'when text length is 60' do
51
+
52
+ let(:text) { 'f' * 60 }
53
+
54
+ it { expect(action).to be < 1.0 }
47
55
 
48
56
  end
49
57
 
@@ -7,54 +7,41 @@ describe LinkThumbnailer::Graders::LinkDensity do
7
7
 
8
8
  describe '#call' do
9
9
 
10
- let(:previous_score) { 1 }
11
- let(:action) { instance.call(previous_score) }
12
-
13
- context 'when density_ratio is 0' do
14
-
15
- before do
16
- instance.stub(:density_ratio).and_return(0)
17
- end
18
-
19
- it { expect(action).to eq(0) }
10
+ let(:action) { instance.call }
20
11
 
12
+ before do
13
+ instance.stub(:text).and_return(text)
14
+ instance.stub(:links).and_return(links)
21
15
  end
22
16
 
23
- context 'when density is not 0' do
17
+ context 'when text length is 0' do
24
18
 
25
- before do
26
- instance.stub(:density_ratio).and_return(10)
27
- end
19
+ let(:text) { '' }
20
+ let(:links) { [] }
28
21
 
29
- it { expect(action).to eq(10) }
22
+ it { expect(action).to eq(0.0) }
30
23
 
31
24
  end
32
25
 
33
- end
26
+ context 'when text length is > 0' do
34
27
 
35
- describe '#density' do
28
+ let(:text) { 'foo' }
36
29
 
37
- let(:links) { ['foo'] }
38
- let(:action) { instance.send(:density) }
30
+ context 'and links is 0' do
39
31
 
40
- before do
41
- instance.stub(:links).and_return(links)
42
- instance.stub(:text).and_return(text)
43
- end
32
+ let(:links) { [] }
44
33
 
45
- context 'with text' do
34
+ it { expect(action).to eq(1.0) }
46
35
 
47
- let(:text) { 'abcd' }
48
-
49
- it { expect(action).to eq(0.25) }
36
+ end
50
37
 
51
- end
38
+ context 'and links is > 0' do
52
39
 
53
- context 'without text' do
40
+ let(:links) { [1] }
54
41
 
55
- let(:text) { '' }
42
+ it { expect(action).to be_within(0.001).of(0.666) }
56
43
 
57
- it { expect(action).to eq(0) }
44
+ end
58
45
 
59
46
  end
60
47
 
@@ -0,0 +1,47 @@
1
+ require 'spec_helper'
2
+
3
+ describe LinkThumbnailer::Graders::Position do
4
+
5
+ let(:description) { double('description') }
6
+ let(:instance) { described_class.new(description) }
7
+
8
+ describe '#call' do
9
+
10
+ let(:action) { instance.call }
11
+
12
+ context 'when position is 0' do
13
+
14
+ before do
15
+ description.stub(:position).and_return(0)
16
+ description.stub(:candidates_number).and_return(1)
17
+ end
18
+
19
+ it { expect(action).to eq(1.0) }
20
+
21
+ end
22
+
23
+ context 'when position is 1 over 1 candidates' do
24
+
25
+ before do
26
+ description.stub(:position).and_return(1)
27
+ description.stub(:candidates_number).and_return(1)
28
+ end
29
+
30
+ it { expect(action).to eq(0.0) }
31
+
32
+ end
33
+
34
+ context 'when position is 1 over more than 1 candidates' do
35
+
36
+ before do
37
+ description.stub(:position).and_return(1)
38
+ description.stub(:candidates_number).and_return(2)
39
+ end
40
+
41
+ it { expect(action).to eq(0.5) }
42
+
43
+ end
44
+
45
+ end
46
+
47
+ end
@@ -0,0 +1,50 @@
1
+ require 'spec_helper'
2
+
3
+ describe LinkThumbnailer::Graders::PunctuationDensity do
4
+
5
+ let(:description) { double('description') }
6
+ let(:instance) { described_class.new(description) }
7
+
8
+ describe '#call' do
9
+
10
+ let(:action) { instance.call }
11
+
12
+ before do
13
+ instance.stub(:text).and_return(text)
14
+ instance.stub(:punctuations).and_return(punctuations)
15
+ end
16
+
17
+ context 'when text length is 0' do
18
+
19
+ let(:text) { '' }
20
+ let(:punctuations) { [] }
21
+
22
+ it { expect(action).to eq(0.0) }
23
+
24
+ end
25
+
26
+ context 'when text length is > 0' do
27
+
28
+ let(:text) { 'foo' }
29
+
30
+ context 'and punctuations is 0' do
31
+
32
+ let(:punctuations) { [] }
33
+
34
+ it { expect(action).to eq(1.0) }
35
+
36
+ end
37
+
38
+ context 'and punctuations is > 0' do
39
+
40
+ let(:punctuations) { [1] }
41
+
42
+ it { expect(action).to be_within(0.001).of(0.666) }
43
+
44
+ end
45
+
46
+ end
47
+
48
+ end
49
+
50
+ end
@@ -22,37 +22,37 @@ describe LinkThumbnailer::Models::Description do
22
22
  describe '#<=>' do
23
23
 
24
24
  let(:another_instance) { described_class.new(node, text) }
25
- let(:score) { 5 }
25
+ let(:probability) { 0.5 }
26
26
  let(:action) { instance <=> another_instance }
27
27
 
28
28
  before do
29
- another_instance.score = score
29
+ another_instance.probability = probability
30
30
  end
31
31
 
32
- context 'when instance score is lower' do
32
+ context 'when instance probability is lower' do
33
33
 
34
34
  before do
35
- instance.score = score - 1
35
+ instance.probability = probability - 0.5
36
36
  end
37
37
 
38
38
  it { expect(action).to eq(-1) }
39
39
 
40
40
  end
41
41
 
42
- context 'when instance score is equal' do
42
+ context 'when instance probability is equal' do
43
43
 
44
44
  before do
45
- instance.score = score
45
+ instance.probability = probability
46
46
  end
47
47
 
48
48
  it { expect(action).to eq(0) }
49
49
 
50
50
  end
51
51
 
52
- context 'when instance score is greater' do
52
+ context 'when instance probability is greater' do
53
53
 
54
54
  before do
55
- instance.score = score + 1
55
+ instance.probability = probability + 0.5
56
56
  end
57
57
 
58
58
  it { expect(action).to eq(1) }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: link_thumbnailer
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.6.1
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pierre-Louis Gottfrois
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-22 00:00:00.000000000 Z
11
+ date: 2015-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -193,6 +193,8 @@ files:
193
193
  - spec/graders/html_attribute_spec.rb
194
194
  - spec/graders/length_spec.rb
195
195
  - spec/graders/link_density_spec.rb
196
+ - spec/graders/position_spec.rb
197
+ - spec/graders/punctuation_density_spec.rb
196
198
  - spec/image_comparators/size_spec.rb
197
199
  - spec/image_parsers/size_spec.rb
198
200
  - spec/image_parsers/type_spec.rb
@@ -251,6 +253,8 @@ test_files:
251
253
  - spec/graders/html_attribute_spec.rb
252
254
  - spec/graders/length_spec.rb
253
255
  - spec/graders/link_density_spec.rb
256
+ - spec/graders/position_spec.rb
257
+ - spec/graders/punctuation_density_spec.rb
254
258
  - spec/image_comparators/size_spec.rb
255
259
  - spec/image_parsers/size_spec.rb
256
260
  - spec/image_parsers/type_spec.rb