dq-readability 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/dq-readability.gemspec +1 -1
  2. data/lib/dq-readability.rb +22 -22
  3. metadata +11 -11
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "dq-readability"
6
- s.version = '1.0.0'
6
+ s.version = '1.0.1'
7
7
  s.authors = ["Prateek Papriwal"]
8
8
  s.email = ["papriwalprateek@gmail.com"]
9
9
  s.homepage = "http://github.com/DaQwest/dq-readability"
@@ -481,28 +481,28 @@ module DQReadability
481
481
  to_remove = false
482
482
  reason = ""
483
483
 
484
- if (counts["img"] > counts["p"]) && (counts["img"] > 1)
485
- reason = "too many images"
486
- to_remove = true
487
- elsif counts["li"] > counts["p"] && name != "ul" && name != "ol"
488
- reason = "more <li>s than <p>s"
489
- to_remove = true
490
- elsif counts["input"] > (counts["p"] / 3).to_i
491
- reason = "less than 3x <p>s than <input>s"
492
- to_remove = true
493
- elsif (content_length < options[:min_text_length]) && (counts["img"] != 1)
494
- reason = "too short a content length without a single image"
495
- to_remove = true
496
- elsif weight < 25 && link_density > 0.2
497
- reason = "too many links for its weight (#{weight})"
498
- to_remove = true
499
- elsif weight >= 25 && link_density > 0.5
500
- reason = "too many links for its weight (#{weight})"
501
- to_remove = true
502
- elsif (counts["embed"] == 1 && content_length < 75) || counts["embed"] > 1
503
- reason = "<embed>s with too short a content length, or too many <embed>s"
504
- to_remove = true
505
- end
484
+ # if (counts["img"] > counts["p"]) && (counts["img"] > 1)
485
+ # reason = "too many images"
486
+ # to_remove = true
487
+ # elsif counts["li"] > counts["p"] && name != "ul" && name != "ol"
488
+ # reason = "more <li>s than <p>s"
489
+ # to_remove = true
490
+ # elsif counts["input"] > (counts["p"] / 3).to_i
491
+ # reason = "less than 3x <p>s than <input>s"
492
+ # to_remove = true
493
+ # elsif (content_length < options[:min_text_length]) && (counts["img"] != 1)
494
+ # reason = "too short a content length without a single image"
495
+ # to_remove = true
496
+ # elsif weight < 25 && link_density > 0.2
497
+ # reason = "too many links for its weight (#{weight})"
498
+ # to_remove = true
499
+ # elsif weight >= 25 && link_density > 0.5
500
+ # reason = "too many links for its weight (#{weight})"
501
+ # to_remove = true
502
+ # elsif (counts["embed"] == 1 && content_length < 75) || counts["embed"] > 1
503
+ # reason = "<embed>s with too short a content length, or too many <embed>s"
504
+ # to_remove = true
505
+ # end
506
506
 
507
507
  if to_remove
508
508
  debug("Conditionally cleaned #{name}##{el[:id]}.#{el[:class]} with weight #{weight} and content score #{content_score} because it has #{reason}.")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dq-readability
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2014-01-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &16278660 !ruby/object:Gem::Requirement
16
+ requirement: &11035780 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '2.8'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *16278660
24
+ version_requirements: *11035780
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec-expectations
27
- requirement: &16277660 !ruby/object:Gem::Requirement
27
+ requirement: &11034780 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '2.8'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *16277660
35
+ version_requirements: *11034780
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rr
38
- requirement: &16276900 !ruby/object:Gem::Requirement
38
+ requirement: &11034020 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '1.0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *16276900
46
+ version_requirements: *11034020
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: nokogiri
49
- requirement: &16276120 !ruby/object:Gem::Requirement
49
+ requirement: &11033240 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 1.4.2
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *16276120
57
+ version_requirements: *11033240
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: guess_html_encoding
60
- requirement: &16275520 !ruby/object:Gem::Requirement
60
+ requirement: &11032640 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,7 +65,7 @@ dependencies:
65
65
  version: 0.0.4
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *16275520
68
+ version_requirements: *11032640
69
69
  description: Port of arc90's readability project to ruby. The base code is derived
70
70
  from https://github.com/cantino/ruby-readability
71
71
  email: