dq-readability 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/dq-readability.gemspec +1 -1
  2. data/lib/dq-readability.rb +22 -22
  3. metadata +11 -11
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "dq-readability"
6
- s.version = '1.0.0'
6
+ s.version = '1.0.1'
7
7
  s.authors = ["Prateek Papriwal"]
8
8
  s.email = ["papriwalprateek@gmail.com"]
9
9
  s.homepage = "http://github.com/DaQwest/dq-readability"
@@ -481,28 +481,28 @@ module DQReadability
481
481
  to_remove = false
482
482
  reason = ""
483
483
 
484
- if (counts["img"] > counts["p"]) && (counts["img"] > 1)
485
- reason = "too many images"
486
- to_remove = true
487
- elsif counts["li"] > counts["p"] && name != "ul" && name != "ol"
488
- reason = "more <li>s than <p>s"
489
- to_remove = true
490
- elsif counts["input"] > (counts["p"] / 3).to_i
491
- reason = "less than 3x <p>s than <input>s"
492
- to_remove = true
493
- elsif (content_length < options[:min_text_length]) && (counts["img"] != 1)
494
- reason = "too short a content length without a single image"
495
- to_remove = true
496
- elsif weight < 25 && link_density > 0.2
497
- reason = "too many links for its weight (#{weight})"
498
- to_remove = true
499
- elsif weight >= 25 && link_density > 0.5
500
- reason = "too many links for its weight (#{weight})"
501
- to_remove = true
502
- elsif (counts["embed"] == 1 && content_length < 75) || counts["embed"] > 1
503
- reason = "<embed>s with too short a content length, or too many <embed>s"
504
- to_remove = true
505
- end
484
+ # if (counts["img"] > counts["p"]) && (counts["img"] > 1)
485
+ # reason = "too many images"
486
+ # to_remove = true
487
+ # elsif counts["li"] > counts["p"] && name != "ul" && name != "ol"
488
+ # reason = "more <li>s than <p>s"
489
+ # to_remove = true
490
+ # elsif counts["input"] > (counts["p"] / 3).to_i
491
+ # reason = "less than 3x <p>s than <input>s"
492
+ # to_remove = true
493
+ # elsif (content_length < options[:min_text_length]) && (counts["img"] != 1)
494
+ # reason = "too short a content length without a single image"
495
+ # to_remove = true
496
+ # elsif weight < 25 && link_density > 0.2
497
+ # reason = "too many links for its weight (#{weight})"
498
+ # to_remove = true
499
+ # elsif weight >= 25 && link_density > 0.5
500
+ # reason = "too many links for its weight (#{weight})"
501
+ # to_remove = true
502
+ # elsif (counts["embed"] == 1 && content_length < 75) || counts["embed"] > 1
503
+ # reason = "<embed>s with too short a content length, or too many <embed>s"
504
+ # to_remove = true
505
+ # end
506
506
 
507
507
  if to_remove
508
508
  debug("Conditionally cleaned #{name}##{el[:id]}.#{el[:class]} with weight #{weight} and content score #{content_score} because it has #{reason}.")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dq-readability
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2014-01-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &16278660 !ruby/object:Gem::Requirement
16
+ requirement: &11035780 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '2.8'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *16278660
24
+ version_requirements: *11035780
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec-expectations
27
- requirement: &16277660 !ruby/object:Gem::Requirement
27
+ requirement: &11034780 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '2.8'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *16277660
35
+ version_requirements: *11034780
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rr
38
- requirement: &16276900 !ruby/object:Gem::Requirement
38
+ requirement: &11034020 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '1.0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *16276900
46
+ version_requirements: *11034020
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: nokogiri
49
- requirement: &16276120 !ruby/object:Gem::Requirement
49
+ requirement: &11033240 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 1.4.2
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *16276120
57
+ version_requirements: *11033240
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: guess_html_encoding
60
- requirement: &16275520 !ruby/object:Gem::Requirement
60
+ requirement: &11032640 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,7 +65,7 @@ dependencies:
65
65
  version: 0.0.4
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *16275520
68
+ version_requirements: *11032640
69
69
  description: Port of arc90's readability project to ruby. The base code is derived
70
70
  from https://github.com/cantino/ruby-readability
71
71
  email: