dq-readability 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/dq-readability.gemspec +1 -1
- data/lib/dq-readability.rb +22 -22
- metadata +11 -11
data/dq-readability.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "dq-readability"
|
6
|
-
s.version = '1.0.0'
|
6
|
+
s.version = '1.0.1'
|
7
7
|
s.authors = ["Prateek Papriwal"]
|
8
8
|
s.email = ["papriwalprateek@gmail.com"]
|
9
9
|
s.homepage = "http://github.com/DaQwest/dq-readability"
|
data/lib/dq-readability.rb
CHANGED
@@ -481,28 +481,28 @@ module DQReadability
|
|
481
481
|
to_remove = false
|
482
482
|
reason = ""
|
483
483
|
|
484
|
-
if (counts["img"] > counts["p"]) && (counts["img"] > 1)
|
485
|
-
reason = "too many images"
|
486
|
-
to_remove = true
|
487
|
-
elsif counts["li"] > counts["p"] && name != "ul" && name != "ol"
|
488
|
-
reason = "more <li>s than <p>s"
|
489
|
-
to_remove = true
|
490
|
-
|
491
|
-
reason = "less than 3x <p>s than <input>s"
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
to_remove = true
|
496
|
-
|
497
|
-
reason = "too many links for its weight (#{weight})"
|
498
|
-
to_remove = true
|
499
|
-
elsif weight >= 25 && link_density > 0.5
|
500
|
-
reason = "too many links for its weight (#{weight})"
|
501
|
-
to_remove = true
|
502
|
-
elsif (counts["embed"] == 1 && content_length < 75) || counts["embed"] > 1
|
503
|
-
reason = "<embed>s with too short a content length, or too many <embed>s"
|
504
|
-
to_remove = true
|
505
|
-
end
|
484
|
+
# if (counts["img"] > counts["p"]) && (counts["img"] > 1)
|
485
|
+
# reason = "too many images"
|
486
|
+
# to_remove = true
|
487
|
+
# elsif counts["li"] > counts["p"] && name != "ul" && name != "ol"
|
488
|
+
# reason = "more <li>s than <p>s"
|
489
|
+
# to_remove = true
|
490
|
+
# elsif counts["input"] > (counts["p"] / 3).to_i
|
491
|
+
# reason = "less than 3x <p>s than <input>s"
|
492
|
+
# to_remove = true
|
493
|
+
# elsif (content_length < options[:min_text_length]) && (counts["img"] != 1)
|
494
|
+
# reason = "too short a content length without a single image"
|
495
|
+
# to_remove = true
|
496
|
+
# elsif weight < 25 && link_density > 0.2
|
497
|
+
# reason = "too many links for its weight (#{weight})"
|
498
|
+
# to_remove = true
|
499
|
+
# elsif weight >= 25 && link_density > 0.5
|
500
|
+
# reason = "too many links for its weight (#{weight})"
|
501
|
+
# to_remove = true
|
502
|
+
# elsif (counts["embed"] == 1 && content_length < 75) || counts["embed"] > 1
|
503
|
+
# reason = "<embed>s with too short a content length, or too many <embed>s"
|
504
|
+
# to_remove = true
|
505
|
+
# end
|
506
506
|
|
507
507
|
if to_remove
|
508
508
|
debug("Conditionally cleaned #{name}##{el[:id]}.#{el[:class]} with weight #{weight} and content score #{content_score} because it has #{reason}.")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dq-readability
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2014-01-22 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &11035780 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '2.8'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *11035780
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec-expectations
|
27
|
-
requirement: &
|
27
|
+
requirement: &11034780 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '2.8'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *11034780
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rr
|
38
|
-
requirement: &
|
38
|
+
requirement: &11034020 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '1.0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *11034020
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: nokogiri
|
49
|
-
requirement: &
|
49
|
+
requirement: &11033240 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.4.2
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *11033240
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: guess_html_encoding
|
60
|
-
requirement: &
|
60
|
+
requirement: &11032640 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: 0.0.4
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *11032640
|
69
69
|
description: Port of arc90's readability project to ruby. The base code is derived
|
70
70
|
from https://github.com/cantino/ruby-readability
|
71
71
|
email:
|