ruby-readability 0.5.0.pre → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +5 -3
- data/lib/readability.rb +7 -2
- data/ruby-readability.gemspec +1 -1
- data/spec/readability_spec.rb +8 -0
- metadata +14 -14
data/README
CHANGED
@@ -20,11 +20,13 @@ Options:
|
|
20
20
|
You may provide additional options to Readability::Document.new, including:
|
21
21
|
|
22
22
|
:tags - the base whitelist of tags to sanitize, defaults to %w[div p]
|
23
|
-
:remove_empty_nodes - remove <p> tags that have no text content;
|
23
|
+
:remove_empty_nodes - remove <p> tags that have no text content; also removes p tags that contain only images
|
24
24
|
:attributes - whitelist of allowed attributes
|
25
25
|
:debug - provide debugging output, defaults false
|
26
|
-
:encoding - if this page is of a known encoding, you can specify it; if left
|
27
|
-
|
26
|
+
:encoding - if this page is of a known encoding, you can specify it; if left
|
27
|
+
unspecified, the encoding will be guessed (only in Ruby 1.9.x)
|
28
|
+
:html_headers - in Ruby 1.9.x these will be passed to the guess_html_encoding gem
|
29
|
+
to aid with guessing the HTML encoding
|
28
30
|
|
29
31
|
Readability comes with a command-line tool for experimentation in bin/readability.
|
30
32
|
|
data/lib/readability.rb
CHANGED
@@ -49,10 +49,15 @@ module Readability
|
|
49
49
|
:videoRe => /http:\/\/(www\.)?(youtube|vimeo)\.com/i
|
50
50
|
}
|
51
51
|
|
52
|
+
def title
|
53
|
+
title = @html.css("title").first
|
54
|
+
title ? title.text : nil
|
55
|
+
end
|
56
|
+
|
52
57
|
def content(remove_unlikely_candidates = :default)
|
53
58
|
@remove_unlikely_candidates = false if remove_unlikely_candidates == false
|
54
59
|
|
55
|
-
@html.css("script, style").each
|
60
|
+
@html.css("script, style").each(&:remove)
|
56
61
|
|
57
62
|
remove_unlikely_candidates! if @remove_unlikely_candidates
|
58
63
|
transform_misused_divs_into_paragraphs!
|
@@ -127,7 +132,7 @@ module Readability
|
|
127
132
|
end
|
128
133
|
|
129
134
|
def get_link_density(elem)
|
130
|
-
link_length = elem.css("a").map
|
135
|
+
link_length = elem.css("a").map(&:text).join("").length
|
131
136
|
text_length = elem.text.length
|
132
137
|
link_length / text_length.to_f
|
133
138
|
end
|
data/ruby-readability.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "ruby-readability"
|
6
|
-
s.version = '0.5.0.pre'
|
6
|
+
s.version = '0.5.0'
|
7
7
|
s.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
|
8
8
|
s.email = ["andrew@iterationlabs.com"]
|
9
9
|
s.homepage = "http://github.com/iterationlabs/ruby-readability"
|
data/spec/readability_spec.rb
CHANGED
@@ -163,6 +163,14 @@ describe Readability do
|
|
163
163
|
it "should return the main page content" do
|
164
164
|
@doc.content.should match("Some content")
|
165
165
|
end
|
166
|
+
|
167
|
+
it "should return the page title if present" do
|
168
|
+
@doc.title.should match("title!")
|
169
|
+
|
170
|
+
doc = Readability::Document.new("<html><head></head><body><div><p>Some content</p></div></body>",
|
171
|
+
:min_text_length => 0, :retry_length => 1)
|
172
|
+
doc.title.should be_nil
|
173
|
+
end
|
166
174
|
end
|
167
175
|
|
168
176
|
describe "ignoring sidebars" do
|
metadata
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-readability
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0.pre
|
5
|
-
prerelease:
|
4
|
+
version: 0.5.0
|
5
|
+
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Andrew Cantino
|
@@ -12,11 +12,11 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date:
|
15
|
+
date: 2012-01-24 00:00:00.000000000Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: rspec
|
19
|
-
requirement: &
|
19
|
+
requirement: &70232951278200 !ruby/object:Gem::Requirement
|
20
20
|
none: false
|
21
21
|
requirements:
|
22
22
|
- - ! '>='
|
@@ -24,10 +24,10 @@ dependencies:
|
|
24
24
|
version: '2.6'
|
25
25
|
type: :development
|
26
26
|
prerelease: false
|
27
|
-
version_requirements: *
|
27
|
+
version_requirements: *70232951278200
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: rr
|
30
|
-
requirement: &
|
30
|
+
requirement: &70232951277680 !ruby/object:Gem::Requirement
|
31
31
|
none: false
|
32
32
|
requirements:
|
33
33
|
- - ! '>='
|
@@ -35,10 +35,10 @@ dependencies:
|
|
35
35
|
version: '1.0'
|
36
36
|
type: :development
|
37
37
|
prerelease: false
|
38
|
-
version_requirements: *
|
38
|
+
version_requirements: *70232951277680
|
39
39
|
- !ruby/object:Gem::Dependency
|
40
40
|
name: nokogiri
|
41
|
-
requirement: &
|
41
|
+
requirement: &70232951277200 !ruby/object:Gem::Requirement
|
42
42
|
none: false
|
43
43
|
requirements:
|
44
44
|
- - ! '>='
|
@@ -46,10 +46,10 @@ dependencies:
|
|
46
46
|
version: 1.4.2
|
47
47
|
type: :runtime
|
48
48
|
prerelease: false
|
49
|
-
version_requirements: *
|
49
|
+
version_requirements: *70232951277200
|
50
50
|
- !ruby/object:Gem::Dependency
|
51
51
|
name: guess_html_encoding
|
52
|
-
requirement: &
|
52
|
+
requirement: &70232951276720 !ruby/object:Gem::Requirement
|
53
53
|
none: false
|
54
54
|
requirements:
|
55
55
|
- - ! '>='
|
@@ -57,7 +57,7 @@ dependencies:
|
|
57
57
|
version: 0.0.2
|
58
58
|
type: :runtime
|
59
59
|
prerelease: false
|
60
|
-
version_requirements: *
|
60
|
+
version_requirements: *70232951276720
|
61
61
|
description: Port of arc90's readability project to ruby
|
62
62
|
email:
|
63
63
|
- andrew@iterationlabs.com
|
@@ -104,12 +104,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
104
104
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
106
|
requirements:
|
107
|
-
- - ! '
|
107
|
+
- - ! '>='
|
108
108
|
- !ruby/object:Gem::Version
|
109
|
-
version:
|
109
|
+
version: '0'
|
110
110
|
requirements: []
|
111
111
|
rubyforge_project: ruby-readability
|
112
|
-
rubygems_version: 1.8.
|
112
|
+
rubygems_version: 1.8.10
|
113
113
|
signing_key:
|
114
114
|
specification_version: 3
|
115
115
|
summary: Port of arc90's readability project to ruby
|