ruby-readability 0.5.0.pre → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +5 -3
- data/lib/readability.rb +7 -2
- data/ruby-readability.gemspec +1 -1
- data/spec/readability_spec.rb +8 -0
- metadata +14 -14
data/README
CHANGED
@@ -20,11 +20,13 @@ Options:
|
|
20
20
|
You may provide additional options to Readability::Document.new, including:
|
21
21
|
|
22
22
|
:tags - the base whitelist of tags to sanitize, defaults to %w[div p]
|
23
|
-
:remove_empty_nodes - remove <p> tags that have no text content;
|
23
|
+
:remove_empty_nodes - remove <p> tags that have no text content; also removes p tags that contain only images
|
24
24
|
:attributes - whitelist of allowed attributes
|
25
25
|
:debug - provide debugging output, defaults false
|
26
|
-
:encoding - if this page is of a known encoding, you can specify it; if left
|
27
|
-
|
26
|
+
:encoding - if this page is of a known encoding, you can specify it; if left
|
27
|
+
unspecified, the encoding will be guessed (only in Ruby 1.9.x)
|
28
|
+
:html_headers - in Ruby 1.9.x these will be passed to the guess_html_encoding gem
|
29
|
+
to aid with guessing the HTML encoding
|
28
30
|
|
29
31
|
Readability comes with a command-line tool for experimentation in bin/readability.
|
30
32
|
|
data/lib/readability.rb
CHANGED
@@ -49,10 +49,15 @@ module Readability
|
|
49
49
|
:videoRe => /http:\/\/(www\.)?(youtube|vimeo)\.com/i
|
50
50
|
}
|
51
51
|
|
52
|
+
def title
|
53
|
+
title = @html.css("title").first
|
54
|
+
title ? title.text : nil
|
55
|
+
end
|
56
|
+
|
52
57
|
def content(remove_unlikely_candidates = :default)
|
53
58
|
@remove_unlikely_candidates = false if remove_unlikely_candidates == false
|
54
59
|
|
55
|
-
@html.css("script, style").each
|
60
|
+
@html.css("script, style").each(&:remove)
|
56
61
|
|
57
62
|
remove_unlikely_candidates! if @remove_unlikely_candidates
|
58
63
|
transform_misused_divs_into_paragraphs!
|
@@ -127,7 +132,7 @@ module Readability
|
|
127
132
|
end
|
128
133
|
|
129
134
|
def get_link_density(elem)
|
130
|
-
link_length = elem.css("a").map
|
135
|
+
link_length = elem.css("a").map(&:text).join("").length
|
131
136
|
text_length = elem.text.length
|
132
137
|
link_length / text_length.to_f
|
133
138
|
end
|
data/ruby-readability.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "ruby-readability"
|
6
|
-
s.version = '0.5.0.pre'
|
6
|
+
s.version = '0.5.0'
|
7
7
|
s.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
|
8
8
|
s.email = ["andrew@iterationlabs.com"]
|
9
9
|
s.homepage = "http://github.com/iterationlabs/ruby-readability"
|
data/spec/readability_spec.rb
CHANGED
@@ -163,6 +163,14 @@ describe Readability do
|
|
163
163
|
it "should return the main page content" do
|
164
164
|
@doc.content.should match("Some content")
|
165
165
|
end
|
166
|
+
|
167
|
+
it "should return the page title if present" do
|
168
|
+
@doc.title.should match("title!")
|
169
|
+
|
170
|
+
doc = Readability::Document.new("<html><head></head><body><div><p>Some content</p></div></body>",
|
171
|
+
:min_text_length => 0, :retry_length => 1)
|
172
|
+
doc.title.should be_nil
|
173
|
+
end
|
166
174
|
end
|
167
175
|
|
168
176
|
describe "ignoring sidebars" do
|
metadata
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-readability
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0.pre
|
5
|
-
prerelease:
|
4
|
+
version: 0.5.0
|
5
|
+
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Andrew Cantino
|
@@ -12,11 +12,11 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date:
|
15
|
+
date: 2012-01-24 00:00:00.000000000Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: rspec
|
19
|
-
requirement: &
|
19
|
+
requirement: &70232951278200 !ruby/object:Gem::Requirement
|
20
20
|
none: false
|
21
21
|
requirements:
|
22
22
|
- - ! '>='
|
@@ -24,10 +24,10 @@ dependencies:
|
|
24
24
|
version: '2.6'
|
25
25
|
type: :development
|
26
26
|
prerelease: false
|
27
|
-
version_requirements: *
|
27
|
+
version_requirements: *70232951278200
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: rr
|
30
|
-
requirement: &
|
30
|
+
requirement: &70232951277680 !ruby/object:Gem::Requirement
|
31
31
|
none: false
|
32
32
|
requirements:
|
33
33
|
- - ! '>='
|
@@ -35,10 +35,10 @@ dependencies:
|
|
35
35
|
version: '1.0'
|
36
36
|
type: :development
|
37
37
|
prerelease: false
|
38
|
-
version_requirements: *
|
38
|
+
version_requirements: *70232951277680
|
39
39
|
- !ruby/object:Gem::Dependency
|
40
40
|
name: nokogiri
|
41
|
-
requirement: &
|
41
|
+
requirement: &70232951277200 !ruby/object:Gem::Requirement
|
42
42
|
none: false
|
43
43
|
requirements:
|
44
44
|
- - ! '>='
|
@@ -46,10 +46,10 @@ dependencies:
|
|
46
46
|
version: 1.4.2
|
47
47
|
type: :runtime
|
48
48
|
prerelease: false
|
49
|
-
version_requirements: *
|
49
|
+
version_requirements: *70232951277200
|
50
50
|
- !ruby/object:Gem::Dependency
|
51
51
|
name: guess_html_encoding
|
52
|
-
requirement: &
|
52
|
+
requirement: &70232951276720 !ruby/object:Gem::Requirement
|
53
53
|
none: false
|
54
54
|
requirements:
|
55
55
|
- - ! '>='
|
@@ -57,7 +57,7 @@ dependencies:
|
|
57
57
|
version: 0.0.2
|
58
58
|
type: :runtime
|
59
59
|
prerelease: false
|
60
|
-
version_requirements: *
|
60
|
+
version_requirements: *70232951276720
|
61
61
|
description: Port of arc90's readability project to ruby
|
62
62
|
email:
|
63
63
|
- andrew@iterationlabs.com
|
@@ -104,12 +104,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
104
104
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
106
|
requirements:
|
107
|
-
- - ! '
|
107
|
+
- - ! '>='
|
108
108
|
- !ruby/object:Gem::Version
|
109
|
-
version:
|
109
|
+
version: '0'
|
110
110
|
requirements: []
|
111
111
|
rubyforge_project: ruby-readability
|
112
|
-
rubygems_version: 1.8.
|
112
|
+
rubygems_version: 1.8.10
|
113
113
|
signing_key:
|
114
114
|
specification_version: 3
|
115
115
|
summary: Port of arc90's readability project to ruby
|