ruby-readability 0.5.4 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES.markdown +3 -0
- data/lib/readability.rb +49 -0
- data/ruby-readability.gemspec +1 -1
- data/spec/readability_spec.rb +69 -0
- metadata +23 -4
data/CHANGES.markdown
ADDED
data/lib/readability.rb
CHANGED
@@ -131,6 +131,55 @@ module Readability
|
|
131
131
|
title ? title.text : nil
|
132
132
|
end
|
133
133
|
|
134
|
+
# Look through the @html document looking for the author
|
135
|
+
# Precedence Information here on the wiki: (TODO attach wiki URL if it is accepted)
|
136
|
+
# Returns nil if no author is detected
|
137
|
+
def author
|
138
|
+
# Let's grab this author:
|
139
|
+
# <meta name="dc.creator" content="Finch - http://www.getfinch.com" />
|
140
|
+
author_elements = @html.xpath('//meta[@name = "dc.creator"]')
|
141
|
+
unless author_elements.empty?
|
142
|
+
author_elements.each do |element|
|
143
|
+
if element['content']
|
144
|
+
return element['content'].strip
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
# Now let's try to grab this
|
150
|
+
# <span class="byline author vcard"><span>By</span><cite class="fn">Austin Fonacier</cite></span>
|
151
|
+
# <div class="author">By</div><div class="author vcard"><a class="url fn" href="http://austinlivesinyoapp.com/">Austin Fonacier</a></div>
|
152
|
+
author_elements = @html.xpath('//*[contains(@class, "vcard")]//*[contains(@class, "fn")]')
|
153
|
+
unless author_elements.empty?
|
154
|
+
author_elements.each do |element|
|
155
|
+
if element.text
|
156
|
+
return element.text.strip
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
# Now let's try to grab this
|
162
|
+
# <a rel="author" href="http://dbanksdesign.com">Danny Banks (rel)</a>
|
163
|
+
# TODO: strip out the (rel)?
|
164
|
+
author_elements = @html.xpath('//a[@rel = "author"]')
|
165
|
+
unless author_elements.empty?
|
166
|
+
author_elements.each do |element|
|
167
|
+
if element.text
|
168
|
+
return element.text.strip
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
author_elements = @html.xpath('//*[@id = "author"]')
|
174
|
+
unless author_elements.empty?
|
175
|
+
author_elements.each do |element|
|
176
|
+
if element.text
|
177
|
+
return element.text.strip
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
134
183
|
def content(remove_unlikely_candidates = :default)
|
135
184
|
@remove_unlikely_candidates = false if remove_unlikely_candidates == false
|
136
185
|
|
data/ruby-readability.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "ruby-readability"
|
6
|
-
s.version = '0.5.4'
|
6
|
+
s.version = '0.5.5'
|
7
7
|
s.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
|
8
8
|
s.email = ["andrew@iterationlabs.com"]
|
9
9
|
s.homepage = "http://github.com/iterationlabs/ruby-readability"
|
data/spec/readability_spec.rb
CHANGED
@@ -128,6 +128,75 @@ describe Readability do
|
|
128
128
|
end
|
129
129
|
end
|
130
130
|
|
131
|
+
describe "author" do
|
132
|
+
it "should pick up <meta name='dc.creator'></meta> as an author" do
|
133
|
+
doc = Readability::Document.new(<<-HTML)
|
134
|
+
<html>
|
135
|
+
<head>
|
136
|
+
<meta name='dc.creator' content='Austin Fonacier' />
|
137
|
+
</head>
|
138
|
+
<body></body>
|
139
|
+
</html>
|
140
|
+
HTML
|
141
|
+
doc.author.should eql("Austin Fonacier")
|
142
|
+
end
|
143
|
+
|
144
|
+
it "should pick up readability's recommended author format" do
|
145
|
+
doc = Readability::Document.new(<<-HTML)
|
146
|
+
<html>
|
147
|
+
<head>
|
148
|
+
</head>
|
149
|
+
<body>
|
150
|
+
<p class="byline author vcard">
|
151
|
+
By <cite class="fn">Austin Fonacier</span>
|
152
|
+
</p>
|
153
|
+
</body>
|
154
|
+
</html>
|
155
|
+
HTML
|
156
|
+
doc.author.should eql("Austin Fonacier")
|
157
|
+
end
|
158
|
+
|
159
|
+
it "should pick up vcard fn" do
|
160
|
+
doc = Readability::Document.new(<<-HTML)
|
161
|
+
<html>
|
162
|
+
<head>
|
163
|
+
</head>
|
164
|
+
<body>
|
165
|
+
<div class="author">By</div>
|
166
|
+
<div class="author vcard">
|
167
|
+
<a class="url fn" href="http://austinlivesinyotests.com/">Austin Fonacier</a>
|
168
|
+
</div>
|
169
|
+
</body>
|
170
|
+
</html>
|
171
|
+
HTML
|
172
|
+
doc.author.should eql("Austin Fonacier")
|
173
|
+
end
|
174
|
+
|
175
|
+
it "should pick up <a rel='author'>" do
|
176
|
+
doc = Readability::Document.new(<<-HTML)
|
177
|
+
<html>
|
178
|
+
<head></head>
|
179
|
+
<body>
|
180
|
+
<a rel="author" href="http://google.com">Danny Banks (rel)</a>
|
181
|
+
</body>
|
182
|
+
</html>
|
183
|
+
HTML
|
184
|
+
doc.author.should eql("Danny Banks (rel)")
|
185
|
+
end
|
186
|
+
|
187
|
+
it "should pick up <div id='author'>" do
|
188
|
+
doc = Readability::Document.new(<<-HTML)
|
189
|
+
<html>
|
190
|
+
<head></head>
|
191
|
+
<body>
|
192
|
+
<div id="author">Austin Fonacier (author)</div>
|
193
|
+
</body>
|
194
|
+
</html>
|
195
|
+
HTML
|
196
|
+
doc.author.should eql("Austin Fonacier (author)")
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
131
200
|
describe "score_node" do
|
132
201
|
before do
|
133
202
|
@doc = Readability::Document.new(<<-HTML)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-readability
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.4
|
4
|
+
version: 0.5.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2012-
|
15
|
+
date: 2012-10-02 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: rspec
|
@@ -105,6 +105,7 @@ files:
|
|
105
105
|
- .document
|
106
106
|
- .gitignore
|
107
107
|
- .rspec
|
108
|
+
- CHANGES.markdown
|
108
109
|
- Gemfile
|
109
110
|
- README.markdown
|
110
111
|
- Rakefile
|
@@ -150,8 +151,26 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
150
151
|
version: '0'
|
151
152
|
requirements: []
|
152
153
|
rubyforge_project: ruby-readability
|
153
|
-
rubygems_version: 1.8.
|
154
|
+
rubygems_version: 1.8.21
|
154
155
|
signing_key:
|
155
156
|
specification_version: 3
|
156
157
|
summary: Port of arc90's readability project to ruby
|
157
|
-
test_files: []
|
158
|
+
test_files:
|
159
|
+
- spec/fixtures/bbc.html
|
160
|
+
- spec/fixtures/cant_read.html
|
161
|
+
- spec/fixtures/images/dim_1416768a.jpg
|
162
|
+
- spec/fixtures/nytimes.html
|
163
|
+
- spec/fixtures/sample.html
|
164
|
+
- spec/fixtures/samples/blogpost_with_links-fragments.rb
|
165
|
+
- spec/fixtures/samples/blogpost_with_links.html
|
166
|
+
- spec/fixtures/samples/channel4-1-fragments.rb
|
167
|
+
- spec/fixtures/samples/channel4-1.html
|
168
|
+
- spec/fixtures/samples/foxnews-india1-fragments.rb
|
169
|
+
- spec/fixtures/samples/foxnews-india1.html
|
170
|
+
- spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb
|
171
|
+
- spec/fixtures/samples/globemail-ottawa-cuts.html
|
172
|
+
- spec/fixtures/should_not_truncate.txt
|
173
|
+
- spec/fixtures/thesun.html
|
174
|
+
- spec/readability_spec.rb
|
175
|
+
- spec/spec.opts
|
176
|
+
- spec/spec_helper.rb
|