ruby-readability 0.5.4 → 0.5.5

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGES.markdown ADDED
@@ -0,0 +1,3 @@
1
+ Oct 1, 2012:
2
+
3
+ - Merged in austinrfnd's `author` handling code.
data/lib/readability.rb CHANGED
@@ -131,6 +131,55 @@ module Readability
131
131
  title ? title.text : nil
132
132
  end
133
133
 
134
# Looks through the @html document for the article's author.
#
# Candidate markup is probed in order of precedence; the first candidate that
# yields a non-blank value wins:
#
#   1. <meta name="dc.creator" content="..."> (the content attribute)
#   2. an element with class "fn" nested inside an element with class "vcard"
#   3. <a rel="author">...</a>
#   4. any element with id="author"
#
# Precedence information on the wiki: (TODO attach wiki URL if it is accepted)
#
# Blank candidates (missing, empty or whitespace-only) are skipped so that a
# later, more useful match can still be found.
#
# Returns the author as a String with surrounding whitespace stripped, or nil
# if no author is detected.
def author
  # Let's grab this author:
  # <meta name="dc.creator" content="Finch - http://www.getfinch.com" />
  @html.xpath('//meta[@name = "dc.creator"]').each do |element|
    candidate = element['content'].to_s.strip
    return candidate unless candidate.empty?
  end

  # Matches a whole class token rather than a substring, so class names that
  # merely contain the letters "fn" or "vcard" are not mistaken for a match.
  has_class = lambda do |token|
    %{contains(concat(" ", normalize-space(@class), " "), " #{token} ")}
  end

  text_queries = [
    # <span class="byline author vcard"><span>By</span><cite class="fn">Austin Fonacier</cite></span>
    # <div class="author">By</div><div class="author vcard"><a class="url fn" href="http://austinlivesinyoapp.com/">Austin Fonacier</a></div>
    "//*[#{has_class.call('vcard')}]//*[#{has_class.call('fn')}]",

    # <a rel="author" href="http://dbanksdesign.com">Danny Banks (rel)</a>
    # TODO: strip out the (rel)?
    '//a[@rel = "author"]',

    # <div id="author">Austin Fonacier (author)</div>
    '//*[@id = "author"]'
  ]

  text_queries.each do |query|
    @html.xpath(query).each do |element|
      candidate = element.text.to_s.strip
      return candidate unless candidate.empty?
    end
  end

  # Nothing usable was found.
  nil
end
182
+
134
183
  def content(remove_unlikely_candidates = :default)
135
184
  @remove_unlikely_candidates = false if remove_unlikely_candidates == false
136
185
 
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "ruby-readability"
6
- s.version = '0.5.4'
6
+ s.version = '0.5.5'
7
7
  s.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
8
8
  s.email = ["andrew@iterationlabs.com"]
9
9
  s.homepage = "http://github.com/iterationlabs/ruby-readability"
@@ -128,6 +128,75 @@ describe Readability do
128
128
  end
129
129
  end
130
130
 
131
# Examples for Readability::Document#author. Each example builds a document
# containing exactly one kind of author markup and checks that the author
# name is extracted from it.
describe "author" do
  it "should pick up <meta name='dc.creator'></meta> as an author" do
    doc = Readability::Document.new(<<-HTML)
      <html>
        <head>
          <meta name='dc.creator' content='Austin Fonacier' />
        </head>
        <body></body>
      </html>
    HTML
    doc.author.should eql("Austin Fonacier")
  end

  it "should pick up readability's recommended author format" do
    # The <cite> is closed with </cite>; the fixture previously closed it with
    # a stray </span> and only passed thanks to lenient HTML parsing.
    doc = Readability::Document.new(<<-HTML)
      <html>
        <head>
        </head>
        <body>
          <p class="byline author vcard">
            By <cite class="fn">Austin Fonacier</cite>
          </p>
        </body>
      </html>
    HTML
    doc.author.should eql("Austin Fonacier")
  end

  it "should pick up vcard fn" do
    doc = Readability::Document.new(<<-HTML)
      <html>
        <head>
        </head>
        <body>
          <div class="author">By</div>
          <div class="author vcard">
            <a class="url fn" href="http://austinlivesinyotests.com/">Austin Fonacier</a>
          </div>
        </body>
      </html>
    HTML
    doc.author.should eql("Austin Fonacier")
  end

  it "should pick up <a rel='author'>" do
    doc = Readability::Document.new(<<-HTML)
      <html>
        <head></head>
        <body>
          <a rel="author" href="http://google.com">Danny Banks (rel)</a>
        </body>
      </html>
    HTML
    doc.author.should eql("Danny Banks (rel)")
  end

  it "should pick up <div id='author'>" do
    doc = Readability::Document.new(<<-HTML)
      <html>
        <head></head>
        <body>
          <div id="author">Austin Fonacier (author)</div>
        </body>
      </html>
    HTML
    doc.author.should eql("Austin Fonacier (author)")
  end
end
199
+
131
200
  describe "score_node" do
132
201
  before do
133
202
  @doc = Readability::Document.new(<<-HTML)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-readability
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.4
4
+ version: 0.5.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2012-07-27 00:00:00.000000000 Z
15
+ date: 2012-10-02 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: rspec
@@ -105,6 +105,7 @@ files:
105
105
  - .document
106
106
  - .gitignore
107
107
  - .rspec
108
+ - CHANGES.markdown
108
109
  - Gemfile
109
110
  - README.markdown
110
111
  - Rakefile
@@ -150,8 +151,26 @@ required_rubygems_version: !ruby/object:Gem::Requirement
150
151
  version: '0'
151
152
  requirements: []
152
153
  rubyforge_project: ruby-readability
153
- rubygems_version: 1.8.19
154
+ rubygems_version: 1.8.21
154
155
  signing_key:
155
156
  specification_version: 3
156
157
  summary: Port of arc90's readability project to ruby
157
- test_files: []
158
+ test_files:
159
+ - spec/fixtures/bbc.html
160
+ - spec/fixtures/cant_read.html
161
+ - spec/fixtures/images/dim_1416768a.jpg
162
+ - spec/fixtures/nytimes.html
163
+ - spec/fixtures/sample.html
164
+ - spec/fixtures/samples/blogpost_with_links-fragments.rb
165
+ - spec/fixtures/samples/blogpost_with_links.html
166
+ - spec/fixtures/samples/channel4-1-fragments.rb
167
+ - spec/fixtures/samples/channel4-1.html
168
+ - spec/fixtures/samples/foxnews-india1-fragments.rb
169
+ - spec/fixtures/samples/foxnews-india1.html
170
+ - spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb
171
+ - spec/fixtures/samples/globemail-ottawa-cuts.html
172
+ - spec/fixtures/should_not_truncate.txt
173
+ - spec/fixtures/thesun.html
174
+ - spec/readability_spec.rb
175
+ - spec/spec.opts
176
+ - spec/spec_helper.rb