ruby-readability 0.5.0.pre → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -20,11 +20,13 @@ Options:
20
20
  You may provide additional options to Readability::Document.new, including:
21
21
 
22
22
  :tags - the base whitelist of tags to sanitize, defaults to %w[div p]
23
- :remove_empty_nodes - remove <p> tags that have no text content; this will also remove p tags that contain only images
23
+ :remove_empty_nodes - remove <p> tags that have no text content; also removes p tags that contain only images
24
24
  :attributes - whitelist of allowed attributes
25
25
  :debug - provide debugging output, defaults false
26
- :encoding - if this page is of a known encoding, you can specify it; if left unspecified, the encoding will be guessed (only in Ruby 1.9.x)
27
- :html_headers - in Ruby 1.9.x these will be passed to the guess_html_encoding gem to aid with guessing the HTML encoding
26
+ :encoding - if this page is of a known encoding, you can specify it; if left
27
+ unspecified, the encoding will be guessed (only in Ruby 1.9.x)
28
+ :html_headers - in Ruby 1.9.x these will be passed to the guess_html_encoding gem
29
+ to aid with guessing the HTML encoding
28
30
 
29
31
  Readability comes with a command-line tool for experimentation in bin/readability.
30
32
 
data/lib/readability.rb CHANGED
@@ -49,10 +49,15 @@ module Readability
49
49
  :videoRe => /http:\/\/(www\.)?(youtube|vimeo)\.com/i
50
50
  }
51
51
 
52
+ def title
53
+ title = @html.css("title").first
54
+ title ? title.text : nil
55
+ end
56
+
52
57
  def content(remove_unlikely_candidates = :default)
53
58
  @remove_unlikely_candidates = false if remove_unlikely_candidates == false
54
59
 
55
- @html.css("script, style").each { |i| i.remove }
60
+ @html.css("script, style").each(&:remove)
56
61
 
57
62
  remove_unlikely_candidates! if @remove_unlikely_candidates
58
63
  transform_misused_divs_into_paragraphs!
@@ -127,7 +132,7 @@ module Readability
127
132
  end
128
133
 
129
134
  def get_link_density(elem)
130
- link_length = elem.css("a").map {|i| i.text}.join("").length
135
+ link_length = elem.css("a").map(&:text).join("").length
131
136
  text_length = elem.text.length
132
137
  link_length / text_length.to_f
133
138
  end
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "ruby-readability"
6
- s.version = '0.5.0.pre'
6
+ s.version = '0.5.0'
7
7
  s.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
8
8
  s.email = ["andrew@iterationlabs.com"]
9
9
  s.homepage = "http://github.com/iterationlabs/ruby-readability"
@@ -163,6 +163,14 @@ describe Readability do
163
163
  it "should return the main page content" do
164
164
  @doc.content.should match("Some content")
165
165
  end
166
+
167
+ it "should return the page title if present" do
168
+ @doc.title.should match("title!")
169
+
170
+ doc = Readability::Document.new("<html><head></head><body><div><p>Some content</p></div></body>",
171
+ :min_text_length => 0, :retry_length => 1)
172
+ doc.title.should be_nil
173
+ end
166
174
  end
167
175
 
168
176
  describe "ignoring sidebars" do
metadata CHANGED
@@ -1,8 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-readability
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0.pre
5
- prerelease: 6
4
+ version: 0.5.0
5
+ prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Andrew Cantino
@@ -12,11 +12,11 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2011-10-26 00:00:00.000000000Z
15
+ date: 2012-01-24 00:00:00.000000000Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: rspec
19
- requirement: &70170467589400 !ruby/object:Gem::Requirement
19
+ requirement: &70232951278200 !ruby/object:Gem::Requirement
20
20
  none: false
21
21
  requirements:
22
22
  - - ! '>='
@@ -24,10 +24,10 @@ dependencies:
24
24
  version: '2.6'
25
25
  type: :development
26
26
  prerelease: false
27
- version_requirements: *70170467589400
27
+ version_requirements: *70232951278200
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: rr
30
- requirement: &70170467588900 !ruby/object:Gem::Requirement
30
+ requirement: &70232951277680 !ruby/object:Gem::Requirement
31
31
  none: false
32
32
  requirements:
33
33
  - - ! '>='
@@ -35,10 +35,10 @@ dependencies:
35
35
  version: '1.0'
36
36
  type: :development
37
37
  prerelease: false
38
- version_requirements: *70170467588900
38
+ version_requirements: *70232951277680
39
39
  - !ruby/object:Gem::Dependency
40
40
  name: nokogiri
41
- requirement: &70170467588440 !ruby/object:Gem::Requirement
41
+ requirement: &70232951277200 !ruby/object:Gem::Requirement
42
42
  none: false
43
43
  requirements:
44
44
  - - ! '>='
@@ -46,10 +46,10 @@ dependencies:
46
46
  version: 1.4.2
47
47
  type: :runtime
48
48
  prerelease: false
49
- version_requirements: *70170467588440
49
+ version_requirements: *70232951277200
50
50
  - !ruby/object:Gem::Dependency
51
51
  name: guess_html_encoding
52
- requirement: &70170467587980 !ruby/object:Gem::Requirement
52
+ requirement: &70232951276720 !ruby/object:Gem::Requirement
53
53
  none: false
54
54
  requirements:
55
55
  - - ! '>='
@@ -57,7 +57,7 @@ dependencies:
57
57
  version: 0.0.2
58
58
  type: :runtime
59
59
  prerelease: false
60
- version_requirements: *70170467587980
60
+ version_requirements: *70232951276720
61
61
  description: Port of arc90's readability project to ruby
62
62
  email:
63
63
  - andrew@iterationlabs.com
@@ -104,12 +104,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
104
104
  required_rubygems_version: !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
- - - ! '>'
107
+ - - ! '>='
108
108
  - !ruby/object:Gem::Version
109
- version: 1.3.1
109
+ version: '0'
110
110
  requirements: []
111
111
  rubyforge_project: ruby-readability
112
- rubygems_version: 1.8.6
112
+ rubygems_version: 1.8.10
113
113
  signing_key:
114
114
  specification_version: 3
115
115
  summary: Port of arc90's readability project to ruby