marcosinger-ruby-readability 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +7 -0
- data/.rspec +3 -0
- data/Gemfile +10 -0
- data/README +54 -0
- data/Rakefile +6 -0
- data/bin/readability +40 -0
- data/lib/readability.rb +402 -0
- data/lib/ruby-readability.rb +1 -0
- data/ruby-readability.gemspec +24 -0
- data/spec/fixtures/bbc.html +2069 -0
- data/spec/fixtures/cant_read.html +426 -0
- data/spec/fixtures/images/dim_1416768a.jpg +0 -0
- data/spec/fixtures/nytimes.html +58 -0
- data/spec/fixtures/sample.html +1198 -0
- data/spec/fixtures/samples/blogpost_with_links-fragments.rb +10 -0
- data/spec/fixtures/samples/blogpost_with_links.html +137 -0
- data/spec/fixtures/samples/channel4-1-fragments.rb +13 -0
- data/spec/fixtures/samples/channel4-1.html +1330 -0
- data/spec/fixtures/samples/foxnews-india1-fragments.rb +13 -0
- data/spec/fixtures/samples/foxnews-india1.html +2058 -0
- data/spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb +31 -0
- data/spec/fixtures/samples/globemail-ottawa-cuts.html +2410 -0
- data/spec/fixtures/should_not_truncate.txt +1077 -0
- data/spec/fixtures/thesun.html +1122 -0
- data/spec/readability_spec.rb +330 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +11 -0
- metadata +176 -0
@@ -0,0 +1,330 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe Readability do
|
6
|
+
before do
|
7
|
+
@simple_html_fixture = <<-HTML
|
8
|
+
<html>
|
9
|
+
<head>
|
10
|
+
<title>title!</title>
|
11
|
+
</head>
|
12
|
+
<body class='comment'>
|
13
|
+
<div>
|
14
|
+
<p class='comment'>a comment</p>
|
15
|
+
<div class='comment' id='body'>real content</div>
|
16
|
+
<div id="contains_blockquote"><blockquote>something in a table</blockquote></div>
|
17
|
+
</div>
|
18
|
+
</body>
|
19
|
+
</html>
|
20
|
+
HTML
|
21
|
+
end
|
22
|
+
|
23
|
+
describe "images" do
|
24
|
+
before do
|
25
|
+
# bbc => http://www.bbc.co.uk/news/magazine-15959067
|
26
|
+
# nytimes => http://opinionator.blogs.nytimes.com/2011/12/01/health-care-for-a-changing-work-force/
|
27
|
+
# thesum => http://www.thesun.co.uk/sol/homepage/sport/football/3973265/Manchester-United-news-Dimitar-Berbatov-and-Carling-Cup-flops-warned.html
|
28
|
+
|
29
|
+
@bbc = File.read(File.dirname(__FILE__) + "/fixtures/bbc.html")
|
30
|
+
@nytimes = File.read(File.dirname(__FILE__) + "/fixtures/nytimes.html")
|
31
|
+
@thesum = File.read(File.dirname(__FILE__) + "/fixtures/thesun.html")
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should show one image, but outside of the best candidate" do
|
35
|
+
@doc = Readability::Document.new(@thesum)
|
36
|
+
@doc.images.should == ["http://img.thesun.co.uk/multimedia/archive/01416/dim_1416768a.jpg"]
|
37
|
+
@doc.best_candidate_has_image.should == false
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should show one image inside of the best candidate" do
|
41
|
+
@doc = Readability::Document.new(@nytimes)
|
42
|
+
@doc.images.should == ["http://graphics8.nytimes.com/images/2011/12/02/opinion/02fixes-freelancersunion/02fixes-freelancersunion-blog427.jpg"]
|
43
|
+
@doc.best_candidate_has_image.should == true
|
44
|
+
end
|
45
|
+
|
46
|
+
describe "no images" do
|
47
|
+
it "shouldn't show images" do
|
48
|
+
@doc = Readability::Document.new(@bbc, :min_image_height => 400)
|
49
|
+
@doc.images.should == []
|
50
|
+
@doc.best_candidate_has_image.should == false
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
describe "poll of images" do
|
55
|
+
it "should show some images inside of the best candidate" do
|
56
|
+
@doc = Readability::Document.new(@bbc)
|
57
|
+
@doc.images.should == ["http://news.bbcimg.co.uk/media/images/57027000/jpg/_57027794_perseus_getty.jpg", "http://news.bbcimg.co.uk/media/images/57027000/jpg/_57027786_john_capes229_rnsm.jpg", "http://news.bbcimg.co.uk/media/images/57055000/jpg/_57055063_perseus_thoctarides.jpg"]
|
58
|
+
@doc.best_candidate_has_image.should == true
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should show some images inside of the best candidate, include gif format" do
|
62
|
+
@doc = Readability::Document.new(@bbc, :ignore_image_format => [])
|
63
|
+
@doc.images.should == ["http://news.bbcimg.co.uk/media/images/57027000/jpg/_57027794_perseus_getty.jpg", "http://news.bbcimg.co.uk/media/images/57027000/jpg/_57027786_john_capes229_rnsm.jpg", "http://news.bbcimg.co.uk/media/images/57060000/gif/_57060487_sub_escapes304x416.gif", "http://news.bbcimg.co.uk/media/images/57055000/jpg/_57055063_perseus_thoctarides.jpg"]
|
64
|
+
@doc.best_candidate_has_image.should == true
|
65
|
+
end
|
66
|
+
|
67
|
+
describe "width, height and format" do
|
68
|
+
it "should show some images inside of the best candidate, but with width most equal to 400px" do
|
69
|
+
@doc = Readability::Document.new(@bbc, :min_image_width => 400, :ignore_image_format => [])
|
70
|
+
@doc.images.should == ["http://news.bbcimg.co.uk/media/images/57027000/jpg/_57027794_perseus_getty.jpg"]
|
71
|
+
@doc.best_candidate_has_image.should == true
|
72
|
+
end
|
73
|
+
|
74
|
+
it "should show some images inside of the best candidate, but with width most equal to 304px" do
|
75
|
+
@doc = Readability::Document.new(@bbc, :min_image_width => 304, :ignore_image_format => [])
|
76
|
+
@doc.images.should == ["http://news.bbcimg.co.uk/media/images/57027000/jpg/_57027794_perseus_getty.jpg", "http://news.bbcimg.co.uk/media/images/57060000/gif/_57060487_sub_escapes304x416.gif", "http://news.bbcimg.co.uk/media/images/57055000/jpg/_57055063_perseus_thoctarides.jpg"]
|
77
|
+
@doc.best_candidate_has_image.should == true
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should show some images inside of the best candidate, but with width most equal to 304px and ignoring JPG format" do
|
81
|
+
@doc = Readability::Document.new(@bbc, :min_image_width => 304, :ignore_image_format => ["jpg"])
|
82
|
+
@doc.images.should == ["http://news.bbcimg.co.uk/media/images/57060000/gif/_57060487_sub_escapes304x416.gif"]
|
83
|
+
@doc.best_candidate_has_image.should == true
|
84
|
+
end
|
85
|
+
|
86
|
+
it "should show some images inside of the best candidate, but with height most equal to 400px, no ignoring no format" do
|
87
|
+
@doc = Readability::Document.new(@bbc, :min_image_height => 400, :ignore_image_format => [])
|
88
|
+
@doc.images.should == ["http://news.bbcimg.co.uk/media/images/57060000/gif/_57060487_sub_escapes304x416.gif"]
|
89
|
+
@doc.best_candidate_has_image.should == true
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
describe "transformMisusedDivsIntoParagraphs" do
|
96
|
+
before do
|
97
|
+
@doc = Readability::Document.new(@simple_html_fixture)
|
98
|
+
@doc.transform_misused_divs_into_paragraphs!
|
99
|
+
end
|
100
|
+
|
101
|
+
it "should transform divs containing no block elements into <p>s" do
|
102
|
+
@doc.html.css("#body").first.name.should == "p"
|
103
|
+
end
|
104
|
+
|
105
|
+
it "should not transform divs that contain block elements" do
|
106
|
+
@doc.html.css("#contains_blockquote").first.name.should == "div"
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
describe "score_node" do
|
111
|
+
before do
|
112
|
+
@doc = Readability::Document.new(<<-HTML)
|
113
|
+
<html>
|
114
|
+
<body>
|
115
|
+
<div id='elem1'>
|
116
|
+
<p>some content</p>
|
117
|
+
</div>
|
118
|
+
<th id='elem2'>
|
119
|
+
<p>some other content</p>
|
120
|
+
</th>
|
121
|
+
</body>
|
122
|
+
</html>
|
123
|
+
HTML
|
124
|
+
@elem1 = @doc.html.css("#elem1").first
|
125
|
+
@elem2 = @doc.html.css("#elem2").first
|
126
|
+
end
|
127
|
+
|
128
|
+
it "should like <div>s more than <th>s" do
|
129
|
+
@doc.score_node(@elem1)[:content_score].should > @doc.score_node(@elem2)[:content_score]
|
130
|
+
end
|
131
|
+
|
132
|
+
it "should like classes like text more than classes like comment" do
|
133
|
+
@elem2.name = "div"
|
134
|
+
@doc.score_node(@elem1)[:content_score].should == @doc.score_node(@elem2)[:content_score]
|
135
|
+
@elem1['class'] = "text"
|
136
|
+
@elem2['class'] = "comment"
|
137
|
+
@doc.score_node(@elem1)[:content_score].should > @doc.score_node(@elem2)[:content_score]
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
describe "remove_unlikely_candidates!" do
|
142
|
+
before do
|
143
|
+
@doc = Readability::Document.new(@simple_html_fixture)
|
144
|
+
@doc.remove_unlikely_candidates!
|
145
|
+
end
|
146
|
+
|
147
|
+
it "should remove things that have class comment" do
|
148
|
+
@doc.html.inner_html.should_not =~ /a comment/
|
149
|
+
end
|
150
|
+
|
151
|
+
it "should not remove body tags" do
|
152
|
+
@doc.html.inner_html.should =~ /<\/body>/
|
153
|
+
end
|
154
|
+
|
155
|
+
it "should not remove things with class comment and id body" do
|
156
|
+
@doc.html.inner_html.should =~ /real content/
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
describe "score_paragraphs" do
|
161
|
+
before(:each) do
|
162
|
+
@doc = Readability::Document.new(<<-HTML)
|
163
|
+
<html>
|
164
|
+
<head>
|
165
|
+
<title>title!</title>
|
166
|
+
</head>
|
167
|
+
<body id="body">
|
168
|
+
<div id="div1">
|
169
|
+
<div id="div2>
|
170
|
+
<p id="some_comment">a comment</p>
|
171
|
+
</div>
|
172
|
+
<p id="some_text">some text</p>
|
173
|
+
</div>
|
174
|
+
<div id="div3">
|
175
|
+
<p id="some_text2">some more text</p>
|
176
|
+
</div>
|
177
|
+
</body>
|
178
|
+
</html><!-- " -->
|
179
|
+
HTML
|
180
|
+
@candidates = @doc.score_paragraphs(0)
|
181
|
+
end
|
182
|
+
|
183
|
+
it "should score elements in the document" do
|
184
|
+
@candidates.values.length.should == 3
|
185
|
+
end
|
186
|
+
|
187
|
+
it "should prefer the body in this particular example" do
|
188
|
+
@candidates.values.sort { |a, b|
|
189
|
+
b[:content_score] <=> a[:content_score]
|
190
|
+
}.first[:elem][:id].should == "body"
|
191
|
+
end
|
192
|
+
|
193
|
+
context "when two consequent br tags are used instead of p" do
|
194
|
+
it "should assign the higher score to the first paragraph in this particular example" do
|
195
|
+
@doc = Readability::Document.new(<<-HTML)
|
196
|
+
<html>
|
197
|
+
<head>
|
198
|
+
<title>title!</title>
|
199
|
+
</head>
|
200
|
+
<body id="body">
|
201
|
+
<div id="post1">
|
202
|
+
This is the main content!<br/><br/>
|
203
|
+
Zebra found killed butcher with the chainsaw.<br/><br/>
|
204
|
+
If only I could think of an example, oh, wait.
|
205
|
+
</div>
|
206
|
+
<div id="post2">
|
207
|
+
This is not the content and although it's longer if you meaure it in characters,
|
208
|
+
it's supposed to have lower score than the previous paragraph. And it's only because
|
209
|
+
of the previous paragraph is not one paragraph, it's three subparagraphs
|
210
|
+
</div>
|
211
|
+
</body>
|
212
|
+
</html>
|
213
|
+
HTML
|
214
|
+
@candidates = @doc.score_paragraphs(0)
|
215
|
+
@candidates.values.sort_by { |a| -a[:content_score] }.first[:elem][:id].should == 'post1'
|
216
|
+
end
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
describe "the cant_read.html fixture" do
|
221
|
+
it "should work on the cant_read.html fixture with some allowed tags" do
|
222
|
+
allowed_tags = %w[div span table tr td p i strong u h1 h2 h3 h4 pre code br a]
|
223
|
+
allowed_attributes = %w[href]
|
224
|
+
html = File.read(File.dirname(__FILE__) + "/fixtures/cant_read.html")
|
225
|
+
Readability::Document.new(html, :tags => allowed_tags, :attributes => allowed_attributes).content.should match(/Can you talk a little about how you developed the looks for the/)
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
229
|
+
describe "general functionality" do
|
230
|
+
before do
|
231
|
+
@doc = Readability::Document.new("<html><head><title>title!</title></head><body><div><p>Some content</p></div></body>",
|
232
|
+
:min_text_length => 0, :retry_length => 1)
|
233
|
+
end
|
234
|
+
|
235
|
+
it "should return the main page content" do
|
236
|
+
@doc.content.should match("Some content")
|
237
|
+
end
|
238
|
+
|
239
|
+
it "should return the page title if present" do
|
240
|
+
@doc.title.should match("title!")
|
241
|
+
|
242
|
+
doc = Readability::Document.new("<html><head></head><body><div><p>Some content</p></div></body>",
|
243
|
+
:min_text_length => 0, :retry_length => 1)
|
244
|
+
doc.title.should be_nil
|
245
|
+
end
|
246
|
+
end
|
247
|
+
|
248
|
+
describe "ignoring sidebars" do
|
249
|
+
before do
|
250
|
+
@doc = Readability::Document.new("<html><head><title>title!</title></head><body><div><p>Some content</p></div><div class='sidebar'><p>sidebar<p></div></body>",
|
251
|
+
:min_text_length => 0, :retry_length => 1)
|
252
|
+
end
|
253
|
+
|
254
|
+
it "should not return the sidebar" do
|
255
|
+
@doc.content.should_not match("sidebar")
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
describe "inserting space for block elements" do
|
260
|
+
before do
|
261
|
+
@doc = Readability::Document.new(<<-HTML, :min_text_length => 0, :retry_length => 1)
|
262
|
+
<html><head><title>title!</title></head>
|
263
|
+
<body>
|
264
|
+
<div>
|
265
|
+
<p>a<br>b<hr>c<address>d</address>f/p>
|
266
|
+
</div>
|
267
|
+
</body>
|
268
|
+
</html>
|
269
|
+
HTML
|
270
|
+
end
|
271
|
+
|
272
|
+
it "should not return the sidebar" do
|
273
|
+
@doc.content.should_not match("a b c d f")
|
274
|
+
end
|
275
|
+
end
|
276
|
+
|
277
|
+
describe "outputs good stuff for known documents" do
|
278
|
+
before do
|
279
|
+
@html_files = Dir.glob(File.dirname(__FILE__) + "/fixtures/samples/*.html")
|
280
|
+
@samples = @html_files.map {|filename| File.basename(filename, '.html') }
|
281
|
+
end
|
282
|
+
|
283
|
+
it "should output expected fragments of text" do
|
284
|
+
checks = 0
|
285
|
+
@samples.each do |sample|
|
286
|
+
html = File.read(File.dirname(__FILE__) + "/fixtures/samples/#{sample}.html")
|
287
|
+
doc = Readability::Document.new(html).content
|
288
|
+
|
289
|
+
# Load the per-sample expectations ($required_fragments / $excluded_fragments) from a path
# anchored on this spec file, matching how the HTML fixtures above are located, rather than
# relying on a $LOAD_PATH or working-directory lookup.
load File.expand_path("fixtures/samples/#{sample}-fragments.rb", File.dirname(__FILE__))
|
290
|
+
#puts "testing #{sample}..."
|
291
|
+
|
292
|
+
$required_fragments.each do |required_text|
|
293
|
+
doc.should include(required_text)
|
294
|
+
checks += 1
|
295
|
+
end
|
296
|
+
|
297
|
+
$excluded_fragments.each do |text_to_avoid|
|
298
|
+
doc.should_not include(text_to_avoid)
|
299
|
+
checks += 1
|
300
|
+
end
|
301
|
+
end
|
302
|
+
#puts "Performed #{checks} checks."
|
303
|
+
end
|
304
|
+
end
|
305
|
+
|
306
|
+
describe "encoding guessing" do
|
307
|
+
if RUBY_VERSION =~ /^1\.9\./
|
308
|
+
context "with ruby 1.9.2" do
|
309
|
+
it "should correctly guess and enforce HTML encoding" do
|
310
|
+
doc = Readability::Document.new("<html><head><meta http-equiv='content-type' content='text/html; charset=LATIN1'></head><body><div>hi!</div></body></html>")
|
311
|
+
content = doc.content
|
312
|
+
content.encoding.to_s.should == "ISO-8859-1"
|
313
|
+
content.should be_valid_encoding
|
314
|
+
end
|
315
|
+
|
316
|
+
it "should allow encoding guessing to be skipped" do
|
317
|
+
do_not_allow(GuessHtmlEncoding).encode
|
318
|
+
doc = Readability::Document.new(@simple_html_fixture, :do_not_guess_encoding => true)
|
319
|
+
doc.content
|
320
|
+
end
|
321
|
+
|
322
|
+
it "should allow encoding guessing to be overridden" do
|
323
|
+
do_not_allow(GuessHtmlEncoding).encode
|
324
|
+
doc = Readability::Document.new(@simple_html_fixture, :encoding => "UTF-8")
|
325
|
+
doc.content
|
326
|
+
end
|
327
|
+
end
|
328
|
+
end
|
329
|
+
end
|
330
|
+
end
|
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'readability'
|
3
|
+
require 'rr'
|
4
|
+
require 'fakeweb'
|
5
|
+
|
6
|
+
RSpec.configure do |config|
|
7
|
+
config.mock_with :rr
|
8
|
+
end
|
9
|
+
|
10
|
+
FakeWeb.allow_net_connect = false
|
11
|
+
# Serve the image fixture for the one remote URL the specs request. The JPEG is opened in
# binary mode ("rb") so its bytes are passed to FakeWeb without text-mode translation.
FakeWeb.register_uri(:get, "http://img.thesun.co.uk/multimedia/archive/01416/dim_1416768a.jpg", :body => File.open(File.dirname(__FILE__) + "/fixtures/images/dim_1416768a.jpg", "rb") { |f| f.read })
|
metadata
ADDED
@@ -0,0 +1,176 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: marcosinger-ruby-readability
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 7
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 6
|
9
|
+
- 0
|
10
|
+
version: 0.6.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Andrew Cantino
|
14
|
+
- starrhorne
|
15
|
+
- libc
|
16
|
+
- Kyle Maxwell
|
17
|
+
- Marco Singer
|
18
|
+
autorequire:
|
19
|
+
bindir: bin
|
20
|
+
cert_chain: []
|
21
|
+
|
22
|
+
date: 2011-12-19 00:00:00 Z
|
23
|
+
dependencies:
|
24
|
+
- !ruby/object:Gem::Dependency
|
25
|
+
name: rspec
|
26
|
+
prerelease: false
|
27
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
hash: 15
|
33
|
+
segments:
|
34
|
+
- 2
|
35
|
+
- 6
|
36
|
+
version: "2.6"
|
37
|
+
type: :development
|
38
|
+
version_requirements: *id001
|
39
|
+
- !ruby/object:Gem::Dependency
|
40
|
+
name: rr
|
41
|
+
prerelease: false
|
42
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
hash: 15
|
48
|
+
segments:
|
49
|
+
- 1
|
50
|
+
- 0
|
51
|
+
version: "1.0"
|
52
|
+
type: :development
|
53
|
+
version_requirements: *id002
|
54
|
+
- !ruby/object:Gem::Dependency
|
55
|
+
name: nokogiri
|
56
|
+
prerelease: false
|
57
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
hash: 3
|
63
|
+
segments:
|
64
|
+
- 1
|
65
|
+
- 4
|
66
|
+
- 2
|
67
|
+
version: 1.4.2
|
68
|
+
type: :runtime
|
69
|
+
version_requirements: *id003
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: guess_html_encoding
|
72
|
+
prerelease: false
|
73
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
hash: 27
|
79
|
+
segments:
|
80
|
+
- 0
|
81
|
+
- 0
|
82
|
+
- 2
|
83
|
+
version: 0.0.2
|
84
|
+
type: :runtime
|
85
|
+
version_requirements: *id004
|
86
|
+
description: Port of arc90's readability project to ruby
|
87
|
+
email:
|
88
|
+
- andrew@iterationlabs.com
|
89
|
+
- markaum@gmail.com
|
90
|
+
executables:
|
91
|
+
- readability
|
92
|
+
extensions: []
|
93
|
+
|
94
|
+
extra_rdoc_files: []
|
95
|
+
|
96
|
+
files:
|
97
|
+
- .document
|
98
|
+
- .gitignore
|
99
|
+
- .rspec
|
100
|
+
- Gemfile
|
101
|
+
- README
|
102
|
+
- Rakefile
|
103
|
+
- bin/readability
|
104
|
+
- lib/readability.rb
|
105
|
+
- lib/ruby-readability.rb
|
106
|
+
- ruby-readability.gemspec
|
107
|
+
- spec/fixtures/bbc.html
|
108
|
+
- spec/fixtures/cant_read.html
|
109
|
+
- spec/fixtures/images/dim_1416768a.jpg
|
110
|
+
- spec/fixtures/nytimes.html
|
111
|
+
- spec/fixtures/sample.html
|
112
|
+
- spec/fixtures/samples/blogpost_with_links-fragments.rb
|
113
|
+
- spec/fixtures/samples/blogpost_with_links.html
|
114
|
+
- spec/fixtures/samples/channel4-1-fragments.rb
|
115
|
+
- spec/fixtures/samples/channel4-1.html
|
116
|
+
- spec/fixtures/samples/foxnews-india1-fragments.rb
|
117
|
+
- spec/fixtures/samples/foxnews-india1.html
|
118
|
+
- spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb
|
119
|
+
- spec/fixtures/samples/globemail-ottawa-cuts.html
|
120
|
+
- spec/fixtures/should_not_truncate.txt
|
121
|
+
- spec/fixtures/thesun.html
|
122
|
+
- spec/readability_spec.rb
|
123
|
+
- spec/spec.opts
|
124
|
+
- spec/spec_helper.rb
|
125
|
+
homepage: http://github.com/iterationlabs/ruby-readability
|
126
|
+
licenses: []
|
127
|
+
|
128
|
+
post_install_message:
|
129
|
+
rdoc_options: []
|
130
|
+
|
131
|
+
require_paths:
|
132
|
+
- lib
|
133
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
134
|
+
none: false
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
hash: 3
|
139
|
+
segments:
|
140
|
+
- 0
|
141
|
+
version: "0"
|
142
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
143
|
+
none: false
|
144
|
+
requirements:
|
145
|
+
- - ">="
|
146
|
+
- !ruby/object:Gem::Version
|
147
|
+
hash: 3
|
148
|
+
segments:
|
149
|
+
- 0
|
150
|
+
version: "0"
|
151
|
+
requirements: []
|
152
|
+
|
153
|
+
rubyforge_project: ruby-readability
|
154
|
+
rubygems_version: 1.8.10
|
155
|
+
signing_key:
|
156
|
+
specification_version: 3
|
157
|
+
summary: Port of arc90's readability project to ruby
|
158
|
+
test_files:
|
159
|
+
- spec/fixtures/bbc.html
|
160
|
+
- spec/fixtures/cant_read.html
|
161
|
+
- spec/fixtures/images/dim_1416768a.jpg
|
162
|
+
- spec/fixtures/nytimes.html
|
163
|
+
- spec/fixtures/sample.html
|
164
|
+
- spec/fixtures/samples/blogpost_with_links-fragments.rb
|
165
|
+
- spec/fixtures/samples/blogpost_with_links.html
|
166
|
+
- spec/fixtures/samples/channel4-1-fragments.rb
|
167
|
+
- spec/fixtures/samples/channel4-1.html
|
168
|
+
- spec/fixtures/samples/foxnews-india1-fragments.rb
|
169
|
+
- spec/fixtures/samples/foxnews-india1.html
|
170
|
+
- spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb
|
171
|
+
- spec/fixtures/samples/globemail-ottawa-cuts.html
|
172
|
+
- spec/fixtures/should_not_truncate.txt
|
173
|
+
- spec/fixtures/thesun.html
|
174
|
+
- spec/readability_spec.rb
|
175
|
+
- spec/spec.opts
|
176
|
+
- spec/spec_helper.rb
|