ruby-readability 0.2.3 → 0.3.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/Gemfile +4 -0
- data/README +24 -0
- data/Rakefile +3 -42
- data/bin/readability +29 -2
- data/lib/readability.rb +20 -11
- data/lib/ruby-readability.rb +1 -0
- data/ruby-readability.gemspec +17 -65
- data/spec/fixtures/samples/blogpost_with_links-fragments.rb +1 -0
- data/spec/fixtures/samples/channel4-1-fragments.rb +1 -1
- data/spec/fixtures/samples/foxnews-india1-fragments.rb +1 -1
- data/spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb +1 -1
- data/spec/readability_spec.rb +31 -10
- data/spec/spec_helper.rb +3 -6
- metadata +73 -69
- data/VERSION +0 -1
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README
CHANGED
@@ -15,6 +15,30 @@ Example:
|
|
15
15
|
source = open('http://lab.arc90.com/experiments/readability/').read
|
16
16
|
puts Readability::Document.new(source).content
|
17
17
|
|
18
|
+
Options:
|
19
|
+
|
20
|
+
You may provide additional options to Readability::Document.new, including:
|
21
|
+
|
22
|
+
:tags - the base whitelist of tags to sanitize, defaults to %w[div p]
|
23
|
+
:remove_empty_nodes - remove <p> tags that have no text content; this will also remove p tags that contain only images
|
24
|
+
:attributes - whitelist of allowed attributes
|
25
|
+
:debug - provide debugging output, defaults false
|
26
|
+
:encoding - if this page is of a known encoding, you can specify it; if left unspecified, the encoding will be guessed (only in Ruby 1.9.x)
|
27
|
+
:html_headers - in Ruby 1.9.x these will be passed to the guess_html_encoding gem to aid with guessing the HTML encoding
|
28
|
+
|
29
|
+
Readability comes with a command-line tool for experimentation in bin/readability.
|
30
|
+
|
31
|
+
Usage: readability [options] URL
|
32
|
+
-d, --debug Show debug output
|
33
|
+
-i, --images Keep images and links
|
34
|
+
-h, --help Show this message
|
35
|
+
|
36
|
+
Potential issues:
|
37
|
+
|
38
|
+
* If you're on a Mac and are getting segmentation faults, see this discussion https://github.com/tenderlove/nokogiri/issues/404 and consider updating your version of libxml2.
|
39
|
+
Installing nokogiri against version 2.7.8 of libxml2 with the following command worked for me:
|
40
|
+
gem install nokogiri -- --with-xml2-include=/usr/local/Cellar/libxml2/2.7.8/include/libxml2 --with-xml2-lib=/usr/local/Cellar/libxml2/2.7.8/lib --with-xslt-dir=/usr/local/Cellar/libxslt/1.1.26
|
41
|
+
|
18
42
|
===
|
19
43
|
|
20
44
|
This code is under the Apache License 2.0. http://www.apache.org/licenses/LICENSE-2.0
|
data/Rakefile
CHANGED
@@ -1,45 +1,6 @@
|
|
1
|
-
require
|
2
|
-
require '
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require 'rspec/core/rake_task'
|
3
3
|
|
4
|
-
|
5
|
-
require 'jeweler'
|
6
|
-
Jeweler::Tasks.new do |gem|
|
7
|
-
gem.name = "ruby-readability"
|
8
|
-
gem.summary = %Q{Port of arc90's readability project to ruby}
|
9
|
-
gem.description = %Q{Port of arc90's readability project to ruby}
|
10
|
-
gem.email = "andrew@iterationlabs.com"
|
11
|
-
gem.homepage = "http://github.com/iterationlabs/ruby-readability"
|
12
|
-
gem.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
|
13
|
-
gem.add_development_dependency "rspec", ">= 1.2.9"
|
14
|
-
gem.add_dependency 'nokogiri', '>= 1.4.2'
|
15
|
-
end
|
16
|
-
Jeweler::GemcutterTasks.new
|
17
|
-
rescue LoadError
|
18
|
-
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
19
|
-
end
|
20
|
-
|
21
|
-
require 'spec/rake/spectask'
|
22
|
-
Spec::Rake::SpecTask.new(:spec) do |spec|
|
23
|
-
spec.libs << 'lib' << 'spec'
|
24
|
-
spec.spec_files = FileList['spec/**/*_spec.rb']
|
25
|
-
end
|
26
|
-
|
27
|
-
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
28
|
-
spec.libs << 'lib' << 'spec'
|
29
|
-
spec.pattern = 'spec/**/*_spec.rb'
|
30
|
-
spec.rcov = true
|
31
|
-
end
|
32
|
-
|
33
|
-
task :spec => :check_dependencies
|
4
|
+
RSpec::Core::RakeTask.new(:spec)
|
34
5
|
|
35
6
|
task :default => :spec
|
36
|
-
|
37
|
-
require 'rake/rdoctask'
|
38
|
-
Rake::RDocTask.new do |rdoc|
|
39
|
-
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
40
|
-
|
41
|
-
rdoc.rdoc_dir = 'rdoc'
|
42
|
-
rdoc.title = "ruby-readability #{version}"
|
43
|
-
rdoc.rdoc_files.include('README*')
|
44
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
45
|
-
end
|
data/bin/readability
CHANGED
@@ -2,12 +2,39 @@
|
|
2
2
|
$KCODE='u'
|
3
3
|
require 'rubygems'
|
4
4
|
require 'open-uri'
|
5
|
+
require 'optparse'
|
5
6
|
require File.dirname(__FILE__) + '/../lib/readability'
|
6
7
|
|
8
|
+
options = { :debug => false, :images => false }
|
9
|
+
options_parser = OptionParser.new do |opts|
|
10
|
+
opts.banner = "Usage: #{File.basename($0)} [options] URL"
|
11
|
+
|
12
|
+
opts.on("-d", "--debug", "Show debug output") do |v|
|
13
|
+
options[:debug] = v
|
14
|
+
end
|
15
|
+
|
16
|
+
opts.on("-i", "--images", "Keep images and links") do |i|
|
17
|
+
options[:images] = i
|
18
|
+
end
|
19
|
+
|
20
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
21
|
+
puts opts
|
22
|
+
exit
|
23
|
+
end
|
24
|
+
end
|
25
|
+
options_parser.parse!
|
26
|
+
|
7
27
|
if ARGV.length != 1
|
8
|
-
STDERR.puts
|
28
|
+
STDERR.puts options_parser
|
9
29
|
exit 1
|
10
30
|
end
|
11
31
|
|
12
32
|
text = open(ARGV.first).read
|
13
|
-
|
33
|
+
if options[:images]
|
34
|
+
puts Readability::Document.new(text, :tags => %w[div p img a],
|
35
|
+
:attributes => %w[src href],
|
36
|
+
:remove_empty_nodes => false,
|
37
|
+
:debug => options[:debug]).content
|
38
|
+
else
|
39
|
+
puts Readability::Document.new(text, :debug => options[:debug]).content
|
40
|
+
end
|
data/lib/readability.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'nokogiri'
|
3
|
+
require 'guess_html_encoding'
|
3
4
|
|
4
5
|
module Readability
|
5
6
|
class Document
|
@@ -8,14 +9,22 @@ module Readability
|
|
8
9
|
:min_text_length => 25,
|
9
10
|
:remove_unlikely_candidates => true,
|
10
11
|
:weight_classes => true,
|
11
|
-
:clean_conditionally => true
|
12
|
+
:clean_conditionally => true,
|
13
|
+
:remove_empty_nodes => true
|
12
14
|
}.freeze
|
13
15
|
|
14
16
|
attr_accessor :options, :html
|
15
17
|
|
16
18
|
def initialize(input, options = {})
|
17
|
-
@input = input.gsub(REGEXES[:replaceBrsRe], '</p><p>').gsub(REGEXES[:replaceFontsRe], '<\1span>')
|
18
19
|
@options = DEFAULT_OPTIONS.merge(options)
|
20
|
+
@input = input
|
21
|
+
|
22
|
+
if RUBY_VERSION =~ /^1\.9\./ && !@options[:encoding]
|
23
|
+
@input = GuessHtmlEncoding.encode(@input, @options[:html_headers]) unless @options[:do_not_guess_encoding]
|
24
|
+
@options[:encoding] = @input.encoding.to_s
|
25
|
+
end
|
26
|
+
|
27
|
+
@input = @input.gsub(REGEXES[:replaceBrsRe], '</p><p>').gsub(REGEXES[:replaceFontsRe], '<\1span>')
|
19
28
|
@remove_unlikely_candidates = @options[:remove_unlikely_candidates]
|
20
29
|
@weight_classes = @options[:weight_classes]
|
21
30
|
@clean_conditionally = @options[:clean_conditionally]
|
@@ -23,7 +32,7 @@ module Readability
|
|
23
32
|
end
|
24
33
|
|
25
34
|
def make_html
|
26
|
-
@html = Nokogiri::HTML(@input, nil,
|
35
|
+
@html = Nokogiri::HTML(@input, nil, @options[:encoding])
|
27
36
|
end
|
28
37
|
|
29
38
|
REGEXES = {
|
@@ -221,7 +230,7 @@ module Readability
|
|
221
230
|
# wrap text nodes in p tags
|
222
231
|
# elem.children.each do |child|
|
223
232
|
# if child.text?
|
224
|
-
|
233
|
+
# debug("wrapping text node with a p")
|
225
234
|
# child.swap("<p>#{child.text}</p>")
|
226
235
|
# end
|
227
236
|
# end
|
@@ -238,9 +247,11 @@ module Readability
|
|
238
247
|
elem.remove
|
239
248
|
end
|
240
249
|
|
241
|
-
|
242
|
-
|
243
|
-
|
250
|
+
if @options[:remove_empty_nodes]
|
251
|
+
# remove <p> tags that have no text content - this will also remove p tags that contain only images.
|
252
|
+
node.css("p").each do |elem|
|
253
|
+
elem.remove if elem.content.strip.empty?
|
254
|
+
end
|
244
255
|
end
|
245
256
|
|
246
257
|
# Conditionally clean <table>s, <ul>s, and <div>s
|
@@ -259,7 +270,6 @@ module Readability
|
|
259
270
|
base_replace_with_whitespace.each { |tag| replace_with_whitespace[tag] = true }
|
260
271
|
|
261
272
|
([node] + node.css("*")).each do |el|
|
262
|
-
|
263
273
|
# If element is in whitelist, delete all its attributes
|
264
274
|
if whitelist[el.node_name]
|
265
275
|
el.attributes.each { |a, x| el.delete(a) unless @options[:attributes] && @options[:attributes].include?(a.to_s) }
|
@@ -267,10 +277,9 @@ module Readability
|
|
267
277
|
# Otherwise, replace the element with its contents
|
268
278
|
else
|
269
279
|
if replace_with_whitespace[el.node_name]
|
270
|
-
|
271
|
-
el.swap(' ' << el.text << ' ')
|
280
|
+
el.swap(Nokogiri::XML::Text.new(' ' << el.text << ' ', el.document))
|
272
281
|
else
|
273
|
-
el.swap(el.text)
|
282
|
+
el.swap(Nokogiri::XML::Text.new(el.text, el.document))
|
274
283
|
end
|
275
284
|
end
|
276
285
|
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'readability'
|
data/ruby-readability.gemspec
CHANGED
@@ -1,72 +1,24 @@
|
|
1
|
-
# Generated by jeweler
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
1
|
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
5
3
|
|
6
4
|
Gem::Specification.new do |s|
|
7
|
-
s.name
|
8
|
-
s.version
|
9
|
-
|
10
|
-
s.
|
11
|
-
s.
|
12
|
-
s.
|
13
|
-
s.default_executable = %q{readability}
|
5
|
+
s.name = "ruby-readability"
|
6
|
+
s.version = '0.3.0.pre'
|
7
|
+
s.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
|
8
|
+
s.email = ["andrew@iterationlabs.com"]
|
9
|
+
s.homepage = "http://github.com/iterationlabs/ruby-readability"
|
10
|
+
s.summary = %q{Port of arc90's readability project to ruby}
|
14
11
|
s.description = %q{Port of arc90's readability project to ruby}
|
15
|
-
s.email = %q{andrew@iterationlabs.com}
|
16
|
-
s.executables = ["readability"]
|
17
|
-
s.extra_rdoc_files = [
|
18
|
-
"README"
|
19
|
-
]
|
20
|
-
s.files = [
|
21
|
-
".document",
|
22
|
-
"README",
|
23
|
-
"Rakefile",
|
24
|
-
"VERSION",
|
25
|
-
"bin/readability",
|
26
|
-
"lib/readability.rb",
|
27
|
-
"ruby-readability.gemspec",
|
28
|
-
"spec/fixtures/cant_read.html",
|
29
|
-
"spec/fixtures/sample.html",
|
30
|
-
"spec/fixtures/samples/blogpost_with_links-fragments.rb",
|
31
|
-
"spec/fixtures/samples/blogpost_with_links.html",
|
32
|
-
"spec/fixtures/samples/channel4-1-fragments.rb",
|
33
|
-
"spec/fixtures/samples/channel4-1.html",
|
34
|
-
"spec/fixtures/samples/foxnews-india1-fragments.rb",
|
35
|
-
"spec/fixtures/samples/foxnews-india1.html",
|
36
|
-
"spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb",
|
37
|
-
"spec/fixtures/samples/globemail-ottawa-cuts.html",
|
38
|
-
"spec/fixtures/should_not_truncate.txt",
|
39
|
-
"spec/readability_spec.rb",
|
40
|
-
"spec/spec.opts",
|
41
|
-
"spec/spec_helper.rb"
|
42
|
-
]
|
43
|
-
s.homepage = %q{http://github.com/iterationlabs/ruby-readability}
|
44
|
-
s.require_paths = ["lib"]
|
45
|
-
s.rubygems_version = %q{1.3.7}
|
46
|
-
s.summary = %q{Port of arc90's readability project to ruby}
|
47
|
-
s.test_files = [
|
48
|
-
"spec/fixtures/samples/blogpost_with_links-fragments.rb",
|
49
|
-
"spec/fixtures/samples/channel4-1-fragments.rb",
|
50
|
-
"spec/fixtures/samples/foxnews-india1-fragments.rb",
|
51
|
-
"spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb",
|
52
|
-
"spec/readability_spec.rb",
|
53
|
-
"spec/spec_helper.rb"
|
54
|
-
]
|
55
12
|
|
56
|
-
|
57
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
58
|
-
s.specification_version = 3
|
13
|
+
s.rubyforge_project = "ruby-readability"
|
59
14
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
65
|
-
s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
|
66
|
-
end
|
67
|
-
else
|
68
|
-
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
69
|
-
s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
|
70
|
-
end
|
71
|
-
end
|
15
|
+
s.files = `git ls-files`.split("\n")
|
16
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
|
+
s.require_paths = ["lib"]
|
72
19
|
|
20
|
+
s.add_development_dependency "rspec", ">= 2.6"
|
21
|
+
s.add_development_dependency "rr", ">= 1.0"
|
22
|
+
s.add_dependency 'nokogiri', '>= 1.4.2'
|
23
|
+
s.add_dependency 'guess_html_encoding', '>= 0.0.2'
|
24
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# encoding: UTF-8
|
2
2
|
# This sample originally from http://www.foxnews.com/world/2010/05/14/police-killed-bus-touches-high-voltage-wire-central-india/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed:+foxnews/latest+(Text+-+Latest+Headlines)
|
3
3
|
|
4
4
|
$required_fragments = [
|
data/spec/readability_spec.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
2
4
|
|
3
5
|
describe Readability do
|
4
6
|
before do
|
@@ -115,11 +117,9 @@ describe Readability do
|
|
115
117
|
b[:content_score] <=> a[:content_score]
|
116
118
|
}.first[:elem][:id].should == "body"
|
117
119
|
end
|
118
|
-
end
|
119
120
|
|
120
|
-
describe "score_paragraphs" do
|
121
121
|
context "when two consequent br tags are used instead of p" do
|
122
|
-
|
122
|
+
it "should assign the higher score to the first paragraph in this particular example" do
|
123
123
|
@doc = Readability::Document.new(<<-HTML)
|
124
124
|
<html>
|
125
125
|
<head>
|
@@ -140,9 +140,6 @@ describe Readability do
|
|
140
140
|
</html>
|
141
141
|
HTML
|
142
142
|
@candidates = @doc.score_paragraphs(0)
|
143
|
-
end
|
144
|
-
|
145
|
-
it "should assign the higher score to the first paragraph in this particular example" do
|
146
143
|
@candidates.values.sort_by { |a| -a[:content_score] }.first[:elem][:id].should == 'post1'
|
147
144
|
end
|
148
145
|
end
|
@@ -204,14 +201,13 @@ describe Readability do
|
|
204
201
|
end
|
205
202
|
|
206
203
|
it "should output expected fragments of text" do
|
207
|
-
|
208
204
|
checks = 0
|
209
205
|
@samples.each do |sample|
|
210
206
|
html = File.read(File.dirname(__FILE__) + "/fixtures/samples/#{sample}.html")
|
211
207
|
doc = Readability::Document.new(html).content
|
212
208
|
|
213
209
|
load "fixtures/samples/#{sample}-fragments.rb"
|
214
|
-
puts "testing #{sample}..."
|
210
|
+
#puts "testing #{sample}..."
|
215
211
|
|
216
212
|
$required_fragments.each do |required_text|
|
217
213
|
doc.should include(required_text)
|
@@ -223,7 +219,32 @@ describe Readability do
|
|
223
219
|
checks += 1
|
224
220
|
end
|
225
221
|
end
|
226
|
-
puts "Performed #{checks} checks."
|
222
|
+
#puts "Performed #{checks} checks."
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
describe "encoding guessing" do
|
227
|
+
if RUBY_VERSION =~ /^1\.9\./
|
228
|
+
context "with ruby 1.9.2" do
|
229
|
+
it "should correctly guess and enforce HTML encoding" do
|
230
|
+
doc = Readability::Document.new("<html><head><meta http-equiv='content-type' content='text/html; charset=LATIN1'></head><body><div>hi!</div></body></html>")
|
231
|
+
content = doc.content
|
232
|
+
content.encoding.to_s.should == "ISO-8859-1"
|
233
|
+
content.should be_valid_encoding
|
234
|
+
end
|
235
|
+
|
236
|
+
it "should allow encoding guessing to be skipped" do
|
237
|
+
do_not_allow(GuessHtmlEncoding).encode
|
238
|
+
doc = Readability::Document.new(@simple_html_fixture, :do_not_guess_encoding => true)
|
239
|
+
doc.content
|
240
|
+
end
|
241
|
+
|
242
|
+
it "should allow encoding guessing to be overridden" do
|
243
|
+
do_not_allow(GuessHtmlEncoding).encode
|
244
|
+
doc = Readability::Document.new(@simple_html_fixture, :encoding => "UTF-8")
|
245
|
+
doc.content
|
246
|
+
end
|
247
|
+
end
|
227
248
|
end
|
228
249
|
end
|
229
250
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,10 +1,7 @@
|
|
1
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
2
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
1
|
require 'rubygems'
|
4
2
|
require 'readability'
|
5
|
-
require '
|
6
|
-
require 'spec/autorun'
|
7
|
-
|
8
|
-
Spec::Runner.configure do |config|
|
3
|
+
require 'rr'
|
9
4
|
|
5
|
+
RSpec.configure do |config|
|
6
|
+
config.mock_with :rr
|
10
7
|
end
|
metadata
CHANGED
@@ -1,15 +1,10 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-readability
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 2
|
9
|
-
- 3
|
10
|
-
version: 0.2.3
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.0.pre
|
5
|
+
prerelease: 6
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Andrew Cantino
|
14
9
|
- starrhorne
|
15
10
|
- libc
|
@@ -17,57 +12,68 @@ authors:
|
|
17
12
|
autorequire:
|
18
13
|
bindir: bin
|
19
14
|
cert_chain: []
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
dependencies:
|
24
|
-
- !ruby/object:Gem::Dependency
|
15
|
+
date: 2011-10-26 00:00:00.000000000Z
|
16
|
+
dependencies:
|
17
|
+
- !ruby/object:Gem::Dependency
|
25
18
|
name: rspec
|
19
|
+
requirement: &70185897981180 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ! '>='
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: '2.6'
|
25
|
+
type: :development
|
26
26
|
prerelease: false
|
27
|
-
|
27
|
+
version_requirements: *70185897981180
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rr
|
30
|
+
requirement: &70185897980680 !ruby/object:Gem::Requirement
|
28
31
|
none: false
|
29
|
-
requirements:
|
30
|
-
- -
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
|
33
|
-
segments:
|
34
|
-
- 1
|
35
|
-
- 2
|
36
|
-
- 9
|
37
|
-
version: 1.2.9
|
32
|
+
requirements:
|
33
|
+
- - ! '>='
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '1.0'
|
38
36
|
type: :development
|
39
|
-
version_requirements: *id001
|
40
|
-
- !ruby/object:Gem::Dependency
|
41
|
-
name: nokogiri
|
42
37
|
prerelease: false
|
43
|
-
|
38
|
+
version_requirements: *70185897980680
|
39
|
+
- !ruby/object:Gem::Dependency
|
40
|
+
name: nokogiri
|
41
|
+
requirement: &70185897980220 !ruby/object:Gem::Requirement
|
44
42
|
none: false
|
45
|
-
requirements:
|
46
|
-
- -
|
47
|
-
- !ruby/object:Gem::Version
|
48
|
-
hash: 3
|
49
|
-
segments:
|
50
|
-
- 1
|
51
|
-
- 4
|
52
|
-
- 2
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
53
46
|
version: 1.4.2
|
54
47
|
type: :runtime
|
55
|
-
|
48
|
+
prerelease: false
|
49
|
+
version_requirements: *70185897980220
|
50
|
+
- !ruby/object:Gem::Dependency
|
51
|
+
name: guess_html_encoding
|
52
|
+
requirement: &70185897979760 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: 0.0.2
|
58
|
+
type: :runtime
|
59
|
+
prerelease: false
|
60
|
+
version_requirements: *70185897979760
|
56
61
|
description: Port of arc90's readability project to ruby
|
57
|
-
email:
|
58
|
-
|
62
|
+
email:
|
63
|
+
- andrew@iterationlabs.com
|
64
|
+
executables:
|
59
65
|
- readability
|
60
66
|
extensions: []
|
61
|
-
|
62
|
-
|
63
|
-
- README
|
64
|
-
files:
|
67
|
+
extra_rdoc_files: []
|
68
|
+
files:
|
65
69
|
- .document
|
70
|
+
- .gitignore
|
71
|
+
- Gemfile
|
66
72
|
- README
|
67
73
|
- Rakefile
|
68
|
-
- VERSION
|
69
74
|
- bin/readability
|
70
75
|
- lib/readability.rb
|
76
|
+
- lib/ruby-readability.rb
|
71
77
|
- ruby-readability.gemspec
|
72
78
|
- spec/fixtures/cant_read.html
|
73
79
|
- spec/fixtures/sample.html
|
@@ -83,44 +89,42 @@ files:
|
|
83
89
|
- spec/readability_spec.rb
|
84
90
|
- spec/spec.opts
|
85
91
|
- spec/spec_helper.rb
|
86
|
-
has_rdoc: true
|
87
92
|
homepage: http://github.com/iterationlabs/ruby-readability
|
88
93
|
licenses: []
|
89
|
-
|
90
94
|
post_install_message:
|
91
95
|
rdoc_options: []
|
92
|
-
|
93
|
-
require_paths:
|
96
|
+
require_paths:
|
94
97
|
- lib
|
95
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
98
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
99
|
none: false
|
97
|
-
requirements:
|
98
|
-
- -
|
99
|
-
- !ruby/object:Gem::Version
|
100
|
-
|
101
|
-
|
102
|
-
- 0
|
103
|
-
version: "0"
|
104
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ! '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
|
-
requirements:
|
107
|
-
- -
|
108
|
-
- !ruby/object:Gem::Version
|
109
|
-
|
110
|
-
segments:
|
111
|
-
- 0
|
112
|
-
version: "0"
|
106
|
+
requirements:
|
107
|
+
- - ! '>'
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.3.1
|
113
110
|
requirements: []
|
114
|
-
|
115
|
-
|
116
|
-
rubygems_version: 1.3.7
|
111
|
+
rubyforge_project: ruby-readability
|
112
|
+
rubygems_version: 1.8.6
|
117
113
|
signing_key:
|
118
114
|
specification_version: 3
|
119
115
|
summary: Port of arc90's readability project to ruby
|
120
|
-
test_files:
|
116
|
+
test_files:
|
117
|
+
- spec/fixtures/cant_read.html
|
118
|
+
- spec/fixtures/sample.html
|
121
119
|
- spec/fixtures/samples/blogpost_with_links-fragments.rb
|
120
|
+
- spec/fixtures/samples/blogpost_with_links.html
|
122
121
|
- spec/fixtures/samples/channel4-1-fragments.rb
|
122
|
+
- spec/fixtures/samples/channel4-1.html
|
123
123
|
- spec/fixtures/samples/foxnews-india1-fragments.rb
|
124
|
+
- spec/fixtures/samples/foxnews-india1.html
|
124
125
|
- spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb
|
126
|
+
- spec/fixtures/samples/globemail-ottawa-cuts.html
|
127
|
+
- spec/fixtures/should_not_truncate.txt
|
125
128
|
- spec/readability_spec.rb
|
129
|
+
- spec/spec.opts
|
126
130
|
- spec/spec_helper.rb
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.2.3
|