ruby-readability 0.2.3 → 0.3.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +7 -0
- data/Gemfile +4 -0
- data/README +24 -0
- data/Rakefile +3 -42
- data/bin/readability +29 -2
- data/lib/readability.rb +20 -11
- data/lib/ruby-readability.rb +1 -0
- data/ruby-readability.gemspec +17 -65
- data/spec/fixtures/samples/blogpost_with_links-fragments.rb +1 -0
- data/spec/fixtures/samples/channel4-1-fragments.rb +1 -1
- data/spec/fixtures/samples/foxnews-india1-fragments.rb +1 -1
- data/spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb +1 -1
- data/spec/readability_spec.rb +31 -10
- data/spec/spec_helper.rb +3 -6
- metadata +73 -69
- data/VERSION +0 -1
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README
CHANGED
@@ -15,6 +15,30 @@ Example:
|
|
15
15
|
source = open('http://lab.arc90.com/experiments/readability/').read
|
16
16
|
puts Readability::Document.new(source).content
|
17
17
|
|
18
|
+
Options:
|
19
|
+
|
20
|
+
You may provide additional options to Readability::Document.new, including:
|
21
|
+
|
22
|
+
:tags - the base whitelist of tags to sanitize, defaults to %w[div p]
|
23
|
+
:remove_empty_nodes - remove <p> tags that have no text content; this will also remove p tags that contain only images
|
24
|
+
:attributes - whitelist of allowed attributes
|
25
|
+
:debug - provide debugging output, defaults false
|
26
|
+
:encoding - if this page is of a known encoding, you can specify it; if left unspecified, the encoding will be guessed (only in Ruby 1.9.x)
|
27
|
+
:html_headers - in Ruby 1.9.x these will be passed to the guess_html_encoding gem to aid with guessing the HTML encoding
|
28
|
+
|
29
|
+
Readability comes with a command-line tool for experimentation in bin/readability.
|
30
|
+
|
31
|
+
Usage: readability [options] URL
|
32
|
+
-d, --debug Show debug output
|
33
|
+
-i, --images Keep images and links
|
34
|
+
-h, --help Show this message
|
35
|
+
|
36
|
+
Potential issues:
|
37
|
+
|
38
|
+
* If you're on a Mac and are getting segmentation faults, see this discussion https://github.com/tenderlove/nokogiri/issues/404 and consider updating your version of libxml2.
|
39
|
+
Building nokogiri against version 2.7.8 of libxml2 with the following command worked for me:
|
40
|
+
gem install nokogiri -- --with-xml2-include=/usr/local/Cellar/libxml2/2.7.8/include/libxml2 --with-xml2-lib=/usr/local/Cellar/libxml2/2.7.8/lib --with-xslt-dir=/usr/local/Cellar/libxslt/1.1.26
|
41
|
+
|
18
42
|
===
|
19
43
|
|
20
44
|
This code is under the Apache License 2.0. http://www.apache.org/licenses/LICENSE-2.0
|
data/Rakefile
CHANGED
@@ -1,45 +1,6 @@
|
|
1
|
-
require
|
2
|
-
require '
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require 'rspec/core/rake_task'
|
3
3
|
|
4
|
-
|
5
|
-
require 'jeweler'
|
6
|
-
Jeweler::Tasks.new do |gem|
|
7
|
-
gem.name = "ruby-readability"
|
8
|
-
gem.summary = %Q{Port of arc90's readability project to ruby}
|
9
|
-
gem.description = %Q{Port of arc90's readability project to ruby}
|
10
|
-
gem.email = "andrew@iterationlabs.com"
|
11
|
-
gem.homepage = "http://github.com/iterationlabs/ruby-readability"
|
12
|
-
gem.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
|
13
|
-
gem.add_development_dependency "rspec", ">= 1.2.9"
|
14
|
-
gem.add_dependency 'nokogiri', '>= 1.4.2'
|
15
|
-
end
|
16
|
-
Jeweler::GemcutterTasks.new
|
17
|
-
rescue LoadError
|
18
|
-
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
19
|
-
end
|
20
|
-
|
21
|
-
require 'spec/rake/spectask'
|
22
|
-
Spec::Rake::SpecTask.new(:spec) do |spec|
|
23
|
-
spec.libs << 'lib' << 'spec'
|
24
|
-
spec.spec_files = FileList['spec/**/*_spec.rb']
|
25
|
-
end
|
26
|
-
|
27
|
-
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
28
|
-
spec.libs << 'lib' << 'spec'
|
29
|
-
spec.pattern = 'spec/**/*_spec.rb'
|
30
|
-
spec.rcov = true
|
31
|
-
end
|
32
|
-
|
33
|
-
task :spec => :check_dependencies
|
4
|
+
RSpec::Core::RakeTask.new(:spec)
|
34
5
|
|
35
6
|
task :default => :spec
|
36
|
-
|
37
|
-
require 'rake/rdoctask'
|
38
|
-
Rake::RDocTask.new do |rdoc|
|
39
|
-
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
40
|
-
|
41
|
-
rdoc.rdoc_dir = 'rdoc'
|
42
|
-
rdoc.title = "ruby-readability #{version}"
|
43
|
-
rdoc.rdoc_files.include('README*')
|
44
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
45
|
-
end
|
data/bin/readability
CHANGED
@@ -2,12 +2,39 @@
|
|
2
2
|
$KCODE='u'
|
3
3
|
require 'rubygems'
|
4
4
|
require 'open-uri'
|
5
|
+
require 'optparse'
|
5
6
|
require File.dirname(__FILE__) + '/../lib/readability'
|
6
7
|
|
8
|
+
options = { :debug => false, :images => false }
|
9
|
+
options_parser = OptionParser.new do |opts|
|
10
|
+
opts.banner = "Usage: #{File.basename($0)} [options] URL"
|
11
|
+
|
12
|
+
opts.on("-d", "--debug", "Show debug output") do |v|
|
13
|
+
options[:debug] = v
|
14
|
+
end
|
15
|
+
|
16
|
+
opts.on("-i", "--images", "Keep images and links") do |i|
|
17
|
+
options[:images] = i
|
18
|
+
end
|
19
|
+
|
20
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
21
|
+
puts opts
|
22
|
+
exit
|
23
|
+
end
|
24
|
+
end
|
25
|
+
options_parser.parse!
|
26
|
+
|
7
27
|
if ARGV.length != 1
|
8
|
-
STDERR.puts
|
28
|
+
STDERR.puts options_parser
|
9
29
|
exit 1
|
10
30
|
end
|
11
31
|
|
12
32
|
text = open(ARGV.first).read
|
13
|
-
|
33
|
+
if options[:images]
|
34
|
+
puts Readability::Document.new(text, :tags => %w[div p img a],
|
35
|
+
:attributes => %w[src href],
|
36
|
+
:remove_empty_nodes => false,
|
37
|
+
:debug => options[:debug]).content
|
38
|
+
else
|
39
|
+
puts Readability::Document.new(text, :debug => options[:debug]).content
|
40
|
+
end
|
data/lib/readability.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'nokogiri'
|
3
|
+
require 'guess_html_encoding'
|
3
4
|
|
4
5
|
module Readability
|
5
6
|
class Document
|
@@ -8,14 +9,22 @@ module Readability
|
|
8
9
|
:min_text_length => 25,
|
9
10
|
:remove_unlikely_candidates => true,
|
10
11
|
:weight_classes => true,
|
11
|
-
:clean_conditionally => true
|
12
|
+
:clean_conditionally => true,
|
13
|
+
:remove_empty_nodes => true
|
12
14
|
}.freeze
|
13
15
|
|
14
16
|
attr_accessor :options, :html
|
15
17
|
|
16
18
|
def initialize(input, options = {})
|
17
|
-
@input = input.gsub(REGEXES[:replaceBrsRe], '</p><p>').gsub(REGEXES[:replaceFontsRe], '<\1span>')
|
18
19
|
@options = DEFAULT_OPTIONS.merge(options)
|
20
|
+
@input = input
|
21
|
+
|
22
|
+
if RUBY_VERSION =~ /^1\.9\./ && !@options[:encoding]
|
23
|
+
@input = GuessHtmlEncoding.encode(@input, @options[:html_headers]) unless @options[:do_not_guess_encoding]
|
24
|
+
@options[:encoding] = @input.encoding.to_s
|
25
|
+
end
|
26
|
+
|
27
|
+
@input = @input.gsub(REGEXES[:replaceBrsRe], '</p><p>').gsub(REGEXES[:replaceFontsRe], '<\1span>')
|
19
28
|
@remove_unlikely_candidates = @options[:remove_unlikely_candidates]
|
20
29
|
@weight_classes = @options[:weight_classes]
|
21
30
|
@clean_conditionally = @options[:clean_conditionally]
|
@@ -23,7 +32,7 @@ module Readability
|
|
23
32
|
end
|
24
33
|
|
25
34
|
def make_html
|
26
|
-
@html = Nokogiri::HTML(@input, nil,
|
35
|
+
@html = Nokogiri::HTML(@input, nil, @options[:encoding])
|
27
36
|
end
|
28
37
|
|
29
38
|
REGEXES = {
|
@@ -221,7 +230,7 @@ module Readability
|
|
221
230
|
# wrap text nodes in p tags
|
222
231
|
# elem.children.each do |child|
|
223
232
|
# if child.text?
|
224
|
-
|
233
|
+
# debug("wrapping text node with a p")
|
225
234
|
# child.swap("<p>#{child.text}</p>")
|
226
235
|
# end
|
227
236
|
# end
|
@@ -238,9 +247,11 @@ module Readability
|
|
238
247
|
elem.remove
|
239
248
|
end
|
240
249
|
|
241
|
-
|
242
|
-
|
243
|
-
|
250
|
+
if @options[:remove_empty_nodes]
|
251
|
+
# remove <p> tags that have no text content - this will also remove p tags that contain only images.
|
252
|
+
node.css("p").each do |elem|
|
253
|
+
elem.remove if elem.content.strip.empty?
|
254
|
+
end
|
244
255
|
end
|
245
256
|
|
246
257
|
# Conditionally clean <table>s, <ul>s, and <div>s
|
@@ -259,7 +270,6 @@ module Readability
|
|
259
270
|
base_replace_with_whitespace.each { |tag| replace_with_whitespace[tag] = true }
|
260
271
|
|
261
272
|
([node] + node.css("*")).each do |el|
|
262
|
-
|
263
273
|
# If element is in whitelist, delete all its attributes
|
264
274
|
if whitelist[el.node_name]
|
265
275
|
el.attributes.each { |a, x| el.delete(a) unless @options[:attributes] && @options[:attributes].include?(a.to_s) }
|
@@ -267,10 +277,9 @@ module Readability
|
|
267
277
|
# Otherwise, replace the element with its contents
|
268
278
|
else
|
269
279
|
if replace_with_whitespace[el.node_name]
|
270
|
-
|
271
|
-
el.swap(' ' << el.text << ' ')
|
280
|
+
el.swap(Nokogiri::XML::Text.new(' ' << el.text << ' ', el.document))
|
272
281
|
else
|
273
|
-
el.swap(el.text)
|
282
|
+
el.swap(Nokogiri::XML::Text.new(el.text, el.document))
|
274
283
|
end
|
275
284
|
end
|
276
285
|
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'readability'
|
data/ruby-readability.gemspec
CHANGED
@@ -1,72 +1,24 @@
|
|
1
|
-
# Generated by jeweler
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
1
|
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
5
3
|
|
6
4
|
Gem::Specification.new do |s|
|
7
|
-
s.name
|
8
|
-
s.version
|
9
|
-
|
10
|
-
s.
|
11
|
-
s.
|
12
|
-
s.
|
13
|
-
s.default_executable = %q{readability}
|
5
|
+
s.name = "ruby-readability"
|
6
|
+
s.version = '0.3.0.pre'
|
7
|
+
s.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
|
8
|
+
s.email = ["andrew@iterationlabs.com"]
|
9
|
+
s.homepage = "http://github.com/iterationlabs/ruby-readability"
|
10
|
+
s.summary = %q{Port of arc90's readability project to ruby}
|
14
11
|
s.description = %q{Port of arc90's readability project to ruby}
|
15
|
-
s.email = %q{andrew@iterationlabs.com}
|
16
|
-
s.executables = ["readability"]
|
17
|
-
s.extra_rdoc_files = [
|
18
|
-
"README"
|
19
|
-
]
|
20
|
-
s.files = [
|
21
|
-
".document",
|
22
|
-
"README",
|
23
|
-
"Rakefile",
|
24
|
-
"VERSION",
|
25
|
-
"bin/readability",
|
26
|
-
"lib/readability.rb",
|
27
|
-
"ruby-readability.gemspec",
|
28
|
-
"spec/fixtures/cant_read.html",
|
29
|
-
"spec/fixtures/sample.html",
|
30
|
-
"spec/fixtures/samples/blogpost_with_links-fragments.rb",
|
31
|
-
"spec/fixtures/samples/blogpost_with_links.html",
|
32
|
-
"spec/fixtures/samples/channel4-1-fragments.rb",
|
33
|
-
"spec/fixtures/samples/channel4-1.html",
|
34
|
-
"spec/fixtures/samples/foxnews-india1-fragments.rb",
|
35
|
-
"spec/fixtures/samples/foxnews-india1.html",
|
36
|
-
"spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb",
|
37
|
-
"spec/fixtures/samples/globemail-ottawa-cuts.html",
|
38
|
-
"spec/fixtures/should_not_truncate.txt",
|
39
|
-
"spec/readability_spec.rb",
|
40
|
-
"spec/spec.opts",
|
41
|
-
"spec/spec_helper.rb"
|
42
|
-
]
|
43
|
-
s.homepage = %q{http://github.com/iterationlabs/ruby-readability}
|
44
|
-
s.require_paths = ["lib"]
|
45
|
-
s.rubygems_version = %q{1.3.7}
|
46
|
-
s.summary = %q{Port of arc90's readability project to ruby}
|
47
|
-
s.test_files = [
|
48
|
-
"spec/fixtures/samples/blogpost_with_links-fragments.rb",
|
49
|
-
"spec/fixtures/samples/channel4-1-fragments.rb",
|
50
|
-
"spec/fixtures/samples/foxnews-india1-fragments.rb",
|
51
|
-
"spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb",
|
52
|
-
"spec/readability_spec.rb",
|
53
|
-
"spec/spec_helper.rb"
|
54
|
-
]
|
55
12
|
|
56
|
-
|
57
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
58
|
-
s.specification_version = 3
|
13
|
+
s.rubyforge_project = "ruby-readability"
|
59
14
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
65
|
-
s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
|
66
|
-
end
|
67
|
-
else
|
68
|
-
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
69
|
-
s.add_dependency(%q<nokogiri>, [">= 1.4.2"])
|
70
|
-
end
|
71
|
-
end
|
15
|
+
s.files = `git ls-files`.split("\n")
|
16
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
|
+
s.require_paths = ["lib"]
|
72
19
|
|
20
|
+
s.add_development_dependency "rspec", ">= 2.6"
|
21
|
+
s.add_development_dependency "rr", ">= 1.0"
|
22
|
+
s.add_dependency 'nokogiri', '>= 1.4.2'
|
23
|
+
s.add_dependency 'guess_html_encoding', '>= 0.0.2'
|
24
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# encoding: UTF-8
|
2
2
|
# This sample originally from http://www.foxnews.com/world/2010/05/14/police-killed-bus-touches-high-voltage-wire-central-india/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed:+foxnews/latest+(Text+-+Latest+Headlines)
|
3
3
|
|
4
4
|
$required_fragments = [
|
data/spec/readability_spec.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
2
4
|
|
3
5
|
describe Readability do
|
4
6
|
before do
|
@@ -115,11 +117,9 @@ describe Readability do
|
|
115
117
|
b[:content_score] <=> a[:content_score]
|
116
118
|
}.first[:elem][:id].should == "body"
|
117
119
|
end
|
118
|
-
end
|
119
120
|
|
120
|
-
describe "score_paragraphs" do
|
121
121
|
context "when two consequent br tags are used instead of p" do
|
122
|
-
|
122
|
+
it "should assign the higher score to the first paragraph in this particular example" do
|
123
123
|
@doc = Readability::Document.new(<<-HTML)
|
124
124
|
<html>
|
125
125
|
<head>
|
@@ -140,9 +140,6 @@ describe Readability do
|
|
140
140
|
</html>
|
141
141
|
HTML
|
142
142
|
@candidates = @doc.score_paragraphs(0)
|
143
|
-
end
|
144
|
-
|
145
|
-
it "should assign the higher score to the first paragraph in this particular example" do
|
146
143
|
@candidates.values.sort_by { |a| -a[:content_score] }.first[:elem][:id].should == 'post1'
|
147
144
|
end
|
148
145
|
end
|
@@ -204,14 +201,13 @@ describe Readability do
|
|
204
201
|
end
|
205
202
|
|
206
203
|
it "should output expected fragments of text" do
|
207
|
-
|
208
204
|
checks = 0
|
209
205
|
@samples.each do |sample|
|
210
206
|
html = File.read(File.dirname(__FILE__) + "/fixtures/samples/#{sample}.html")
|
211
207
|
doc = Readability::Document.new(html).content
|
212
208
|
|
213
209
|
load "fixtures/samples/#{sample}-fragments.rb"
|
214
|
-
puts "testing #{sample}..."
|
210
|
+
#puts "testing #{sample}..."
|
215
211
|
|
216
212
|
$required_fragments.each do |required_text|
|
217
213
|
doc.should include(required_text)
|
@@ -223,7 +219,32 @@ describe Readability do
|
|
223
219
|
checks += 1
|
224
220
|
end
|
225
221
|
end
|
226
|
-
puts "Performed #{checks} checks."
|
222
|
+
#puts "Performed #{checks} checks."
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
describe "encoding guessing" do
|
227
|
+
if RUBY_VERSION =~ /^1\.9\./
|
228
|
+
context "with ruby 1.9.2" do
|
229
|
+
it "should correctly guess and enforce HTML encoding" do
|
230
|
+
doc = Readability::Document.new("<html><head><meta http-equiv='content-type' content='text/html; charset=LATIN1'></head><body><div>hi!</div></body></html>")
|
231
|
+
content = doc.content
|
232
|
+
content.encoding.to_s.should == "ISO-8859-1"
|
233
|
+
content.should be_valid_encoding
|
234
|
+
end
|
235
|
+
|
236
|
+
it "should allow encoding guessing to be skipped" do
|
237
|
+
do_not_allow(GuessHtmlEncoding).encode
|
238
|
+
doc = Readability::Document.new(@simple_html_fixture, :do_not_guess_encoding => true)
|
239
|
+
doc.content
|
240
|
+
end
|
241
|
+
|
242
|
+
it "should allow encoding guessing to be overridden" do
|
243
|
+
do_not_allow(GuessHtmlEncoding).encode
|
244
|
+
doc = Readability::Document.new(@simple_html_fixture, :encoding => "UTF-8")
|
245
|
+
doc.content
|
246
|
+
end
|
247
|
+
end
|
227
248
|
end
|
228
249
|
end
|
229
250
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,10 +1,7 @@
|
|
1
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
2
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
1
|
require 'rubygems'
|
4
2
|
require 'readability'
|
5
|
-
require '
|
6
|
-
require 'spec/autorun'
|
7
|
-
|
8
|
-
Spec::Runner.configure do |config|
|
3
|
+
require 'rr'
|
9
4
|
|
5
|
+
RSpec.configure do |config|
|
6
|
+
config.mock_with :rr
|
10
7
|
end
|
metadata
CHANGED
@@ -1,15 +1,10 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-readability
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 2
|
9
|
-
- 3
|
10
|
-
version: 0.2.3
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.0.pre
|
5
|
+
prerelease: 6
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Andrew Cantino
|
14
9
|
- starrhorne
|
15
10
|
- libc
|
@@ -17,57 +12,68 @@ authors:
|
|
17
12
|
autorequire:
|
18
13
|
bindir: bin
|
19
14
|
cert_chain: []
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
dependencies:
|
24
|
-
- !ruby/object:Gem::Dependency
|
15
|
+
date: 2011-10-26 00:00:00.000000000Z
|
16
|
+
dependencies:
|
17
|
+
- !ruby/object:Gem::Dependency
|
25
18
|
name: rspec
|
19
|
+
requirement: &70185897981180 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ! '>='
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: '2.6'
|
25
|
+
type: :development
|
26
26
|
prerelease: false
|
27
|
-
|
27
|
+
version_requirements: *70185897981180
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rr
|
30
|
+
requirement: &70185897980680 !ruby/object:Gem::Requirement
|
28
31
|
none: false
|
29
|
-
requirements:
|
30
|
-
- -
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
|
33
|
-
segments:
|
34
|
-
- 1
|
35
|
-
- 2
|
36
|
-
- 9
|
37
|
-
version: 1.2.9
|
32
|
+
requirements:
|
33
|
+
- - ! '>='
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '1.0'
|
38
36
|
type: :development
|
39
|
-
version_requirements: *id001
|
40
|
-
- !ruby/object:Gem::Dependency
|
41
|
-
name: nokogiri
|
42
37
|
prerelease: false
|
43
|
-
|
38
|
+
version_requirements: *70185897980680
|
39
|
+
- !ruby/object:Gem::Dependency
|
40
|
+
name: nokogiri
|
41
|
+
requirement: &70185897980220 !ruby/object:Gem::Requirement
|
44
42
|
none: false
|
45
|
-
requirements:
|
46
|
-
- -
|
47
|
-
- !ruby/object:Gem::Version
|
48
|
-
hash: 3
|
49
|
-
segments:
|
50
|
-
- 1
|
51
|
-
- 4
|
52
|
-
- 2
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
53
46
|
version: 1.4.2
|
54
47
|
type: :runtime
|
55
|
-
|
48
|
+
prerelease: false
|
49
|
+
version_requirements: *70185897980220
|
50
|
+
- !ruby/object:Gem::Dependency
|
51
|
+
name: guess_html_encoding
|
52
|
+
requirement: &70185897979760 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: 0.0.2
|
58
|
+
type: :runtime
|
59
|
+
prerelease: false
|
60
|
+
version_requirements: *70185897979760
|
56
61
|
description: Port of arc90's readability project to ruby
|
57
|
-
email:
|
58
|
-
|
62
|
+
email:
|
63
|
+
- andrew@iterationlabs.com
|
64
|
+
executables:
|
59
65
|
- readability
|
60
66
|
extensions: []
|
61
|
-
|
62
|
-
|
63
|
-
- README
|
64
|
-
files:
|
67
|
+
extra_rdoc_files: []
|
68
|
+
files:
|
65
69
|
- .document
|
70
|
+
- .gitignore
|
71
|
+
- Gemfile
|
66
72
|
- README
|
67
73
|
- Rakefile
|
68
|
-
- VERSION
|
69
74
|
- bin/readability
|
70
75
|
- lib/readability.rb
|
76
|
+
- lib/ruby-readability.rb
|
71
77
|
- ruby-readability.gemspec
|
72
78
|
- spec/fixtures/cant_read.html
|
73
79
|
- spec/fixtures/sample.html
|
@@ -83,44 +89,42 @@ files:
|
|
83
89
|
- spec/readability_spec.rb
|
84
90
|
- spec/spec.opts
|
85
91
|
- spec/spec_helper.rb
|
86
|
-
has_rdoc: true
|
87
92
|
homepage: http://github.com/iterationlabs/ruby-readability
|
88
93
|
licenses: []
|
89
|
-
|
90
94
|
post_install_message:
|
91
95
|
rdoc_options: []
|
92
|
-
|
93
|
-
require_paths:
|
96
|
+
require_paths:
|
94
97
|
- lib
|
95
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
98
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
99
|
none: false
|
97
|
-
requirements:
|
98
|
-
- -
|
99
|
-
- !ruby/object:Gem::Version
|
100
|
-
|
101
|
-
|
102
|
-
- 0
|
103
|
-
version: "0"
|
104
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ! '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
|
-
requirements:
|
107
|
-
- -
|
108
|
-
- !ruby/object:Gem::Version
|
109
|
-
|
110
|
-
segments:
|
111
|
-
- 0
|
112
|
-
version: "0"
|
106
|
+
requirements:
|
107
|
+
- - ! '>'
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.3.1
|
113
110
|
requirements: []
|
114
|
-
|
115
|
-
|
116
|
-
rubygems_version: 1.3.7
|
111
|
+
rubyforge_project: ruby-readability
|
112
|
+
rubygems_version: 1.8.6
|
117
113
|
signing_key:
|
118
114
|
specification_version: 3
|
119
115
|
summary: Port of arc90's readability project to ruby
|
120
|
-
test_files:
|
116
|
+
test_files:
|
117
|
+
- spec/fixtures/cant_read.html
|
118
|
+
- spec/fixtures/sample.html
|
121
119
|
- spec/fixtures/samples/blogpost_with_links-fragments.rb
|
120
|
+
- spec/fixtures/samples/blogpost_with_links.html
|
122
121
|
- spec/fixtures/samples/channel4-1-fragments.rb
|
122
|
+
- spec/fixtures/samples/channel4-1.html
|
123
123
|
- spec/fixtures/samples/foxnews-india1-fragments.rb
|
124
|
+
- spec/fixtures/samples/foxnews-india1.html
|
124
125
|
- spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb
|
126
|
+
- spec/fixtures/samples/globemail-ottawa-cuts.html
|
127
|
+
- spec/fixtures/should_not_truncate.txt
|
125
128
|
- spec/readability_spec.rb
|
129
|
+
- spec/spec.opts
|
126
130
|
- spec/spec_helper.rb
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.2.3
|