rgabo-readability 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/readability/harmonizable.rb +12 -1
- data/readability.gems +0 -22
- data/rgabo-readability.gemspec +2 -2
- data/spec/readability/readable_spec.rb +19 -7
- data/spec/readability/urls.yaml +4 -3
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.3
|
1
|
+
0.1.4
|
data/lib/readability/harmonizable.rb
CHANGED
@@ -35,8 +35,19 @@ module Readability
|
|
35
35
|
end
|
36
36
|
|
37
37
|
def harmony_page
|
38
|
+
html = self.to_html
|
39
|
+
|
40
|
+
# unless encoding is already utf[-8], convert using iconv
|
41
|
+
unless meta_encoding =~ /utf/i
|
42
|
+
begin
|
43
|
+
require 'iconv'
|
44
|
+
html = Iconv.new("UTF8", "LATIN1//TRANSLIT//IGNORE").iconv(self.to_html)
|
45
|
+
rescue
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
38
49
|
# load document into a page
|
39
|
-
page = Harmony::Page.new(self.to_html)
|
50
|
+
page = Harmony::Page.new(html)
|
40
51
|
|
41
52
|
# yield the page and reparse if a block is given
|
42
53
|
if block_given?
|
data/readability.gems
CHANGED
@@ -1,30 +1,8 @@
|
|
1
1
|
# readability.gems generated gem export file. Note that any env variable settings will be missing. Append these after using a ';' field separator
|
2
2
|
|
3
|
-
# nokogiri
|
4
3
|
nokogiri -v1.4.1
|
5
|
-
|
6
|
-
# harmony (johnson & envjs)
|
7
|
-
stackdeck -v0.2.0
|
8
|
-
johnson -v2.0.0.pre3
|
9
|
-
envjs -v0.3.1
|
10
4
|
harmony -v0.5.5
|
11
|
-
|
12
|
-
# tomdoc
|
13
|
-
hoe -v2.6.0
|
14
|
-
ParseTree -v3.0.5
|
15
|
-
RubyInline -v3.7.0
|
16
|
-
ruby_parser -v2.0.4
|
17
|
-
sexp_processor -v3.0.4
|
18
|
-
colored -v1.2
|
19
5
|
tomdoc -v0.1.0
|
20
|
-
|
21
|
-
# jeweler
|
22
|
-
gemcutter -v0.5.0
|
23
|
-
git -v1.2.5
|
24
|
-
json_pure -v1.4.3
|
25
|
-
rubyforge -v2.0.4
|
26
6
|
jeweler -v1.4.0
|
27
|
-
|
28
|
-
# rspec
|
29
7
|
rspec -v1.3.0
|
30
8
|
syntax -v1.0.0
|
data/rgabo-readability.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{rgabo-readability}
|
8
|
-
s.version = "0.1.3"
|
8
|
+
s.version = "0.1.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Gabor Ratky"]
|
12
|
-
s.date = %q{2010-05-
|
12
|
+
s.date = %q{2010-05-19}
|
13
13
|
s.description = %q{Extends Nokogiri::HTML::Document to run Arc90's Readability script and produces easy to read HTML documents.}
|
14
14
|
s.email = %q{rgabo@rgabostyle.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/spec/readability/readable_spec.rb
CHANGED
@@ -72,16 +72,28 @@ describe Readability::Readable do
|
|
72
72
|
end
|
73
73
|
|
74
74
|
describe "Readability.js" do
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
75
|
+
class << self
|
76
|
+
def should_not_fail_on url
|
77
|
+
self.class_eval <<-EOF
|
78
|
+
it "should not fail on #{url}" do
|
79
|
+
@doc = Nokogiri::HTML(open("#{url}"))
|
80
|
+
|
81
|
+
# run readability in place
|
82
|
+
@doc.to_readable!
|
81
83
|
|
84
|
+
@doc.to_html.should include('Readability version 1.5.0')
|
85
|
+
end
|
86
|
+
EOF
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
YAML.load(File.open(File.join(File.dirname(__FILE__), 'urls.yaml'))).each do |url|
|
91
|
+
it "should not fail on #{url}" do
|
92
|
+
@doc = Nokogiri::HTML(open("#{url}"))
|
93
|
+
|
82
94
|
# run readability in place
|
83
95
|
@doc.to_readable!
|
84
|
-
|
96
|
+
|
85
97
|
@doc.to_html.should include('Readability version 1.5.0')
|
86
98
|
end
|
87
99
|
end
|
data/spec/readability/urls.yaml
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
# Array of URLs to test readability with
|
2
|
-
- 'http://techcrunch.com/2010/05/16/groupon-invades-europe-with-acquisition-of-citydeal'
|
3
2
|
- 'http://blogs.wsj.com/venturecapital/2010/05/14/despite-short-term-improvement-vc-10-year-index-goes-negative/?mod=rss_WSJBlog'
|
4
3
|
- 'http://www.engadget.com/2010/04/03/entelligence-the-ipad-as-a-productivity-tool/'
|
5
|
-
|
4
|
+
- 'http://www.huffingtonpost.com/2010/05/15/delete-facebook-account-q_n_576956.html'
|
6
5
|
- 'http://mashable.com/2010/05/16/in-defense-of-facebook/'
|
7
6
|
- 'http://feeds.venturebeat.com/~r/Venturebeat/~3/rYPVBROMiEI/'
|
8
|
-
- 'http://eu.techcrunch.com/2010/04/25/new-eu-rules-could-kill-off-european-vc-and-screw-startups-lets-stop-them/'
|
7
|
+
- 'http://eu.techcrunch.com/2010/04/25/new-eu-rules-could-kill-off-european-vc-and-screw-startups-lets-stop-them/'
|
8
|
+
- 'http://www.j-learning.org/promote_it/page/surveys/'
|
9
|
+
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rgabo-readability
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
- 3
|
10
|
-
version: 0.1.3
|
9
|
+
- 4
|
10
|
+
version: 0.1.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Gabor Ratky
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-19 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|