rgabo-readability 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.1.4
@@ -35,8 +35,19 @@ module Readability
35
35
  end
36
36
 
37
37
  def harmony_page
38
+ html = self.to_html
39
+
40
+ # unless encoding is already utf[-8], convert using iconv
41
+ unless meta_encoding =~ /utf/i
42
+ begin
43
+ require 'iconv'
44
+ html = Iconv.new("UTF8", "LATIN1//TRANSLIT//IGNORE").iconv(self.to_html)
45
+ rescue
46
+ end
47
+ end
48
+
38
49
  # load document into a page
39
- page = Harmony::Page.new(self.to_html)
50
+ page = Harmony::Page.new(html)
40
51
 
41
52
  # yield the page and reparse if a block is given
42
53
  if block_given?
@@ -1,30 +1,8 @@
1
1
  # readability.gems generated gem export file. Note that any env variable settings will be missing. Append these after using a ';' field separator
2
2
 
3
- # nokogiri
4
3
  nokogiri -v1.4.1
5
-
6
- # harmony (johnson & envjs)
7
- stackdeck -v0.2.0
8
- johnson -v2.0.0.pre3
9
- envjs -v0.3.1
10
4
  harmony -v0.5.5
11
-
12
- # tomdoc
13
- hoe -v2.6.0
14
- ParseTree -v3.0.5
15
- RubyInline -v3.7.0
16
- ruby_parser -v2.0.4
17
- sexp_processor -v3.0.4
18
- colored -v1.2
19
5
  tomdoc -v0.1.0
20
-
21
- # jeweler
22
- gemcutter -v0.5.0
23
- git -v1.2.5
24
- json_pure -v1.4.3
25
- rubyforge -v2.0.4
26
6
  jeweler -v1.4.0
27
-
28
- # rspec
29
7
  rspec -v1.3.0
30
8
  syntax -v1.0.0
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{rgabo-readability}
8
- s.version = "0.1.3"
8
+ s.version = "0.1.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Gabor Ratky"]
12
- s.date = %q{2010-05-17}
12
+ s.date = %q{2010-05-19}
13
13
  s.description = %q{Extends Nokogiri::HTML::Document to run Arc90's Readability script and produces easy to read HTML documents.}
14
14
  s.email = %q{rgabo@rgabostyle.com}
15
15
  s.extra_rdoc_files = [
@@ -72,16 +72,28 @@ describe Readability::Readable do
72
72
  end
73
73
 
74
74
  describe "Readability.js" do
75
- it "should not fail on any article" do
76
- urls = YAML.load(File.open(File.join(File.dirname(__FILE__), 'urls.yaml')))
77
-
78
- urls.each do |url|
79
- # load webpage
80
- @doc = Nokogiri::HTML(open(url))
75
+ class << self
76
+ def should_not_fail_on url
77
+ self.class_eval <<-EOF
78
+ it "should not fail on #{url}" do
79
+ @doc = Nokogiri::HTML(open("#{url}"))
80
+
81
+ # run readability in place
82
+ @doc.to_readable!
81
83
 
84
+ @doc.to_html.should include('Readability version 1.5.0')
85
+ end
86
+ EOF
87
+ end
88
+ end
89
+
90
+ YAML.load(File.open(File.join(File.dirname(__FILE__), 'urls.yaml'))).each do |url|
91
+ it "should not fail on #{url}" do
92
+ @doc = Nokogiri::HTML(open("#{url}"))
93
+
82
94
  # run readability in place
83
95
  @doc.to_readable!
84
-
96
+
85
97
  @doc.to_html.should include('Readability version 1.5.0')
86
98
  end
87
99
  end
@@ -1,8 +1,9 @@
1
1
  # Array of URLs to test readability with
2
- - 'http://techcrunch.com/2010/05/16/groupon-invades-europe-with-acquisition-of-citydeal'
3
2
  - 'http://blogs.wsj.com/venturecapital/2010/05/14/despite-short-term-improvement-vc-10-year-index-goes-negative/?mod=rss_WSJBlog'
4
3
  - 'http://www.engadget.com/2010/04/03/entelligence-the-ipad-as-a-productivity-tool/'
5
- # - 'http://www.huffingtonpost.com/2010/05/15/delete-facebook-account-q_n_576956.html'
4
+ - 'http://www.huffingtonpost.com/2010/05/15/delete-facebook-account-q_n_576956.html'
6
5
  - 'http://mashable.com/2010/05/16/in-defense-of-facebook/'
7
6
  - 'http://feeds.venturebeat.com/~r/Venturebeat/~3/rYPVBROMiEI/'
8
- - 'http://eu.techcrunch.com/2010/04/25/new-eu-rules-could-kill-off-european-vc-and-screw-startups-lets-stop-them/'
7
+ - 'http://eu.techcrunch.com/2010/04/25/new-eu-rules-could-kill-off-european-vc-and-screw-startups-lets-stop-them/'
8
+ - 'http://www.j-learning.org/promote_it/page/surveys/'
9
+
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rgabo-readability
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 3
10
- version: 0.1.3
9
+ - 4
10
+ version: 0.1.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Gabor Ratky
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-05-17 00:00:00 +02:00
18
+ date: 2010-05-19 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency