rgabo-readability 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.1.4
@@ -35,8 +35,19 @@ module Readability
35
35
  end
36
36
 
37
37
  def harmony_page
38
+ html = self.to_html
39
+
40
+ # unless encoding is already utf[-8], convert using iconv
41
+ unless meta_encoding =~ /utf/i
42
+ begin
43
+ require 'iconv'
44
+ html = Iconv.new("UTF8", "LATIN1//TRANSLIT//IGNORE").iconv(self.to_html)
45
+ rescue
46
+ end
47
+ end
48
+
38
49
  # load document into a page
39
- page = Harmony::Page.new(self.to_html)
50
+ page = Harmony::Page.new(html)
40
51
 
41
52
  # yield the page and reparse if a block is given
42
53
  if block_given?
@@ -1,30 +1,8 @@
1
1
  # readability.gems generated gem export file. Note that any env variable settings will be missing. Append these after using a ';' field separator
2
2
 
3
- # nokogiri
4
3
  nokogiri -v1.4.1
5
-
6
- # harmony (johnson & envjs)
7
- stackdeck -v0.2.0
8
- johnson -v2.0.0.pre3
9
- envjs -v0.3.1
10
4
  harmony -v0.5.5
11
-
12
- # tomdoc
13
- hoe -v2.6.0
14
- ParseTree -v3.0.5
15
- RubyInline -v3.7.0
16
- ruby_parser -v2.0.4
17
- sexp_processor -v3.0.4
18
- colored -v1.2
19
5
  tomdoc -v0.1.0
20
-
21
- # jeweler
22
- gemcutter -v0.5.0
23
- git -v1.2.5
24
- json_pure -v1.4.3
25
- rubyforge -v2.0.4
26
6
  jeweler -v1.4.0
27
-
28
- # rspec
29
7
  rspec -v1.3.0
30
8
  syntax -v1.0.0
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{rgabo-readability}
8
- s.version = "0.1.3"
8
+ s.version = "0.1.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Gabor Ratky"]
12
- s.date = %q{2010-05-17}
12
+ s.date = %q{2010-05-19}
13
13
  s.description = %q{Extends Nokogiri::HTML::Document to run Arc90's Readability script and produces easy to read HTML documents.}
14
14
  s.email = %q{rgabo@rgabostyle.com}
15
15
  s.extra_rdoc_files = [
@@ -72,16 +72,28 @@ describe Readability::Readable do
72
72
  end
73
73
 
74
74
  describe "Readability.js" do
75
- it "should not fail on any article" do
76
- urls = YAML.load(File.open(File.join(File.dirname(__FILE__), 'urls.yaml')))
77
-
78
- urls.each do |url|
79
- # load webpage
80
- @doc = Nokogiri::HTML(open(url))
75
+ class << self
76
+ def should_not_fail_on url
77
+ self.class_eval <<-EOF
78
+ it "should not fail on #{url}" do
79
+ @doc = Nokogiri::HTML(open("#{url}"))
80
+
81
+ # run readability in place
82
+ @doc.to_readable!
81
83
 
84
+ @doc.to_html.should include('Readability version 1.5.0')
85
+ end
86
+ EOF
87
+ end
88
+ end
89
+
90
+ YAML.load(File.open(File.join(File.dirname(__FILE__), 'urls.yaml'))).each do |url|
91
+ it "should not fail on #{url}" do
92
+ @doc = Nokogiri::HTML(open("#{url}"))
93
+
82
94
  # run readability in place
83
95
  @doc.to_readable!
84
-
96
+
85
97
  @doc.to_html.should include('Readability version 1.5.0')
86
98
  end
87
99
  end
@@ -1,8 +1,9 @@
1
1
  # Array of URLs to test readability with
2
- - 'http://techcrunch.com/2010/05/16/groupon-invades-europe-with-acquisition-of-citydeal'
3
2
  - 'http://blogs.wsj.com/venturecapital/2010/05/14/despite-short-term-improvement-vc-10-year-index-goes-negative/?mod=rss_WSJBlog'
4
3
  - 'http://www.engadget.com/2010/04/03/entelligence-the-ipad-as-a-productivity-tool/'
5
- # - 'http://www.huffingtonpost.com/2010/05/15/delete-facebook-account-q_n_576956.html'
4
+ - 'http://www.huffingtonpost.com/2010/05/15/delete-facebook-account-q_n_576956.html'
6
5
  - 'http://mashable.com/2010/05/16/in-defense-of-facebook/'
7
6
  - 'http://feeds.venturebeat.com/~r/Venturebeat/~3/rYPVBROMiEI/'
8
- - 'http://eu.techcrunch.com/2010/04/25/new-eu-rules-could-kill-off-european-vc-and-screw-startups-lets-stop-them/'
7
+ - 'http://eu.techcrunch.com/2010/04/25/new-eu-rules-could-kill-off-european-vc-and-screw-startups-lets-stop-them/'
8
+ - 'http://www.j-learning.org/promote_it/page/surveys/'
9
+
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rgabo-readability
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 3
10
- version: 0.1.3
9
+ - 4
10
+ version: 0.1.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Gabor Ratky
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-05-17 00:00:00 +02:00
18
+ date: 2010-05-19 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency