readable 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require File.join(File.dirname(__FILE__), '../lib/readable')
3
+
4
# Reads a file addressed relative to the directory containing this file.
#
# @param filename [String] path relative to this file's directory
# @return [String] the entire file contents, opened with the "r:binary" mode
# @raise [SystemCallError] (e.g. Errno::ENOENT) when the file cannot be opened
def read(filename)
  path = File.join(File.dirname(__FILE__), filename)
  # The block form closes the handle even when the read itself raises,
  # which the previous open/read/close sequence did not guarantee.
  File.open(path, "r:binary") { |file| file.read }
end
10
+
11
# Replaces Net::HTTP.start with a stub whose return value answers +body+
# with the contents of the given fixture file.
#
# The fixture is loaded through +read+, so +filename+ is resolved the same
# way +read+ resolves it.
# NOTE(review): +stub!(:result)+ is sent to the implicit receiver; what it
# returns depends on the RSpec version in use - confirm it behaves as the
# intended fake response object.
#
# @param filename [String] fixture path handed to +read+
# @return whatever +and_return+ yields for the Net::HTTP.start stub
def stub_page!(filename)
  page_body = read(filename)
  fake_response = stub!(:result)
  fake_response.stub!(:body => page_body)
  Net::HTTP.stub!(:start).and_return(fake_response)
end
@@ -0,0 +1,57 @@
1
+ # -*- encoding : utf-8 -*-
2
+
3
+ require File.join(File.dirname(__FILE__), '/spec_helper')
4
+
5
# Checks the encoding reported for the HTML held by a Webpage built while
# Net::HTTP.start is stubbed with the sohu fixture.
describe Readable::Webpage, 'Encoding' do
  it "should convert html into utf-8 according to charset of webpage" do
    stub_page!('pages/sohu.html')
    webpage = Readable::Webpage.new('http://test.com')

    # Compare case-insensitively by lowering the encoding name first.
    encoding_name = webpage.html.encoding.name.downcase
    encoding_name.should == 'utf-8'
  end
end
12
+
13
# Parsing expectations for several saved pages: each example stubs the HTTP
# layer with a fixture, builds a Webpage, and checks the title plus text
# that must and must not appear in the content.
describe Readable::Webpage, "Parse" do
  # Stubs Net::HTTP with +fixture+ and then builds the Webpage for +url+,
  # in that order, exactly as each example needs.
  def parsed_page(fixture, url)
    stub_page!(fixture)
    Readable::Webpage.new(url)
  end

  it "should parse sohu correctly" do
    webpage = parsed_page('pages/sohu.html', 'http://sohu.com')

    webpage.title.should == '揭秘玫瑰怒放四部曲 投射能力增强才是蜕变之源'
    webpage.content.should be_include('没错,德里克-罗斯是从小看着迈克尔-乔丹打球长大的,在他成为2008年的选秀状元之时,他也没想过自己能这么快达到如此高的巅峰,这也出乎了所有人的意料,因为完成这一切他仅仅用了3个赛季而以,本赛季的罗斯可谓一飞冲天,即便是他赛季之前就成宣称自己要成为“MVP”,人们也并没有把这句话放在心上。')
    webpage.content.should_not be_include('网友关注排行')
  end

  it "should parse qq news correctly" do
    webpage = parsed_page('pages/qq.html', 'http://qq.com')

    webpage.title.should == '卡扎菲政府军发言人在北约空袭中死亡'
    webpage.content.should be_include('中新网5月16日电')
    webpage.content.should be_include('因为这些设施是卡扎菲政权维持统治的工具。')
    webpage.content.should_not be_include('每日推荐')
  end

  it "should parse sina news correctly" do
    webpage = parsed_page('pages/sina.html', 'http://sina.com.cn')

    webpage.title.should == '巴基斯坦同意今日归还美绝密隐身直升机残骸'
    webpage.content.should be_include('环球网记者')
    webpage.content.should be_include('抵达展开准备工作')
    webpage.content.should_not be_include('军事论坛')
  end

  it "should parse techcrunch correctly" do
    webpage = parsed_page('pages/techcrunch.html', 'http://techcrunch.com')

    webpage.title.should be_include('Chrome OS 12')
    webpage.content.should be_include('Last week, just prior to day one of Google I/O')
    webpage.content.should be_include('what the first Chromebooks will feel like, check it out.')
    webpage.content.should_not be_include('Jobs')
  end

  it "should parse wordpress correctly" do
    webpage = parsed_page('pages/wordpress.html', 'http://wordpress.com')

    webpage.title.should be_include('My Weekend')
    webpage.content.should be_include('It is said that Greek people')
    webpage.content.should be_include('Kate')
    webpage.content.should_not be_include('You ARE a Goddess')
  end
end
metadata ADDED
@@ -0,0 +1,125 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: readable
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Zhang Yuanyi
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-05-17 00:00:00 +08:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ type: :development
32
+ version_requirements: *id001
33
+ - !ruby/object:Gem::Dependency
34
+ name: nokogiri
35
+ prerelease: false
36
+ requirement: &id002 !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 0
43
+ version: "0"
44
+ type: :runtime
45
+ version_requirements: *id002
46
+ - !ruby/object:Gem::Dependency
47
+ name: sanitize
48
+ prerelease: false
49
+ requirement: &id003 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ segments:
55
+ - 0
56
+ version: "0"
57
+ type: :runtime
58
+ version_requirements: *id003
59
+ description: Readable provides a more comfortable way to read the web.
60
+ email:
61
+ - zhangyuanyi@gmail.com
62
+ executables: []
63
+
64
+ extensions: []
65
+
66
+ extra_rdoc_files: []
67
+
68
+ files:
69
+ - .gitignore
70
+ - Gemfile
71
+ - Rakefile
72
+ - lib/readable.rb
73
+ - lib/readable/rule.rb
74
+ - lib/readable/rules/sina.rb
75
+ - lib/readable/rules/techcrunch.rb
76
+ - lib/readable/version.rb
77
+ - lib/readable/webpage.rb
78
+ - readable.gemspec
79
+ - spec/pages/qq.html
80
+ - spec/pages/sina.html
81
+ - spec/pages/sohu.html
82
+ - spec/pages/techcrunch.html
83
+ - spec/pages/wordpress.html
84
+ - spec/spec_helper.rb
85
+ - spec/webpage_spec.rb
86
+ has_rdoc: true
87
+ homepage: ""
88
+ licenses: []
89
+
90
+ post_install_message:
91
+ rdoc_options: []
92
+
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ segments:
101
+ - 0
102
+ version: "0"
103
+ required_rubygems_version: !ruby/object:Gem::Requirement
104
+ none: false
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ segments:
109
+ - 0
110
+ version: "0"
111
+ requirements: []
112
+
113
+ rubyforge_project:
114
+ rubygems_version: 1.3.7
115
+ signing_key:
116
+ specification_version: 3
117
+ summary: Readable makes web content easier to read
118
+ test_files:
119
+ - spec/pages/qq.html
120
+ - spec/pages/sina.html
121
+ - spec/pages/sohu.html
122
+ - spec/pages/techcrunch.html
123
+ - spec/pages/wordpress.html
124
+ - spec/spec_helper.rb
125
+ - spec/webpage_spec.rb