readable 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,16 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require File.join(File.dirname(__FILE__), '../lib/readable')
3
+
4
# Reads a file located relative to the directory that contains this file.
#
# The file is opened with the "r:binary" mode string, so the returned
# String carries the file's raw bytes without any transcoding.
#
# filename - path of the file, relative to this file's directory.
#
# Returns the complete file contents as a String.
# Raises Errno::ENOENT (from File.open) when the file does not exist.
def read(filename)
  path = File.join(File.dirname(__FILE__), filename)
  # The block form guarantees the handle is closed even if the read raises.
  File.open(path, "r:binary") { |file| file.read }
end
10
+
11
# Replaces Net::HTTP.start with a stub so that specs work from a local
# fixture instead of the network.
#
# filename - fixture path, passed unchanged to #read.
#
# The object handed back by `stub!(:result)` is given a stubbed #body that
# returns the fixture contents, and Net::HTTP.start is stubbed to return
# that object.
# NOTE(review): what `stub!(:result)` returns depends on the RSpec mocks
# version in use - confirm it is the intended stand-in for a response.
def stub_page!(filename)
  page_body = read(filename)
  fake_response = stub!(:result)
  fake_response.stub!(:body => page_body)
  Net::HTTP.stub!(:start).and_return(fake_response)
end
@@ -0,0 +1,57 @@
1
+ # -*- encoding : utf-8 -*-
2
+
3
+ require File.join(File.dirname(__FILE__), '/spec_helper')
4
+
5
# Encoding behaviour: the HTML exposed by a Webpage built from the stubbed
# sohu fixture must report UTF-8 as its encoding name (case-insensitive).
describe Readable::Webpage, 'Encoding' do
  it "should convert html into utf-8 according to charset of webpage" do
    stub_page!('pages/sohu.html')
    encoding_name = Readable::Webpage.new('http://test.com').html.encoding.name
    encoding_name.downcase.should == 'utf-8'
  end
end
12
+
13
# Parsing behaviour: for each stubbed fixture page, the title must match (or
# contain) the expected text, the content must contain the listed snippets,
# and one specific string must be absent from the content.
describe Readable::Webpage, "Parse" do
  it "should parse sohu correctly" do
    stub_page!('pages/sohu.html')
    article = Readable::Webpage.new('http://sohu.com')
    article.title.should == '揭秘玫瑰怒放四部曲 投射能力增强才是蜕变之源'
    article.content.should be_include('没错,德里克-罗斯是从小看着迈克尔-乔丹打球长大的,在他成为2008年的选秀状元之时,他也没想过自己能这么快达到如此高的巅峰,这也出乎了所有人的意料,因为完成这一切他仅仅用了3个赛季而以,本赛季的罗斯可谓一飞冲天,即便是他赛季之前就成宣称自己要成为“MVP”,人们也并没有把这句话放在心上。')
    article.content.should_not be_include('网友关注排行')
  end

  it "should parse qq news correctly" do
    stub_page!('pages/qq.html')
    article = Readable::Webpage.new('http://qq.com')
    article.title.should == '卡扎菲政府军发言人在北约空袭中死亡'
    # Snippets are checked in the listed order, one assertion per snippet.
    ['中新网5月16日电', '因为这些设施是卡扎菲政权维持统治的工具。'].each do |snippet|
      article.content.should be_include(snippet)
    end
    article.content.should_not be_include('每日推荐')
  end

  it "should parse sina news correctly" do
    stub_page!('pages/sina.html')
    article = Readable::Webpage.new('http://sina.com.cn')
    article.title.should == '巴基斯坦同意今日归还美绝密隐身直升机残骸'
    ['环球网记者', '抵达展开准备工作'].each do |snippet|
      article.content.should be_include(snippet)
    end
    article.content.should_not be_include('军事论坛')
  end

  it "should parse techcrunch correctly" do
    stub_page!('pages/techcrunch.html')
    article = Readable::Webpage.new('http://techcrunch.com')
    article.title.should be_include('Chrome OS 12')
    [
      'Last week, just prior to day one of Google I/O',
      'what the first Chromebooks will feel like, check it out.'
    ].each do |snippet|
      article.content.should be_include(snippet)
    end
    article.content.should_not be_include('Jobs')
  end

  it "should parse wordpress correctly" do
    stub_page!('pages/wordpress.html')
    article = Readable::Webpage.new('http://wordpress.com')
    article.title.should be_include('My Weekend')
    ['It is said that Greek people', 'Kate'].each do |snippet|
      article.content.should be_include(snippet)
    end
    article.content.should_not be_include('You ARE a Goddess')
  end
end
metadata ADDED
@@ -0,0 +1,125 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: readable
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Zhang Yuanyi
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-05-17 00:00:00 +08:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ type: :development
32
+ version_requirements: *id001
33
+ - !ruby/object:Gem::Dependency
34
+ name: nokogiri
35
+ prerelease: false
36
+ requirement: &id002 !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 0
43
+ version: "0"
44
+ type: :runtime
45
+ version_requirements: *id002
46
+ - !ruby/object:Gem::Dependency
47
+ name: sanitize
48
+ prerelease: false
49
+ requirement: &id003 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ segments:
55
+ - 0
56
+ version: "0"
57
+ type: :runtime
58
+ version_requirements: *id003
59
+ description: Readable provides a more comfortable way to read the web.
60
+ email:
61
+ - zhangyuanyi@gmail.com
62
+ executables: []
63
+
64
+ extensions: []
65
+
66
+ extra_rdoc_files: []
67
+
68
+ files:
69
+ - .gitignore
70
+ - Gemfile
71
+ - Rakefile
72
+ - lib/readable.rb
73
+ - lib/readable/rule.rb
74
+ - lib/readable/rules/sina.rb
75
+ - lib/readable/rules/techcrunch.rb
76
+ - lib/readable/version.rb
77
+ - lib/readable/webpage.rb
78
+ - readable.gemspec
79
+ - spec/pages/qq.html
80
+ - spec/pages/sina.html
81
+ - spec/pages/sohu.html
82
+ - spec/pages/techcrunch.html
83
+ - spec/pages/wordpress.html
84
+ - spec/spec_helper.rb
85
+ - spec/webpage_spec.rb
86
+ has_rdoc: true
87
+ homepage: ""
88
+ licenses: []
89
+
90
+ post_install_message:
91
+ rdoc_options: []
92
+
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ segments:
101
+ - 0
102
+ version: "0"
103
+ required_rubygems_version: !ruby/object:Gem::Requirement
104
+ none: false
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ segments:
109
+ - 0
110
+ version: "0"
111
+ requirements: []
112
+
113
+ rubyforge_project:
114
+ rubygems_version: 1.3.7
115
+ signing_key:
116
+ specification_version: 3
117
+ summary: Readable makes web content easier to read
118
+ test_files:
119
+ - spec/pages/qq.html
120
+ - spec/pages/sina.html
121
+ - spec/pages/sohu.html
122
+ - spec/pages/techcrunch.html
123
+ - spec/pages/wordpress.html
124
+ - spec/spec_helper.rb
125
+ - spec/webpage_spec.rb