readable 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/lib/readable.rb +9 -0
- data/lib/readable/rule.rb +23 -0
- data/lib/readable/rules/sina.rb +9 -0
- data/lib/readable/rules/techcrunch.rb +9 -0
- data/lib/readable/version.rb +4 -0
- data/lib/readable/webpage.rb +70 -0
- data/readable.gemspec +23 -0
- data/spec/pages/qq.html +1992 -0
- data/spec/pages/sina.html +1202 -0
- data/spec/pages/sohu.html +2311 -0
- data/spec/pages/techcrunch.html +1157 -0
- data/spec/pages/wordpress.html +1375 -0
- data/spec/spec_helper.rb +16 -0
- data/spec/webpage_spec.rb +57 -0
- metadata +125 -0
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
require File.join(File.dirname(__FILE__), '../lib/readable')
|
3
|
+
|
4
|
+
# Reads a fixture file and returns its raw contents.
#
# The path is resolved relative to the directory containing this helper
# file. The file is opened with the "r:binary" mode string, so the returned
# string carries the binary encoding and is not transcoded on read.
#
# @param filename [String] path relative to this file's directory
# @return [String] the complete contents of the file
# @raise [SystemCallError] (e.g. Errno::ENOENT) if the file cannot be opened
def read(filename)
  path = File.join(File.dirname(__FILE__), filename)
  # The block form closes the handle even when the read raises, which the
  # manual open/read/close sequence did not guarantee.
  File.open(path, "r:binary") { |file| file.read }
end
|
10
|
+
|
11
|
+
# Replaces Net::HTTP.start with a stub so specs never touch the network.
#
# The fixture named by +filename+ is loaded through +read+, and the object
# handed back by the stubbed Net::HTTP.start answers +body+ with that
# fixture's content.
#
# NOTE(review): the stand-in response is created with `stub!(:result)` on the
# current example object; the exact object this returns depends on the RSpec
# version in use -- confirm before porting to a newer RSpec.
#
# @param filename [String] fixture path, relative to the spec directory
def stub_page!(filename)
  page_body = read(filename)
  fake_response = stub!(:result).tap do |response|
    response.stub!(:body => page_body)
  end
  Net::HTTP.stub!(:start).and_return(fake_response)
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
|
3
|
+
require File.join(File.dirname(__FILE__), '/spec_helper')
|
4
|
+
|
5
|
+
describe Readable::Webpage, 'Encoding' do
|
6
|
+
it "should convert html into utf-8 according to charset of webpage" do
|
7
|
+
stub_page!('pages/sohu.html')
|
8
|
+
page = Readable::Webpage.new('http://test.com')
|
9
|
+
page.html.encoding.name.downcase.should == 'utf-8'
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
# Parsing: for each saved fixture page, the extracted title and content must
# contain the article text and must not contain surrounding site chrome.
describe Readable::Webpage, "Parse" do
  # sohu.com sports article.
  it "should parse sohu correctly" do
    stub_page!('pages/sohu.html')
    webpage = Readable::Webpage.new('http://sohu.com')
    webpage.title.should == '揭秘玫瑰怒放四部曲 投射能力增强才是蜕变之源'
    webpage.content.should be_include('没错,德里克-罗斯是从小看着迈克尔-乔丹打球长大的,在他成为2008年的选秀状元之时,他也没想过自己能这么快达到如此高的巅峰,这也出乎了所有人的意料,因为完成这一切他仅仅用了3个赛季而以,本赛季的罗斯可谓一飞冲天,即便是他赛季之前就成宣称自己要成为“MVP”,人们也并没有把这句话放在心上。')
    webpage.content.should_not be_include('网友关注排行')
  end

  # qq.com news article.
  it "should parse qq news correctly" do
    stub_page!('pages/qq.html')
    webpage = Readable::Webpage.new('http://qq.com')
    webpage.title.should == '卡扎菲政府军发言人在北约空袭中死亡'
    webpage.content.should be_include('中新网5月16日电')
    webpage.content.should be_include('因为这些设施是卡扎菲政权维持统治的工具。')
    webpage.content.should_not be_include('每日推荐')
  end

  # sina.com.cn news article.
  it "should parse sina news correctly" do
    stub_page!('pages/sina.html')
    webpage = Readable::Webpage.new('http://sina.com.cn')
    webpage.title.should == '巴基斯坦同意今日归还美绝密隐身直升机残骸'
    webpage.content.should be_include('环球网记者')
    webpage.content.should be_include('抵达展开准备工作')
    webpage.content.should_not be_include('军事论坛')
  end

  # techcrunch.com post.
  it "should parse techcrunch correctly" do
    stub_page!('pages/techcrunch.html')
    webpage = Readable::Webpage.new('http://techcrunch.com')
    webpage.title.should be_include('Chrome OS 12')
    webpage.content.should be_include('Last week, just prior to day one of Google I/O')
    webpage.content.should be_include('what the first Chromebooks will feel like, check it out.')
    webpage.content.should_not be_include('Jobs')
  end

  # wordpress.com blog post.
  it "should parse wordpress correctly" do
    stub_page!('pages/wordpress.html')
    webpage = Readable::Webpage.new('http://wordpress.com')
    webpage.title.should be_include('My Weekend')
    webpage.content.should be_include('It is said that Greek people')
    webpage.content.should be_include('Kate')
    webpage.content.should_not be_include('You ARE a Goddess')
  end
end
|
metadata
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: readable
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Zhang Yuanyi
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-05-17 00:00:00 +08:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rspec
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
type: :development
|
32
|
+
version_requirements: *id001
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: nokogiri
|
35
|
+
prerelease: false
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
43
|
+
version: "0"
|
44
|
+
type: :runtime
|
45
|
+
version_requirements: *id002
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: sanitize
|
48
|
+
prerelease: false
|
49
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
segments:
|
55
|
+
- 0
|
56
|
+
version: "0"
|
57
|
+
type: :runtime
|
58
|
+
version_requirements: *id003
|
59
|
+
description: Readable provides a more confortable way to read web.
|
60
|
+
email:
|
61
|
+
- zhangyuanyi@gmail.com
|
62
|
+
executables: []
|
63
|
+
|
64
|
+
extensions: []
|
65
|
+
|
66
|
+
extra_rdoc_files: []
|
67
|
+
|
68
|
+
files:
|
69
|
+
- .gitignore
|
70
|
+
- Gemfile
|
71
|
+
- Rakefile
|
72
|
+
- lib/readable.rb
|
73
|
+
- lib/readable/rule.rb
|
74
|
+
- lib/readable/rules/sina.rb
|
75
|
+
- lib/readable/rules/techcrunch.rb
|
76
|
+
- lib/readable/version.rb
|
77
|
+
- lib/readable/webpage.rb
|
78
|
+
- readable.gemspec
|
79
|
+
- spec/pages/qq.html
|
80
|
+
- spec/pages/sina.html
|
81
|
+
- spec/pages/sohu.html
|
82
|
+
- spec/pages/techcrunch.html
|
83
|
+
- spec/pages/wordpress.html
|
84
|
+
- spec/spec_helper.rb
|
85
|
+
- spec/webpage_spec.rb
|
86
|
+
has_rdoc: true
|
87
|
+
homepage: ""
|
88
|
+
licenses: []
|
89
|
+
|
90
|
+
post_install_message:
|
91
|
+
rdoc_options: []
|
92
|
+
|
93
|
+
require_paths:
|
94
|
+
- lib
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
none: false
|
97
|
+
requirements:
|
98
|
+
- - ">="
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
segments:
|
101
|
+
- 0
|
102
|
+
version: "0"
|
103
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
none: false
|
105
|
+
requirements:
|
106
|
+
- - ">="
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
segments:
|
109
|
+
- 0
|
110
|
+
version: "0"
|
111
|
+
requirements: []
|
112
|
+
|
113
|
+
rubyforge_project:
|
114
|
+
rubygems_version: 1.3.7
|
115
|
+
signing_key:
|
116
|
+
specification_version: 3
|
117
|
+
summary: Readable make web content easier to read
|
118
|
+
test_files:
|
119
|
+
- spec/pages/qq.html
|
120
|
+
- spec/pages/sina.html
|
121
|
+
- spec/pages/sohu.html
|
122
|
+
- spec/pages/techcrunch.html
|
123
|
+
- spec/pages/wordpress.html
|
124
|
+
- spec/spec_helper.rb
|
125
|
+
- spec/webpage_spec.rb
|