query 0.0.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -1
- data/Gemfile +3 -1
- data/README.md +6 -1
- data/lib/query/engine/baidu.rb +12 -8
- data/lib/query/engine/baidu_mobile.rb +4 -4
- data/lib/query/engine/{qihoo.rb → qihu.rb} +8 -3
- data/lib/query/engine/{qihoo_mobile.rb → qihu_mobile.rb} +0 -0
- data/lib/query/engine/sogou.rb +45 -0
- data/lib/query/engine/sogou_mobile.rb +21 -0
- data/lib/query/engine.rb +11 -4
- data/lib/query/result/baidu.rb +57 -91
- data/lib/query/result/baidu_mobile.rb +49 -93
- data/lib/query/result/qihu.rb +66 -0
- data/lib/query/result/{qihoo_mobile.rb → qihu_mobile.rb} +1 -1
- data/lib/query/result/sogou.rb +103 -0
- data/lib/query/result/sogou_mobile.rb +51 -0
- data/lib/query/result.rb +47 -4
- data/lib/query/version.rb +1 -1
- data/lib/query.rb +6 -8
- data/query.gemspec +2 -3
- data/spec/baidu1_spec.rb +157 -0
- data/spec/baidu2_spec.rb +156 -0
- data/spec/mbaidu1_spec.rb +167 -0
- data/spec/msogou_spec.rb +91 -0
- data/spec/qihu_spec.rb +87 -0
- data/spec/samples/baidu1.html +521 -0
- data/spec/samples/baidu2.html +662 -0
- data/spec/samples/mbaidu1.html +2 -0
- data/spec/samples/mbaidu2.html +2 -0
- data/spec/samples/msogou.html +474 -0
- data/spec/samples/qihu.html +506 -0
- data/spec/samples/sogou.html +629 -0
- data/spec/sogou_mobile_spec.rb +86 -0
- data/spec/sogou_spec.rb +107 -0
- data/spec/spec_helper.rb +12 -1
- metadata +56 -31
- data/lib/query/engine/base.rb +0 -16
- data/lib/query/result/base.rb +0 -50
- data/lib/query/result/qihoo.rb +0 -75
- data/spec/baidu_mobile_spec.rb +0 -19
- data/spec/baidu_spec.rb +0 -73
- data/spec/qihoo_spec.rb +0 -27
@@ -0,0 +1,167 @@
|
|
1
|
+
#coding:UTF-8
|
2
|
+
#coding:UTF-8
|
3
|
+
require 'spec_helper'
|
4
|
+
# Specs for Query::Result::BaiduMobile.
#
# Every example parses the saved page whose path is held in the global
# $sample_mbaidu1 (the global is not defined in this file), so no network
# access is needed.
describe Query::Result::BaiduMobile do
  subject { Query::Result::BaiduMobile.new(File.read($sample_mbaidu1)) }

  it "can click the next page button" do
    subject.next_url.should == 'http://m.baidu.com/from=844b/s?pn=10&usm=3&st=11108i&word=%E9%85%92%E5%BA%97&sa=np&ms=1'
  end

  it "cannot count results" do
    subject.count.should be_nil
  end

  describe '#seo_ranks' do
    # The description names the same host the assertion checks.
    it "should put h.qunar.com to be on first" do
      subject.seo_ranks.first[:host].should == 'h.qunar.com'
    end

    it "should put 酒店查询与预订 to be the first title" do
      subject.seo_ranks.first[:text].should == '酒店查询与预订'
    end

    # NOTE(review): "&center_rank" was restored from the entity-decoded form
    # "¢er_rank" (an unescaped "&cent"); confirm against the sample page.
    it "should put 'http://map.baidu.com/mobile/webapp/search/search/qt=s&wd=%E9%85%92%E5%BA%97&c=131&b=&l=1&center_rank=1&nb_x=&nb_y=/?third_party=webapp-aladdin' to be the second url" do
      subject.seo_ranks[1][:href].should == 'http://map.baidu.com/mobile/webapp/search/search/qt=s&wd=%E9%85%92%E5%BA%97&c=131&b=&l=1&center_rank=1&nb_x=&nb_y=/?third_party=webapp-aladdin'
    end

    it "should have href,text,host elements for each seo result" do
      subject.seo_ranks.each do |seo_rank|
        seo_rank[:href].should_not be_nil
        seo_rank[:text].should_not be_nil
        seo_rank[:host].should_not be_nil
      end
    end
  end

  describe '#ads_top' do
    it "has no top ads" do
      subject.ads_top.size.should == 0
    end

    # Distinct description so the two examples can be told apart in reports.
    it "has nil as the first top ad" do
      subject.ads_top[0].should be_nil
    end
  end

  describe '#ads_right' do
    it "has no right ads" do
      subject.ads_right.size.should == 0
    end

    it "has an array of hashes with the required keys as the result of ads_right" do
      subject.ads_right.class.should == Array
      subject.ads_right.each do |ad_right|
        ad_right.should have_key(:rank)
        ad_right.should have_key(:host)
        ad_right.should have_key(:href)
        ad_right.should have_key(:text)
      end
    end
  end

  describe '#ads_bottom' do
    it "has 3 bottom ads" do
      subject.ads_bottom.size.should == 3
    end

    it "has an array of hashes with the required keys as the result of ads_bottom" do
      subject.ads_bottom.class.should == Array
      subject.ads_bottom.each do |ad_bottom|
        ad_bottom.should have_key(:rank)
        ad_bottom.should have_key(:host)
        ad_bottom.should have_key(:href)
        ad_bottom.should have_key(:text)
      end
    end
  end
end
|
80
|
+
__END__
|
81
|
+
describe Query::Engine::Baidu do
|
82
|
+
page = Query::Engine::Baidu.query '百度'
|
83
|
+
|
84
|
+
it "should return Query::Result::Baidu" do
|
85
|
+
page.class.should == Query::Result::Baidu
|
86
|
+
end
|
87
|
+
|
88
|
+
it "should return 100,000,000" do
|
89
|
+
page.count.should > 100000
|
90
|
+
end
|
91
|
+
it "should return 1" do
|
92
|
+
page.rank('www.baidu.com').should == 1
|
93
|
+
end
|
94
|
+
|
95
|
+
it "should return Query::Result::Baidu" do
|
96
|
+
page.next.class.should == Query::Result::Baidu
|
97
|
+
end
|
98
|
+
|
99
|
+
it "should return true" do
|
100
|
+
bool = Query::Engine::Baidu.popular?'百度'
|
101
|
+
bool.should == true
|
102
|
+
end
|
103
|
+
|
104
|
+
it "should return false" do
|
105
|
+
bool = Query::Engine::Baidu.popular?'lavataliuming'
|
106
|
+
bool.should == false
|
107
|
+
end
|
108
|
+
|
109
|
+
it "should return over 5 words beginning with the query_word" do
|
110
|
+
query_word = '为'
|
111
|
+
suggestions = Query::Engine::Baidu.suggestions(query_word)
|
112
|
+
suggestions.size.should > 5
|
113
|
+
suggestions.each do |suggestion|
|
114
|
+
suggestion[0].should == query_word
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
it "should return 100,000,000" do
|
119
|
+
result = baidu.pages('baidu.com')
|
120
|
+
result.class.should == Query::Result::Baidu
|
121
|
+
result.count.should == 100000000
|
122
|
+
end
|
123
|
+
|
124
|
+
it "should return 100,000,000" do
|
125
|
+
result = baidu.links('baidu.com')
|
126
|
+
result.class.should == Query::Result::Baidu
|
127
|
+
result.count.should == 100000000
|
128
|
+
end
|
129
|
+
it "should return 100,000,000" do
|
130
|
+
result = baidu.pages_with('baidu.com','baidu.com')
|
131
|
+
result.class.should == Query::Result::Baidu
|
132
|
+
result.count.should == 100000000
|
133
|
+
end
|
134
|
+
it "查询已经被收录的页面收录情况时,应返回true" do
|
135
|
+
baidu.indexed?('http://www.baidu.com').should == true
|
136
|
+
end
|
137
|
+
it "查询一个不存在的页面收录情况时,应返回true" do
|
138
|
+
baidu.indexed?('http://zxv.not-exists.com').should == false
|
139
|
+
end
|
140
|
+
page1 = Query::Engine::Baidu.query('seoaqua.com')
|
141
|
+
it "查询结果应该都能拿到title,href,host" do
|
142
|
+
page1.seo_ranks.each do |id,rank|
|
143
|
+
rank['href'].should_not == nil
|
144
|
+
rank['text'].should_not == nil
|
145
|
+
rank['host'].should_not == nil
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
149
|
+
__END__
|
150
|
+
require 'spec_helper'
|
151
|
+
describe Query::Engine::BaiduMobile do
|
152
|
+
mbaidu = Query::Engine::BaiduMobile.new
|
153
|
+
page = mbaidu.query '百度'
|
154
|
+
it "应返回#{Query::Engine::BaiduMobile}" do
|
155
|
+
page.class.should == Query::Result::BaiduMobile
|
156
|
+
end
|
157
|
+
it "下一页也应是Query::Engine::BaiduMobile" do
|
158
|
+
page.next.class.should == Query::Result::BaiduMobile
|
159
|
+
page.next.next.class.should == Query::Result::BaiduMobile
|
160
|
+
end
|
161
|
+
it "百度百科域名应该大于1" do
|
162
|
+
page.rank('wapbaike.baidu.com').should > 1
|
163
|
+
end
|
164
|
+
it "百度无线域名应该在10以内" do
|
165
|
+
page.rank('m.baidu.com').should < 11
|
166
|
+
end
|
167
|
+
end
|
data/spec/msogou_spec.rb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
#coding:UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
# Specs for Query::Result::SogouMobile.
#
# Every example parses the saved page whose path is held in the global
# $sample_msogou (the global is not defined in this file). Example
# descriptions state the same values the assertions check.
describe Query::Result::SogouMobile do
  subject { Query::Result::SogouMobile.new(File.read($sample_msogou)) }

  it "can click the next page button" do
    subject.next_url.should == './searchList.jsp?p=2&type=1&keyword=%E9%85%92%E5%BA%97%E9%A2%84%E8%AE%A2&uID=-xn_vif1ZEBEHaV4&sz=2-2&v=5&suuid=0946c9c5-f40d-42e0-ad2a-06e31fa97436'
  end

  it "cannot count results" do
    subject.count.should be_nil
  end

  describe '#seo_ranks' do
    # The next three examples are marked pending by their first statement;
    # the assertions below it are kept for when the pending marker is removed.
    it "should put m.ctrip.com to be on first" do
      pending('sogou vr干扰')
      subject.seo_ranks.first[:host].should == 'm.ctrip.com'
    end

    it "should put 北京酒店查询预订_携程旅行网 to be the first title" do
      pending('sogou vr干扰')
      subject.seo_ranks.first[:text].should == '北京酒店查询预订_携程旅行网 '
    end

    it "should put 'http://jiudian.qunar.com/' to be the second url" do
      pending('sogou vr干扰')
      subject.seo_ranks[1][:href].should == 'http://jiudian.qunar.com/'
    end

    it "should have href,text,host elements for each seo result" do
      subject.seo_ranks.each do |seo_rank|
        seo_rank[:href].should_not be_nil
        seo_rank[:text].should_not be_nil
        seo_rank[:host].should_not be_nil
      end
    end
  end

  describe '#ads_top' do
    it "should have 3 top ads" do
      subject.ads_top.size.should == 3
    end

    it "should find www.agoda.com at the first position in the top ads" do
      subject.ads_top[0][:host].should == 'www.agoda.com'
    end

    it "has an array of hashes with the required keys as the result of ads_top" do
      subject.ads_top.class.should == Array
      subject.ads_top.each do |ad_top|
        ad_top.should have_key(:rank)
        ad_top.should have_key(:host)
        ad_top.should have_key(:href)
        ad_top.should have_key(:text)
      end
    end
  end

  describe '#ads_right' do
    it "should have no right ads" do
      subject.ads_right.size.should == 0
    end

    it "has an array of hashes with the required keys as the result of ads_right" do
      subject.ads_right.class.should == Array
      subject.ads_right.each do |ad_right|
        ad_right.should have_key(:rank)
        ad_right.should have_key(:host)
        ad_right.should have_key(:href)
        ad_right.should have_key(:text)
      end
    end
  end

  describe '#ads_bottom' do
    it "should have 3 bottom ads" do
      subject.ads_bottom.size.should == 3
    end

    it "has an array of hashes with the required keys as the result of ads_bottom" do
      subject.ads_bottom.class.should == Array
      subject.ads_bottom.each do |ad_bottom|
        ad_bottom.should have_key(:rank)
        ad_bottom.should have_key(:host)
        ad_bottom.should have_key(:href)
        ad_bottom.should have_key(:text)
      end
    end
  end
end
|
data/spec/qihu_spec.rb
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
#coding:UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
# Specs for Query::Result::Qihu.
#
# Every example parses the saved page whose path is held in the global
# $sample_qihu (the global is not defined in this file).
describe Query::Result::Qihu do
  subject { Query::Result::Qihu.new(File.read($sample_qihu)) }

  it "can click the next page button" do
    subject.next_url.should == 's?q=%E9%85%92%E5%BA%97&pn=2&j=0&ls=0&src=srp_paging&fr=360sou_home'
  end

  # A bare comparison is evaluated and discarded, so the example would pass
  # whatever the count is; `.should` turns it into a real expectation.
  it "have over 1000 results" do
    subject.count.should > 1000
  end

  describe '#seo_ranks' do
    it "should put u.ctrip.com to be on first" do
      subject.seo_ranks.first[:host].should == 'u.ctrip.com'
    end

    it "should put 合肥酒店查询预订_携程酒店 to be the first title" do
      subject.seo_ranks.first[:text].should == '合肥酒店查询预订_携程酒店'
    end

    it "should put 'http://jiudian.qunar.com/' to be the second url" do
      subject.seo_ranks[1][:href].should == 'http://jiudian.qunar.com/'
    end

    it "should have href,text,host elements for each seo result" do
      subject.seo_ranks.each do |seo_rank|
        seo_rank[:href].should_not be_nil
        seo_rank[:text].should_not be_nil
        seo_rank[:host].should_not be_nil
      end
    end
  end

  describe '#ads_top' do
    it "should have 3 top ads" do
      subject.ads_top.size.should == 3
    end

    # The description names the same host the assertion checks.
    it "should find www.booking.com at the first position in the top ads" do
      subject.ads_top[0][:host].should == 'www.booking.com'
    end

    it "has an array of hashes with the required keys as the result of ads_top" do
      subject.ads_top.class.should == Array
      subject.ads_top.each do |ad_top|
        ad_top.should have_key(:rank)
        ad_top.should have_key(:host)
        ad_top.should have_key(:href)
        ad_top.should have_key(:text)
      end
    end
  end

  describe '#ads_right' do
    it "should have 8 right ads" do
      subject.ads_right.size.should == 8
    end

    it "has an array of hashes with the required keys as the result of ads_right" do
      subject.ads_right.class.should == Array
      subject.ads_right.each do |ad_right|
        ad_right.should have_key(:rank)
        ad_right.should have_key(:host)
        ad_right.should have_key(:href)
        ad_right.should have_key(:text)
      end
    end
  end

  describe '#ads_bottom' do
    it "should have zero bottom ads" do
      subject.ads_bottom.size.should == 0
    end

    it "has an array of hashes with the required keys as the result of ads_bottom" do
      subject.ads_bottom.class.should == Array
      subject.ads_bottom.each do |ad_bottom|
        ad_bottom.should have_key(:rank)
        ad_bottom.should have_key(:host)
        ad_bottom.should have_key(:href)
        ad_bottom.should have_key(:text)
      end
    end
  end
end
|