query 0.0.1 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/.gitignore +4 -1
  3. data/Gemfile +3 -1
  4. data/README.md +6 -1
  5. data/lib/query/engine/baidu.rb +12 -8
  6. data/lib/query/engine/baidu_mobile.rb +4 -4
  7. data/lib/query/engine/{qihoo.rb → qihu.rb} +8 -3
  8. data/lib/query/engine/{qihoo_mobile.rb → qihu_mobile.rb} +0 -0
  9. data/lib/query/engine/sogou.rb +45 -0
  10. data/lib/query/engine/sogou_mobile.rb +21 -0
  11. data/lib/query/engine.rb +11 -4
  12. data/lib/query/result/baidu.rb +57 -91
  13. data/lib/query/result/baidu_mobile.rb +49 -93
  14. data/lib/query/result/qihu.rb +66 -0
  15. data/lib/query/result/{qihoo_mobile.rb → qihu_mobile.rb} +1 -1
  16. data/lib/query/result/sogou.rb +103 -0
  17. data/lib/query/result/sogou_mobile.rb +51 -0
  18. data/lib/query/result.rb +47 -4
  19. data/lib/query/version.rb +1 -1
  20. data/lib/query.rb +6 -8
  21. data/query.gemspec +2 -3
  22. data/spec/baidu1_spec.rb +157 -0
  23. data/spec/baidu2_spec.rb +156 -0
  24. data/spec/mbaidu1_spec.rb +167 -0
  25. data/spec/msogou_spec.rb +91 -0
  26. data/spec/qihu_spec.rb +87 -0
  27. data/spec/samples/baidu1.html +521 -0
  28. data/spec/samples/baidu2.html +662 -0
  29. data/spec/samples/mbaidu1.html +2 -0
  30. data/spec/samples/mbaidu2.html +2 -0
  31. data/spec/samples/msogou.html +474 -0
  32. data/spec/samples/qihu.html +506 -0
  33. data/spec/samples/sogou.html +629 -0
  34. data/spec/sogou_mobile_spec.rb +86 -0
  35. data/spec/sogou_spec.rb +107 -0
  36. data/spec/spec_helper.rb +12 -1
  37. metadata +56 -31
  38. data/lib/query/engine/base.rb +0 -16
  39. data/lib/query/result/base.rb +0 -50
  40. data/lib/query/result/qihoo.rb +0 -75
  41. data/spec/baidu_mobile_spec.rb +0 -19
  42. data/spec/baidu_spec.rb +0 -73
  43. data/spec/qihoo_spec.rb +0 -27
data/spec/mbaidu1_spec.rb ADDED
@@ -0,0 +1,167 @@
1
+ #coding:UTF-8
2
+ #coding:UTF-8
3
+ require 'spec_helper'
4
+ describe Query::Result::BaiduMobile do
5
+ subject{Query::Result::BaiduMobile.new(File.read($sample_mbaidu1))}
6
+
7
+ it "can click the next page button" do
8
+ subject.next_url.should == 'http://m.baidu.com/from=844b/s?pn=10&usm=3&st=11108i&word=%E9%85%92%E5%BA%97&sa=np&ms=1'
9
+ end
10
+
11
+ it "cannot count results" do
12
+ subject.count.should be_nil
13
+ end
14
+
15
+ describe '#seo_ranks' do
16
+ it "should put hotel.qunar.com to be on first" do
17
+ subject.seo_ranks.first[:host].should == 'h.qunar.com'
18
+ end
19
+
20
+ it "should put 酒店查询与预订 to be the first title" do
21
+ subject.seo_ranks.first[:text].should == '酒店查询与预订'
22
+ end
23
+
24
+ it "should put 'http://map.baidu.com/mobile/webapp/search/search/qt=s&wd=%E9%85%92%E5%BA%97&c=131&b=&l=1&center_rank=1&nb_x=&nb_y=/?third_party=webapp-aladdin' to be the second url" do
25
+ subject.seo_ranks[1][:href].should == 'http://map.baidu.com/mobile/webapp/search/search/qt=s&wd=%E9%85%92%E5%BA%97&c=131&b=&l=1&center_rank=1&nb_x=&nb_y=/?third_party=webapp-aladdin'
26
+ end
27
+
28
+ it "should have href,text,host elements for each seo result" do
29
+ subject.seo_ranks.each do |seo_rank|
30
+ seo_rank[:href].should_not == nil
31
+ seo_rank[:text].should_not == nil
32
+ seo_rank[:host].should_not == nil
33
+ end
34
+ end
35
+ end
36
+
37
+ describe '#ads_top' do
38
+ it "has no top ads" do
39
+ subject.ads_top.size.should == 0
40
+ end
41
+
42
+ it "has no top ads" do
43
+ subject.ads_top[0].should be_nil
44
+ end
45
+
46
+ end
47
+
48
+ describe '#ads_right' do
49
+ it "has no bottom ads" do
50
+ subject.ads_right.size.should == 0
51
+ end
52
+
53
+ it "has an array of hashes with the required keys as the result of ads_right" do
54
+ subject.ads_right.class.should == Array
55
+ subject.ads_right.each do |ad_right|
56
+ ad_right.should have_key(:rank)
57
+ ad_right.should have_key(:host)
58
+ ad_right.should have_key(:href)
59
+ ad_right.should have_key(:text)
60
+ end
61
+ end
62
+ end
63
+
64
+ describe '#ads_bottom' do
65
+ it "has 3 bottom ads" do
66
+ subject.ads_bottom.size.should == 3
67
+ end
68
+
69
+ it "has an array of hashes with the required keys as the result of ads_bottom" do
70
+ subject.ads_bottom.class.should == Array
71
+ subject.ads_bottom.each do |ad_bottom|
72
+ ad_bottom.should have_key(:rank)
73
+ ad_bottom.should have_key(:host)
74
+ ad_bottom.should have_key(:href)
75
+ ad_bottom.should have_key(:text)
76
+ end
77
+ end
78
+ end
79
+ end
80
+ __END__
81
+ describe Query::Engine::Baidu do
82
+ page = Query::Engine::Baidu.query '百度'
83
+
84
+ it "should return Query::Result::Baidu" do
85
+ page.class.should == Query::Result::Baidu
86
+ end
87
+
88
+ it "should return 100,000,000" do
89
+ page.count.should > 100000
90
+ end
91
+ it "should return 1" do
92
+ page.rank('www.baidu.com').should == 1
93
+ end
94
+
95
+ it "should return Query::Result::Baidu" do
96
+ page.next.class.should == Query::Result::Baidu
97
+ end
98
+
99
+ it "should return true" do
100
+ bool = Query::Engine::Baidu.popular?'百度'
101
+ bool.should == true
102
+ end
103
+
104
+ it "should return false" do
105
+ bool = Query::Engine::Baidu.popular?'lavataliuming'
106
+ bool.should == false
107
+ end
108
+
109
+ it "should return over 5 words beginning with the query_word" do
110
+ query_word = '为'
111
+ suggestions = Query::Engine::Baidu.suggestions(query_word)
112
+ suggestions.size.should > 5
113
+ suggestions.each do |suggestion|
114
+ suggestion[0].should == query_word
115
+ end
116
+ end
117
+
118
+ it "should return 100,000,000" do
119
+ result = baidu.pages('baidu.com')
120
+ result.class.should == Query::Result::Baidu
121
+ result.count.should == 100000000
122
+ end
123
+
124
+ it "should return 100,000,000" do
125
+ result = baidu.links('baidu.com')
126
+ result.class.should == Query::Result::Baidu
127
+ result.count.should == 100000000
128
+ end
129
+ it "should return 100,000,000" do
130
+ result = baidu.pages_with('baidu.com','baidu.com')
131
+ result.class.should == Query::Result::Baidu
132
+ result.count.should == 100000000
133
+ end
134
+ it "查询已经被收录的页面收录情况时,应返回true" do
135
+ baidu.indexed?('http://www.baidu.com').should == true
136
+ end
137
+ it "查询一个不存在的页面收录情况时,应返回false" do
138
+ baidu.indexed?('http://zxv.not-exists.com').should == false
139
+ end
140
+ page1 = Query::Engine::Baidu.query('seoaqua.com')
141
+ it "查询结果应该都能拿到title,href,host" do
142
+ page1.seo_ranks.each do |id,rank|
143
+ rank['href'].should_not == nil
144
+ rank['text'].should_not == nil
145
+ rank['host'].should_not == nil
146
+ end
147
+ end
148
+ end
149
+ __END__
150
+ require 'spec_helper'
151
+ describe Query::Engine::BaiduMobile do
152
+ mbaidu = Query::Engine::BaiduMobile.new
153
+ page = mbaidu.query '百度'
154
+ it "应返回#{Query::Engine::BaiduMobile}" do
155
+ page.class.should == Query::Result::BaiduMobile
156
+ end
157
+ it "下一页也应是Query::Engine::BaiduMobile" do
158
+ page.next.class.should == Query::Result::BaiduMobile
159
+ page.next.next.class.should == Query::Result::BaiduMobile
160
+ end
161
+ it "百度百科域名应该大于1" do
162
+ page.rank('wapbaike.baidu.com').should > 1
163
+ end
164
+ it "百度无线域名应该在10以内" do
165
+ page.rank('m.baidu.com').should < 11
166
+ end
167
+ end
data/spec/msogou_spec.rb ADDED
@@ -0,0 +1,91 @@
1
+ #coding:UTF-8
2
+ require 'spec_helper'
3
+ describe Query::Result::SogouMobile do
4
+ subject{Query::Result::SogouMobile.new(File.read($sample_msogou))}
5
+
6
+ it "can click the next page button" do
7
+ subject.next_url.should == './searchList.jsp?p=2&type=1&keyword=%E9%85%92%E5%BA%97%E9%A2%84%E8%AE%A2&uID=-xn_vif1ZEBEHaV4&sz=2-2&v=5&suuid=0946c9c5-f40d-42e0-ad2a-06e31fa97436'
8
+ end
9
+
10
+ it "have over 1000 results" do
11
+ subject.count.should be_nil
12
+ end
13
+
14
+
15
+ describe '#seo_ranks' do
16
+ it "should put u.ctrip.com to be on first" do
17
+ pending('sogou vr干扰')
18
+ subject.seo_ranks.first[:host].should == 'm.ctrip.com'
19
+ end
20
+
21
+ it "should put 北京酒店查询预订_携程旅行网 to be the first title" do
22
+ pending('sogou vr干扰')
23
+ subject.seo_ranks.first[:text].should == '北京酒店查询预订_携程旅行网 '
24
+ end
25
+
26
+ it "should put 'http://jiudian.qunar.com/' to be the second url" do
27
+ pending('sogou vr干扰')
28
+ subject.seo_ranks[1][:href].should == 'http://jiudian.qunar.com/'
29
+ end
30
+
31
+ it "should have href,text,host elements for each seo result" do
32
+ subject.seo_ranks.each do |seo_rank|
33
+ seo_rank[:href].should_not == nil
34
+ seo_rank[:text].should_not == nil
35
+ seo_rank[:host].should_not == nil
36
+ end
37
+ end
38
+ end
39
+
40
+ describe '#ads_top' do
41
+ it "should have 6 top ads" do
42
+ subject.ads_top.size.should == 3
43
+ end
44
+
45
+ it "should find hotel.elong.com at the first position in the top ads" do
46
+ subject.ads_top[0][:host].should == 'www.agoda.com'
47
+ end
48
+
49
+ it "has an array of hashes with the required keys as the result of ads_top" do
50
+ subject.ads_top.class.should == Array
51
+ subject.ads_top.each do |ad_top|
52
+ ad_top.should have_key(:rank)
53
+ ad_top.should have_key(:host)
54
+ ad_top.should have_key(:href)
55
+ ad_top.should have_key(:text)
56
+ end
57
+ end
58
+ end
59
+
60
+ describe '#ads_right' do
61
+ it "should have 8 right ads" do
62
+ subject.ads_right.size.should == 0
63
+ end
64
+
65
+ it "has an array of hashes with the required keys as the result of ads_right" do
66
+ subject.ads_right.class.should == Array
67
+ subject.ads_right.each do |ad_right|
68
+ ad_right.should have_key(:rank)
69
+ ad_right.should have_key(:host)
70
+ ad_right.should have_key(:href)
71
+ ad_right.should have_key(:text)
72
+ end
73
+ end
74
+ end
75
+
76
+ describe '#ads_bottom' do
77
+ it "should have zero bottom ads" do
78
+ subject.ads_bottom.size.should == 3
79
+ end
80
+
81
+ it "has an array of hashes with the required keys as the result of ads_bottom" do
82
+ subject.ads_bottom.class.should == Array
83
+ subject.ads_bottom.each do |ad_bottom|
84
+ ad_bottom.should have_key(:rank)
85
+ ad_bottom.should have_key(:host)
86
+ ad_bottom.should have_key(:href)
87
+ ad_bottom.should have_key(:text)
88
+ end
89
+ end
90
+ end
91
+ end
data/spec/qihu_spec.rb ADDED
@@ -0,0 +1,87 @@
1
+ #coding:UTF-8
2
+ require 'spec_helper'
3
+ describe Query::Result::Qihu do
4
+ subject{Query::Result::Qihu.new(File.read($sample_qihu))}
5
+
6
+ it "can click the next page button" do
7
+ subject.next_url.should == 's?q=%E9%85%92%E5%BA%97&pn=2&j=0&ls=0&src=srp_paging&fr=360sou_home'
8
+ end
9
+
10
+ it "have over 1000 results" do
11
+ subject.count > 1000
12
+ end
13
+
14
+ describe '#seo_ranks' do
15
+ it "should put u.ctrip.com to be on first" do
16
+ subject.seo_ranks.first[:host].should == 'u.ctrip.com'
17
+ end
18
+
19
+ it "should put '合肥酒店查询预订_携程酒店 to be the first title" do
20
+ subject.seo_ranks.first[:text].should == '合肥酒店查询预订_携程酒店'
21
+ end
22
+
23
+ it "should put 'http://jiudian.qunar.com/' to be the second url" do
24
+ subject.seo_ranks[1][:href].should == 'http://jiudian.qunar.com/'
25
+ end
26
+
27
+ it "should have href,text,host elements for each seo result" do
28
+ subject.seo_ranks.each do |seo_rank|
29
+ seo_rank[:href].should_not == nil
30
+ seo_rank[:text].should_not == nil
31
+ seo_rank[:host].should_not == nil
32
+ end
33
+ end
34
+ end
35
+
36
+ describe '#ads_top' do
37
+ it "should have 3 top ads" do
38
+ subject.ads_top.size.should == 3
39
+ end
40
+
41
+ it "should find hotel.elong.com at the first position in the top ads" do
42
+ subject.ads_top[0][:host].should == 'www.booking.com'
43
+ end
44
+
45
+ it "has an array of hashes with the required keys as the result of ads_top" do
46
+ subject.ads_top.class.should == Array
47
+ subject.ads_top.each do |ad_top|
48
+ ad_top.should have_key(:rank)
49
+ ad_top.should have_key(:host)
50
+ ad_top.should have_key(:href)
51
+ ad_top.should have_key(:text)
52
+ end
53
+ end
54
+ end
55
+
56
+ describe '#ads_right' do
57
+ it "should have 8 right ads" do
58
+ subject.ads_right.size.should == 8
59
+ end
60
+
61
+ it "has an array of hashes with the required keys as the result of ads_right" do
62
+ subject.ads_right.class.should == Array
63
+ subject.ads_right.each do |ad_right|
64
+ ad_right.should have_key(:rank)
65
+ ad_right.should have_key(:host)
66
+ ad_right.should have_key(:href)
67
+ ad_right.should have_key(:text)
68
+ end
69
+ end
70
+ end
71
+
72
+ describe '#ads_bottom' do
73
+ it "should have zero bottom ads" do
74
+ subject.ads_bottom.size.should == 0
75
+ end
76
+
77
+ it "has an array of hashes with the required keys as the result of ads_bottom" do
78
+ subject.ads_bottom.class.should == Array
79
+ subject.ads_bottom.each do |ad_bottom|
80
+ ad_bottom.should have_key(:rank)
81
+ ad_bottom.should have_key(:host)
82
+ ad_bottom.should have_key(:href)
83
+ ad_bottom.should have_key(:text)
84
+ end
85
+ end
86
+ end
87
+ end