query 0.0.1 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +4 -1
- data/Gemfile +3 -1
- data/README.md +6 -1
- data/lib/query/engine/baidu.rb +12 -8
- data/lib/query/engine/baidu_mobile.rb +4 -4
- data/lib/query/engine/{qihoo.rb → qihu.rb} +8 -3
- data/lib/query/engine/{qihoo_mobile.rb → qihu_mobile.rb} +0 -0
- data/lib/query/engine/sogou.rb +45 -0
- data/lib/query/engine/sogou_mobile.rb +21 -0
- data/lib/query/engine.rb +11 -4
- data/lib/query/result/baidu.rb +57 -91
- data/lib/query/result/baidu_mobile.rb +49 -93
- data/lib/query/result/qihu.rb +66 -0
- data/lib/query/result/{qihoo_mobile.rb → qihu_mobile.rb} +1 -1
- data/lib/query/result/sogou.rb +103 -0
- data/lib/query/result/sogou_mobile.rb +51 -0
- data/lib/query/result.rb +47 -4
- data/lib/query/version.rb +1 -1
- data/lib/query.rb +6 -8
- data/query.gemspec +2 -3
- data/spec/baidu1_spec.rb +157 -0
- data/spec/baidu2_spec.rb +156 -0
- data/spec/mbaidu1_spec.rb +167 -0
- data/spec/msogou_spec.rb +91 -0
- data/spec/qihu_spec.rb +87 -0
- data/spec/samples/baidu1.html +521 -0
- data/spec/samples/baidu2.html +662 -0
- data/spec/samples/mbaidu1.html +2 -0
- data/spec/samples/mbaidu2.html +2 -0
- data/spec/samples/msogou.html +474 -0
- data/spec/samples/qihu.html +506 -0
- data/spec/samples/sogou.html +629 -0
- data/spec/sogou_mobile_spec.rb +86 -0
- data/spec/sogou_spec.rb +107 -0
- data/spec/spec_helper.rb +12 -1
- metadata +56 -31
- data/lib/query/engine/base.rb +0 -16
- data/lib/query/result/base.rb +0 -50
- data/lib/query/result/qihoo.rb +0 -75
- data/spec/baidu_mobile_spec.rb +0 -19
- data/spec/baidu_spec.rb +0 -73
- data/spec/qihoo_spec.rb +0 -27
@@ -0,0 +1,103 @@
|
|
1
|
+
# coding: UTF-8
module Query
  module Result
    # Parses a Sogou desktop search-result page (the Nokogiri document kept in
    # @page) into ad slots, organic results, a result count and a paging link.
    #
    # Host ranking (#rank) comes from the included Query::Result mixin, so this
    # class only has to supply the per-slot lists.
    class Sogou
      include Query::Result

      # Ads listed in the first "sponsored" container.
      #
      # @return [Array<Hash>] hashes with :rank, :text, :href and :host;
      #   empty when the page has no sponsored container.
      def ads_top
        containers = sponsored_divs
        return [] if containers.empty?
        parse_sponsored(containers.first)
      end

      # Ads found under div#right div#bdfs0.
      #
      # @return [Array<Hash>] hashes with :rank, :text, :href and :host.
      def ads_right
        @page.css('div#right div#bdfs0').map.with_index do |div, index|
          {
            :rank => index + 1,
            :text => div.css('h3 a').text,
            :href => div.css('h3 a')[0]['href'],
            :host => Addressable::URI.parse(div.css('div.fb a cite').text).host
          }
        end
      end

      # Ads listed in the last "sponsored" container when the page has at
      # least two of them.
      #
      # Previously this returned nil whenever two or more containers were
      # present, which made raw_ranks/rank fail on such pages.
      # NOTE(review): assumes the bottom container uses the same <li> markup
      # as the top one -- confirm against a sample page.
      #
      # @return [Array<Hash>] hashes with :rank, :text, :href and :host.
      def ads_bottom
        containers = sponsored_divs
        return [] if containers.size < 2
        parse_sponsored(containers.last)
      end

      # Organic results, taken from the <h3> headings under div.results.
      #
      # @return [Array<Hash>] hashes with :text, :href, :host and :rank.
      def seo_ranks
        @page.search("//div[@class='results']/div/h3").map.with_index do |h3, index|
          link = h3.search('a').first
          {
            :text => link.text,
            :href => link['href'],
            :host => Addressable::URI.parse(link['href']).host,
            :rank => index + 1
          }
        end
      end

      # Result count read from the first counter element that exists.
      #
      # @return [Integer, nil] the digits of the counter text as an integer,
      #   or nil when neither counter element is present (the old code leaked
      #   the list of xpaths in that case).
      def count
        ["//div[@class='zhanzhang']//em", "//span[@id='scd_num']"].each do |xpath|
          counter_block = @page.search(xpath).first
          return counter_block.text.gsub(/\D/, '').to_i if counter_block
        end
        nil
      end

      # Texts of the cells in the #hint_container table (memoized).
      #
      # @return [Array<String>]
      def related_keywords
        # td.text, not td.first.text: a node has no child-style #first to
        # take text from, so the old call failed on any populated table.
        @related_keywords ||= @page.search("table[@id='hint_container']/td").map { |td| td.text }
      end

      # href of the "next page" link.
      #
      # @return [String, nil] nil when the page has no such link.
      def next_url
        link = @page.search("//a[text()='下一页>']").first
        link && link['href']
      end

      # @return [Boolean] true unless the page contains a div.no-result.
      def has_result?
        @page.search("div[@class='no-result']").empty?
      end

      private

      # All "sponsored" containers on the page, in document order.
      def sponsored_divs
        @page.search("div[@class='sponsored']")
      end

      # Turns every <li> of one sponsored container into an ad hash.
      def parse_sponsored(container)
        container.search("li").map.with_index do |li, index|
          {
            :rank => index + 1,
            :text => li.css('h3 a').text,
            :href => li.css('h3 a')[0]['href'],
            :host => Addressable::URI.parse(li.css('cite')[0].text).host
          }
        end
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
module Query
|
3
|
+
module Result
|
4
|
+
class SogouMobile
|
5
|
+
include Query::Result
|
6
|
+
def ads_top
|
7
|
+
@page.search("//ul[@class='searchresult']/li[1]/preceding-sibling::div").map.with_index do |ad_div,index|
|
8
|
+
parse_ad(ad_div).merge({:rank => index + 1})
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
def ads_right
|
13
|
+
[]
|
14
|
+
end
|
15
|
+
|
16
|
+
def ads_bottom
|
17
|
+
@page.search("//ul[@class='searchresult']/li[last()]/following-sibling::div").map.with_index do |div,index|
|
18
|
+
parse_ad(div).merge({:rank => index + 1})
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def seo_ranks
|
23
|
+
@seo_rank ||= @page.search("//ul[@class='searchresult']/li/a").map.with_index do |a,index|
|
24
|
+
href = URI.decode(CGI.parse(URI(URI.encode(a['href'])).query)['url'].first)
|
25
|
+
{
|
26
|
+
:rank => index + 1,
|
27
|
+
:text => a.search('h3').text,
|
28
|
+
:href => href,
|
29
|
+
:host => URI(href).host
|
30
|
+
}
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def next_url
|
35
|
+
@page.search("//a[text()='下一页']").first['href']
|
36
|
+
end
|
37
|
+
|
38
|
+
def count
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
def parse_ad(ad_div)
|
43
|
+
{
|
44
|
+
:text => ad_div.search('h3').first.text,
|
45
|
+
:href => ad_div.search('a').first['href'],
|
46
|
+
:host => Addressable::URI.parse("http://#{ad_div.search('span[@class="site"]').text}").host
|
47
|
+
}
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
data/lib/query/result.rb
CHANGED
@@ -1,10 +1,53 @@
|
|
1
1
|
module Query
  # Behaviour shared by every engine-specific result class. Including classes
  # supply ads_top, ads_right, ads_bottom, seo_ranks and next_url; this mixin
  # adds construction, host ranking and paging on top of them.
  module Result
    attr_accessor :baseuri, :pagenumber, :perpage

    # @param page markup handed to Nokogiri::HTML; the parsed document is kept
    #   in @page. The page number starts at 1.
    def initialize(page)
      @page = Nokogiri::HTML page
      @pagenumber = 1
    end

    # All four result lists keyed by their method name.
    #
    # @return [Hash{String => Array}]
    def raw_ranks
      {
        'ads_top' => ads_top,
        'ads_right' => ads_right,
        'ads_bottom' => ads_bottom,
        'seo_ranks' => seo_ranks
      }
    end

    # Position of +host+ in each result list.
    #
    # Results are cached per host. The previous single-slot cache returned the
    # answer computed for the first host to every later caller.
    #
    # @param host [String, Regexp] exact host name or pattern to match against
    #   each entry's :host.
    # @return [Array] four entries, in the order seo_ranks, ads_top, ads_right,
    #   ads_bottom: the 1-based position of the first match, nil when there is
    #   none, or false when +host+ is neither a String nor a Regexp and the
    #   list is not empty.
    def rank(host) # on base of ranks
      @rank_by_host ||= {}
      @rank_by_host[host] ||= %w(seo_ranks ads_top ads_right ads_bottom).map do |type_str|
        rank_within(send(type_str), host)
      end
    end

    # Fetches and wraps the following result page.
    #
    # @return [Object, nil] a new instance of the receiver's class with an
    #   absolute baseuri, pagenumber + 1 and the same perpage; nil when the
    #   current page has no next link.
    def next
      relative = next_url
      return nil if relative.nil?

      @next_url = URI.join(@baseuri, relative).to_s
      following = self.class.new(HTTParty.get(@next_url))
      # Keep the absolute URL as the base: overwriting it with the relative
      # link (as the old code did last) makes the next URI.join fail.
      following.baseuri = @next_url
      following.pagenumber = @pagenumber + 1
      following.perpage = @perpage
      following
    end

    private

    # 1-based position of the first entry in +lines+ whose :host matches.
    def rank_within(lines, host)
      matcher =
        case host
        when Regexp then lambda { |candidate| candidate =~ host }
        when String then lambda { |candidate| candidate == host }
        end

      # Unsupported host type: false for a populated list, nil for an empty
      # one, exactly as before.
      return(lines.first.nil? ? nil : false) if matcher.nil?

      lines.each_with_index do |line, index|
        return index + 1 if matcher.call(line[:host])
      end
      nil
    end
  end
end
|
5
46
|
require 'nokogiri'
|
6
|
-
require
|
47
|
+
require "addressable/uri"
|
7
48
|
require 'query/result/baidu'
|
8
49
|
require 'query/result/baidu_mobile'
|
9
|
-
require 'query/result/
|
10
|
-
require 'query/result/
|
50
|
+
require 'query/result/qihu'
|
51
|
+
require 'query/result/qihu_mobile'
|
52
|
+
require 'query/result/sogou'
|
53
|
+
require 'query/result/sogou_mobile'
|
data/lib/query/version.rb
CHANGED
data/lib/query.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
require 'addressable/uri'
|
6
|
-
require 'awesome_print'
|
7
|
-
module Query
|
8
|
-
# Your code goes here...
|
1
|
+
# Case-insensitive membership helper.
class MyFilter
  # Tells whether +set+ holds an element equal to +str+ once both sides are
  # converted to strings and downcased.
  #
  # @param set [Enumerable] candidates; each is compared via to_s.downcase.
  # @param str [#to_s] value to look for (non-strings such as symbols are
  #   accepted, mirroring how the elements are treated).
  # @return [Boolean]
  def contains(set, str)
    # Downcase the needle once instead of once per element.
    needle = str.to_s.downcase
    set.any? { |x| x.to_s.downcase == needle }
  end
end
|
6
|
+
require 'query/result'
|
7
|
+
require 'query/engine'
|
data/query.gemspec
CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.version = Query::VERSION
|
9
9
|
spec.authors = ["seoaqua"]
|
10
10
|
spec.email = ["seoaqua@me.com"]
|
11
|
-
spec.description = %q{This GEM is designed to work for SEOers who need to fetch query and parse results from all kinds of search engines}
|
12
|
-
spec.summary = %q{
|
11
|
+
spec.description = %q{This GEM is designed to work for Chinese SEOers who need to fetch query and parse results from all kinds of search engines}
|
12
|
+
spec.summary = %q{I dont have time to write the document yet. Usage is almost within rspec tests. Any questions,pls contact me with QQ628552}
|
13
13
|
spec.homepage = "https://github.com/seoaqua/query"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
@@ -23,5 +23,4 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_dependency "nokogiri"
|
24
24
|
spec.add_dependency "addressable"
|
25
25
|
spec.add_dependency "httparty"
|
26
|
-
|
27
26
|
end
|
data/spec/baidu1_spec.rb
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
#coding:UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
# Parsing checks for Query::Result::Baidu against the saved page whose path is
# held in $sample_baidu1. Example descriptions state the values actually
# asserted.
describe Query::Result::Baidu do
  subject { Query::Result::Baidu.new(File.read($sample_baidu1)) }

  it "can click the next page button" do
    subject.next_url.should == '/s?wd=%E5%90%8C%E7%A8%8B%E7%BD%91%E9%85%92%E5%BA%97%E9%A2%84%E8%AE%A2&pn=10&tn=baiduhome_pg&ie=utf-8&f=3&usm=2&rsv_page=1'
  end

  it "have 69200000 results" do
    subject.count.should == 69200000
  end

  describe '#seo_ranks' do
    it "puts www.17u.cn to be on first" do
      subject.seo_ranks.first[:host].should == 'www.17u.cn'
    end

    it "should put 同程旅游网客服电话 to be the first title" do
      subject.seo_ranks.first[:text].should == '同程旅游网客服电话'
    end

    it "should put 'http://www.17u.cn/' to be the second url" do
      subject.seo_ranks[1][:href].should == 'http://www.17u.cn/'
    end

    it "should have href,text,host elements for each seo result" do
      subject.seo_ranks.each do |seo_rank|
        seo_rank[:href].should_not == nil
        seo_rank[:text].should_not == nil
        seo_rank[:host].should_not == nil
      end
    end
  end

  describe '#ads_top' do
    it "should have 3 top ads" do
      subject.ads_top.size.should == 3
    end

    it "should find www.17u.cn at the first position in the top ads" do
      subject.ads_top[0][:host].should == 'www.17u.cn'
    end

    it "has an array of hashes with the required keys as the result of ads_top" do
      subject.ads_top.class.should == Array
      subject.ads_top.each do |ad_top|
        ad_top.should have_key(:rank)
        ad_top.should have_key(:host)
        ad_top.should have_key(:href)
        ad_top.should have_key(:text)
      end
    end
  end

  describe '#ads_right' do
    it "should have 5 right ads" do
      subject.ads_right.size.should == 5
    end

    it "has an array of hashes with the required keys as the result of ads_right" do
      subject.ads_right.class.should == Array
      subject.ads_right.each do |ad_right|
        ad_right.should have_key(:rank)
        ad_right.should have_key(:host)
        ad_right.should have_key(:href)
        ad_right.should have_key(:text)
      end
    end
  end

  describe '#ads_bottom' do
    it "should have 3 bottom ads" do
      subject.ads_bottom.size.should == 3
    end

    it "has an array of hashes with the required keys as the result of ads_bottom" do
      subject.ads_bottom.class.should == Array
      subject.ads_bottom.each do |ad_bottom|
        ad_bottom.should have_key(:rank)
        ad_bottom.should have_key(:host)
        ad_bottom.should have_key(:href)
        ad_bottom.should have_key(:text)
      end
    end
  end
end
|
89
|
+
__END__
|
90
|
+
describe Query::Engine::Baidu do
|
91
|
+
page = Query::Engine::Baidu.query '百度'
|
92
|
+
|
93
|
+
it "should return Query::Result::Baidu" do
|
94
|
+
page.class.should == Query::Result::Baidu
|
95
|
+
end
|
96
|
+
|
97
|
+
it "should return 100,000,000" do
|
98
|
+
page.count.should > 100000
|
99
|
+
end
|
100
|
+
it "should return 1" do
|
101
|
+
page.rank('www.baidu.com').should == 1
|
102
|
+
end
|
103
|
+
|
104
|
+
it "should return Query::Result::Baidu" do
|
105
|
+
page.next.class.should == Query::Result::Baidu
|
106
|
+
end
|
107
|
+
|
108
|
+
it "should return true" do
|
109
|
+
bool = Query::Engine::Baidu.popular?'百度'
|
110
|
+
bool.should == true
|
111
|
+
end
|
112
|
+
|
113
|
+
it "should return false" do
|
114
|
+
bool = Query::Engine::Baidu.popular?'lavataliuming'
|
115
|
+
bool.should == false
|
116
|
+
end
|
117
|
+
|
118
|
+
it "should return over 5 words beginning with the query_word" do
|
119
|
+
query_word = '为'
|
120
|
+
suggestions = Query::Engine::Baidu.suggestions(query_word)
|
121
|
+
suggestions.size.should > 5
|
122
|
+
suggestions.each do |suggestion|
|
123
|
+
suggestion[0].should == query_word
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
it "should return 100,000,000" do
|
128
|
+
result = baidu.pages('baidu.com')
|
129
|
+
result.class.should == Query::Result::Baidu
|
130
|
+
result.count.should == 100000000
|
131
|
+
end
|
132
|
+
|
133
|
+
it "should return 100,000,000" do
|
134
|
+
result = baidu.links('baidu.com')
|
135
|
+
result.class.should == Query::Result::Baidu
|
136
|
+
result.count.should == 100000000
|
137
|
+
end
|
138
|
+
it "should return 100,000,000" do
|
139
|
+
result = baidu.pages_with('baidu.com','baidu.com')
|
140
|
+
result.class.should == Query::Result::Baidu
|
141
|
+
result.count.should == 100000000
|
142
|
+
end
|
143
|
+
it "查询已经被收录的页面收录情况时,应返回true" do
|
144
|
+
baidu.indexed?('http://www.baidu.com').should == true
|
145
|
+
end
|
146
|
+
it "查询一个不存在的页面收录情况时,应返回true" do
|
147
|
+
baidu.indexed?('http://zxv.not-exists.com').should == false
|
148
|
+
end
|
149
|
+
page1 = Query::Engine::Baidu.query('seoaqua.com')
|
150
|
+
it "查询结果应该都能拿到title,href,host" do
|
151
|
+
page1.seo_ranks.each do |id,rank|
|
152
|
+
rank['href'].should_not == nil
|
153
|
+
rank['text'].should_not == nil
|
154
|
+
rank['host'].should_not == nil
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
data/spec/baidu2_spec.rb
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
#coding:UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
# Parsing checks for Query::Result::Baidu against the saved page whose path is
# held in $sample_baidu2. Example descriptions state the values actually
# asserted.
describe Query::Result::Baidu do
  subject { Query::Result::Baidu.new(File.read($sample_baidu2)) }

  it "can click the next page button" do
    subject.next_url.should == '/s?wd=%E9%85%92%E5%BA%97%E9%A2%84%E8%AE%A2%E7%BD%91&pn=10&tn=baiduhome_pg&ie=utf-8&f=3&usm=1&rsv_page=1'
  end

  it "have 100000000 results" do
    subject.count.should == 100000000
  end

  describe '#seo_ranks' do
    it "should put hotel.qunar.com to be on first" do
      subject.seo_ranks.first[:host].should == 'hotel.qunar.com'
    end

    it "should put 北京酒店预订_8371家特惠酒店_百度品质保证_去哪儿网提供 to be the first title" do
      subject.seo_ranks.first[:text].should == '北京酒店预订_8371家特惠酒店_百度品质保证_去哪儿网提供'
    end

    it "should put the Baidu redirect link to be the second url" do
      subject.seo_ranks[1][:href].should == 'http://www.baidu.com/link?url=ZpGwUrZ8xUUgBQofg1TiNH1n_Ki3QWE62jvjkGvwwZ70wQPxFJxSD1uunh0uDwLM'
    end

    it "should have href,text,host elements for each seo result" do
      subject.seo_ranks.each do |seo_rank|
        seo_rank[:href].should_not == nil
        seo_rank[:text].should_not == nil
        seo_rank[:host].should_not == nil
      end
    end
  end

  describe '#ads_top' do
    it "should have 4 top ads" do
      subject.ads_top.size.should == 4
    end

    it "should find www.agoda.com at the first position in the top ads" do
      subject.ads_top[0][:host].should == 'www.agoda.com'
    end

    it "has an array of hashes with the required keys as the result of ads_top" do
      subject.ads_top.class.should == Array
      subject.ads_top.each do |ad_top|
        ad_top.should have_key(:rank)
        ad_top.should have_key(:host)
        ad_top.should have_key(:href)
        ad_top.should have_key(:text)
      end
    end
  end

  describe '#ads_right' do
    it "should have 8 right ads" do
      subject.ads_right.size.should == 8
    end

    it "has an array of hashes with the required keys as the result of ads_right" do
      subject.ads_right.class.should == Array
      subject.ads_right.each do |ad_right|
        ad_right.should have_key(:rank)
        ad_right.should have_key(:host)
        ad_right.should have_key(:href)
        ad_right.should have_key(:text)
      end
    end
  end

  describe '#ads_bottom' do
    it "should have zero bottom ads" do
      subject.ads_bottom.size.should == 0
    end

    it "has an array of hashes with the required keys as the result of ads_bottom" do
      subject.ads_bottom.class.should == Array
      subject.ads_bottom.each do |ad_bottom|
        ad_bottom.should have_key(:rank)
        ad_bottom.should have_key(:host)
        ad_bottom.should have_key(:href)
        ad_bottom.should have_key(:text)
      end
    end
  end
end
|
88
|
+
__END__
|
89
|
+
describe Query::Engine::Baidu do
|
90
|
+
page = Query::Engine::Baidu.query '百度'
|
91
|
+
|
92
|
+
it "should return Query::Result::Baidu" do
|
93
|
+
page.class.should == Query::Result::Baidu
|
94
|
+
end
|
95
|
+
|
96
|
+
it "should return 100,000,000" do
|
97
|
+
page.count.should > 100000
|
98
|
+
end
|
99
|
+
it "should return 1" do
|
100
|
+
page.rank('www.baidu.com').should == 1
|
101
|
+
end
|
102
|
+
|
103
|
+
it "should return Query::Result::Baidu" do
|
104
|
+
page.next.class.should == Query::Result::Baidu
|
105
|
+
end
|
106
|
+
|
107
|
+
it "should return true" do
|
108
|
+
bool = Query::Engine::Baidu.popular?'百度'
|
109
|
+
bool.should == true
|
110
|
+
end
|
111
|
+
|
112
|
+
it "should return false" do
|
113
|
+
bool = Query::Engine::Baidu.popular?'lavataliuming'
|
114
|
+
bool.should == false
|
115
|
+
end
|
116
|
+
|
117
|
+
it "should return over 5 words beginning with the query_word" do
|
118
|
+
query_word = '为'
|
119
|
+
suggestions = Query::Engine::Baidu.suggestions(query_word)
|
120
|
+
suggestions.size.should > 5
|
121
|
+
suggestions.each do |suggestion|
|
122
|
+
suggestion[0].should == query_word
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
it "should return 100,000,000" do
|
127
|
+
result = baidu.pages('baidu.com')
|
128
|
+
result.class.should == Query::Result::Baidu
|
129
|
+
result.count.should == 100000000
|
130
|
+
end
|
131
|
+
|
132
|
+
it "should return 100,000,000" do
|
133
|
+
result = baidu.links('baidu.com')
|
134
|
+
result.class.should == Query::Result::Baidu
|
135
|
+
result.count.should == 100000000
|
136
|
+
end
|
137
|
+
it "should return 100,000,000" do
|
138
|
+
result = baidu.pages_with('baidu.com','baidu.com')
|
139
|
+
result.class.should == Query::Result::Baidu
|
140
|
+
result.count.should == 100000000
|
141
|
+
end
|
142
|
+
it "查询已经被收录的页面收录情况时,应返回true" do
|
143
|
+
baidu.indexed?('http://www.baidu.com').should == true
|
144
|
+
end
|
145
|
+
it "查询一个不存在的页面收录情况时,应返回true" do
|
146
|
+
baidu.indexed?('http://zxv.not-exists.com').should == false
|
147
|
+
end
|
148
|
+
page1 = Query::Engine::Baidu.query('seoaqua.com')
|
149
|
+
it "查询结果应该都能拿到title,href,host" do
|
150
|
+
page1.seo_ranks.each do |id,rank|
|
151
|
+
rank['href'].should_not == nil
|
152
|
+
rank['text'].should_not == nil
|
153
|
+
rank['host'].should_not == nil
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|