query 0.0.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -1
- data/Gemfile +3 -1
- data/README.md +6 -1
- data/lib/query/engine/baidu.rb +12 -8
- data/lib/query/engine/baidu_mobile.rb +4 -4
- data/lib/query/engine/{qihoo.rb → qihu.rb} +8 -3
- data/lib/query/engine/{qihoo_mobile.rb → qihu_mobile.rb} +0 -0
- data/lib/query/engine/sogou.rb +45 -0
- data/lib/query/engine/sogou_mobile.rb +21 -0
- data/lib/query/engine.rb +11 -4
- data/lib/query/result/baidu.rb +57 -91
- data/lib/query/result/baidu_mobile.rb +49 -93
- data/lib/query/result/qihu.rb +66 -0
- data/lib/query/result/{qihoo_mobile.rb → qihu_mobile.rb} +1 -1
- data/lib/query/result/sogou.rb +103 -0
- data/lib/query/result/sogou_mobile.rb +51 -0
- data/lib/query/result.rb +47 -4
- data/lib/query/version.rb +1 -1
- data/lib/query.rb +6 -8
- data/query.gemspec +2 -3
- data/spec/baidu1_spec.rb +157 -0
- data/spec/baidu2_spec.rb +156 -0
- data/spec/mbaidu1_spec.rb +167 -0
- data/spec/msogou_spec.rb +91 -0
- data/spec/qihu_spec.rb +87 -0
- data/spec/samples/baidu1.html +521 -0
- data/spec/samples/baidu2.html +662 -0
- data/spec/samples/mbaidu1.html +2 -0
- data/spec/samples/mbaidu2.html +2 -0
- data/spec/samples/msogou.html +474 -0
- data/spec/samples/qihu.html +506 -0
- data/spec/samples/sogou.html +629 -0
- data/spec/sogou_mobile_spec.rb +86 -0
- data/spec/sogou_spec.rb +107 -0
- data/spec/spec_helper.rb +12 -1
- metadata +56 -31
- data/lib/query/engine/base.rb +0 -16
- data/lib/query/result/base.rb +0 -50
- data/lib/query/result/qihoo.rb +0 -75
- data/spec/baidu_mobile_spec.rb +0 -19
- data/spec/baidu_spec.rb +0 -73
- data/spec/qihoo_spec.rb +0 -27
@@ -0,0 +1,103 @@
|
|
1
|
+
module Query
  module Result
    # Parser for a Sogou desktop search-results page.
    #
    # Mixes in Query::Result, whose +initialize+ parses the raw HTML into the
    # Nokogiri document held in @page and which also supplies +rank+ / +next+.
    class Sogou
      include Query::Result

      # Ads found in the first "sponsored" block of the page.
      #
      # @return [Array<Hash>] one hash per ad with :rank (1-based), :text,
      #   :href and :host; empty when the page has no sponsored block.
      def ads_top
        return [] if sponsored_divs.empty?

        parse_sponsored(sponsored_divs.first)
      end

      # Ads found in the right-hand column (div#right div#bdfs0).
      #
      # @return [Array<Hash>] same shape as {#ads_top}.
      def ads_right
        @page.css('div#right div#bdfs0').map.with_index do |div, index|
          link = div.css('h3 a')
          {
            :rank => index + 1,
            :text => link.text,
            # An ad without a title link yields nil instead of raising.
            :href => link.first && link.first['href'],
            :host => Addressable::URI.parse(div.css('div.fb a cite').text).host
          }
        end
      end

      # Ads found in the last "sponsored" block, when the page has at least two.
      #
      # Previously this method fell through and returned nil whenever two or
      # more sponsored blocks existed, which broke callers iterating the result.
      # NOTE(review): assumes the bottom block uses the same li/h3/cite markup
      # as the top block -- confirm against a sample page.
      #
      # @return [Array<Hash>] same shape as {#ads_top}; empty when there are
      #   fewer than two sponsored blocks.
      def ads_bottom
        return [] if sponsored_divs.size < 2

        parse_sponsored(sponsored_divs.last)
      end

      # Organic results, in page order.
      #
      # @return [Array<Hash>] one hash per result with :text, :href, :host and
      #   a 1-based :rank.
      def seo_ranks
        @page.search("//div[@class='results']/div/h3").map.with_index do |h3, index|
          anchor = h3.search('a').first
          {
            :text => anchor.text,
            :href => anchor['href'],
            :host => Addressable::URI.parse(anchor['href']).host,
            :rank => index + 1
          }
        end
      end

      # Result count shown on the page, with every non-digit stripped.
      #
      # @return [Integer, nil] the count from the first counter element found,
      #   or nil when neither known counter element is present (previously the
      #   list of xpaths leaked out as the return value).
      def count
        ["//div[@class='zhanzhang']//em", "//span[@id='scd_num']"].each do |xpath|
          counter_block = @page.search(xpath).first
          return counter_block.text.gsub(/\D/, '').to_i if counter_block
        end
        nil
      end

      # Text of each cell in the hint container table (memoized).
      #
      # The original called +first+ on each cell; on a Nokogiri node that is the
      # Enumerable method over the node's attributes, not a child node, so the
      # following +text+ call raised. The cell's own text is used instead.
      #
      # @return [Array<String>]
      def related_keywords
        @related_keywords ||= @page.search("table[@id='hint_container']/td").map { |td| td.text }
      end

      # Href of the "next page" link.
      #
      # @return [String, nil] nil when the page has no such link.
      def next_url
        link = @page.search("//a[text()='下一页>']").first
        link && link['href']
      end

      # @return [Boolean] true unless the page contains a "no-result" div.
      def has_result?
        @page.search("div[@class='no-result']").empty?
      end

      private

      # All "sponsored" divs on the page, in document order.
      def sponsored_divs
        @page.search("div[@class='sponsored']")
      end

      # Turns every <li> inside a sponsored block into an ad hash.
      # Missing title links or <cite> elements produce nil fields rather than
      # raising, so one malformed ad does not abort the whole page.
      def parse_sponsored(container)
        container.search('li').map.with_index do |li, index|
          link = li.css('h3 a')
          cite = li.css('cite').first
          {
            :rank => index + 1,
            :text => link.text,
            :href => link.first && link.first['href'],
            :host => cite && Addressable::URI.parse(cite.text).host
          }
        end
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
module Query
  module Result
    # Parser for a Sogou mobile search-results page.
    #
    # Mixes in Query::Result, whose +initialize+ parses the raw HTML into the
    # Nokogiri document held in @page.
    class SogouMobile
      include Query::Result

      # Ad divs that precede the first result <li> inside ul.searchresult.
      #
      # @return [Array<Hash>] hashes with :text, :href, :host and a 1-based :rank.
      def ads_top
        @page.search("//ul[@class='searchresult']/li[1]/preceding-sibling::div").map.with_index do |ad_div, index|
          parse_ad(ad_div).merge(:rank => index + 1)
        end
      end

      # This parser reports no right-hand ads.
      #
      # @return [Array] always empty.
      def ads_right
        []
      end

      # Ad divs that follow the last result <li> inside ul.searchresult.
      #
      # @return [Array<Hash>] same shape as {#ads_top}.
      def ads_bottom
        @page.search("//ul[@class='searchresult']/li[last()]/following-sibling::div").map.with_index do |ad_div, index|
          parse_ad(ad_div).merge(:rank => index + 1)
        end
      end

      # Organic results, in page order (memoized).
      #
      # Each result link carries its destination in a +url+ query parameter;
      # that decoded value is reported as :href.
      #
      # @return [Array<Hash>] hashes with :rank (1-based), :text, :href, :host.
      def seo_ranks
        @seo_ranks ||= @page.search("//ul[@class='searchresult']/li/a").map.with_index do |a, index|
          href = target_url(a['href'])
          {
            :rank => index + 1,
            :text => a.search('h3').text,
            :href => href,
            :host => URI(href).host
          }
        end
      end

      # Href of the "next page" link.
      #
      # @return [String, nil] nil when the page has no such link.
      def next_url
        link = @page.search("//a[text()='下一页']").first
        link && link['href']
      end

      # Result count is not implemented for this parser.
      #
      # @return [nil]
      def count
        nil
      end

      private

      # Builds the :text/:href/:host part of an ad hash from an ad div.
      # The site label has no scheme, so one is prepended before parsing.
      def parse_ad(ad_div)
        {
          :text => ad_div.search('h3').first.text,
          :href => ad_div.search('a').first['href'],
          :host => Addressable::URI.parse("http://#{ad_div.search('span[@class="site"]').text}").host
        }
      end

      # Extracts and decodes the +url+ query parameter of a result link.
      #
      # The original escaped the href with URI.encode, parsed it, and unescaped
      # the parameter again with URI.decode. Both methods were removed in
      # Ruby 3.0. Reading the raw query string and letting CGI.parse decode it
      # once is intended to give the same value without them.
      #
      # NOTE(review): when the link has no +url+ parameter the raw href is
      # returned as-is (the original raised here) -- confirm this fallback is
      # what callers want.
      def target_url(raw_href)
        query = raw_href.to_s.split('?', 2)[1].to_s
        CGI.parse(query)['url'].first || raw_href
      end
    end
  end
end
|
data/lib/query/result.rb
CHANGED
@@ -1,10 +1,53 @@
|
|
1
1
|
module Query
  # Behaviour shared by every engine-specific result parser.
  #
  # Including classes are expected to provide +ads_top+, +ads_right+,
  # +ads_bottom+, +seo_ranks+ and +next_url+, which the methods below call.
  module Result
    attr_accessor :baseuri, :pagenumber, :perpage

    # @param page [String] raw HTML of a results page; parsed into @page.
    def initialize(page)
      @page = Nokogiri::HTML page
      @pagenumber = 1
    end

    # All ranking lists of the page, keyed by list name.
    #
    # @return [Hash{String => Array<Hash>}]
    def raw_ranks
      {
        'ads_top' => ads_top,
        'ads_right' => ads_right,
        'ads_bottom' => ads_bottom,
        'seo_ranks' => seo_ranks
      }
    end

    # 1-based position of the first entry whose :host matches +host+, looked up
    # separately in each ranking list.
    #
    # The result is no longer memoized: the old +@rank ||=+ cached the answer
    # for the first host asked about and returned it for every later host.
    #
    # @param host [String, Regexp] exact host name, or a pattern to match it.
    # @return [Array] four entries, for seo_ranks, ads_top, ads_right and
    #   ads_bottom in that order. Each is the 1-based position, or nil when the
    #   host is absent. For any other +host+ type the entry is false (nil when
    #   that list is empty), as before.
    def rank(host)
      matcher =
        case host
        when Regexp then lambda { |line| line[:host] =~ host }
        when String then lambda { |line| line[:host] == host }
        end

      %w(seo_ranks ads_top ads_right ads_bottom).map do |type|
        # Array() tolerates a parser that returns nil for a list.
        lines = Array(send(type))
        if matcher
          position = lines.index(&matcher)
          position && position + 1
        else
          lines.empty? ? nil : false
        end
      end
    end

    # Fetches the following results page and wraps it in the same parser class.
    #
    # The new object's +baseuri+ stays the absolute URL that was fetched.
    # Previously it was overwritten with the relative +next_url+, so calling
    # +next+ on the returned page made URI.join fail.
    #
    # @return [Object] an instance of the receiver's class for the next page,
    #   with +pagenumber+ incremented and +perpage+ carried over.
    def next
      @next_url = URI.join(@baseuri, next_url).to_s
      response = HTTParty.get(@next_url)
      # Hand the parser the body string rather than the response wrapper.
      following = self.class.new(response.body)
      following.baseuri = @next_url
      following.pagenumber = @pagenumber + 1
      following.perpage = @perpage
      following
    end
  end
end
|
5
46
|
require 'nokogiri'
|
6
|
-
require
|
47
|
+
require "addressable/uri"
|
7
48
|
require 'query/result/baidu'
|
8
49
|
require 'query/result/baidu_mobile'
|
9
|
-
require 'query/result/
|
10
|
-
require 'query/result/
|
50
|
+
require 'query/result/qihu'
|
51
|
+
require 'query/result/qihu_mobile'
|
52
|
+
require 'query/result/sogou'
|
53
|
+
require 'query/result/sogou_mobile'
|
data/lib/query/version.rb
CHANGED
data/lib/query.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
require 'addressable/uri'
|
6
|
-
require 'awesome_print'
|
7
|
-
module Query
|
8
|
-
# Your code goes here...
|
1
|
+
# Helper for case-insensitive membership checks.
class MyFilter
  # Reports whether +set+ holds an element whose string form equals +str+
  # when both are compared in lower case.
  #
  # @param set [Enumerable] candidates; each is converted with +to_s+.
  # @param str [String] value to look for.
  # @return [Boolean]
  def contains(set, str)
    set.each do |candidate|
      return true if candidate.to_s.downcase == str.downcase
    end
    false
  end
end
|
6
|
+
require 'query/result'
|
7
|
+
require 'query/engine'
|
data/query.gemspec
CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.version = Query::VERSION
|
9
9
|
spec.authors = ["seoaqua"]
|
10
10
|
spec.email = ["seoaqua@me.com"]
|
11
|
-
spec.description = %q{This GEM is designed to work for SEOers who need to fetch query and parse results from all kinds of search engines}
|
12
|
-
spec.summary = %q{
|
11
|
+
spec.description = %q{This GEM is designed to work for Chinese SEOers who need to fetch query and parse results from all kinds of search engines}
|
12
|
+
spec.summary = %q{I dont have time to write the document yet. Usage is almost within rspec tests. Any questions,pls contact me with QQ628552}
|
13
13
|
spec.homepage = "https://github.com/seoaqua/query"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
@@ -23,5 +23,4 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_dependency "nokogiri"
|
24
24
|
spec.add_dependency "addressable"
|
25
25
|
spec.add_dependency "httparty"
|
26
|
-
|
27
26
|
end
|
data/spec/baidu1_spec.rb
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
#coding:UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
# Parser expectations for the first saved Baidu sample page ($sample_baidu1).
# Example descriptions were corrected to state the values actually asserted.
describe Query::Result::Baidu do
  subject{Query::Result::Baidu.new(File.read($sample_baidu1))}

  it "can click the next page button" do
    subject.next_url.should == '/s?wd=%E5%90%8C%E7%A8%8B%E7%BD%91%E9%85%92%E5%BA%97%E9%A2%84%E8%AE%A2&pn=10&tn=baiduhome_pg&ie=utf-8&f=3&usm=2&rsv_page=1'
  end

  it "have 69200000 results" do
    subject.count.should == 69200000
  end

  describe '#seo_ranks' do
    it "puts www.17u.cn to be on first" do
      subject.seo_ranks.first[:host].should == 'www.17u.cn'
    end

    it "should put 同程旅游网客服电话 to be the first title" do
      subject.seo_ranks.first[:text].should == '同程旅游网客服电话'
    end

    it "should put 'http://www.17u.cn/' to be the second url" do
      subject.seo_ranks[1][:href].should == 'http://www.17u.cn/'
    end

    it "should have href,text,host elements for each seo result" do
      subject.seo_ranks.each do |seo_rank|
        seo_rank[:href].should_not == nil
        seo_rank[:text].should_not == nil
        seo_rank[:host].should_not == nil
      end
    end
  end

  describe '#ads_top' do
    it "should have 3 top ads" do
      subject.ads_top.size.should == 3
    end

    it "should find www.17u.cn at the first position in the top ads" do
      subject.ads_top[0][:host].should == 'www.17u.cn'
    end

    it "has an array of hashes with the required keys as the result of ads_top" do
      subject.ads_top.class.should == Array
      subject.ads_top.each do |ad_top|
        ad_top.should have_key(:rank)
        ad_top.should have_key(:host)
        ad_top.should have_key(:href)
        ad_top.should have_key(:text)
      end
    end
  end

  describe '#ads_right' do
    it "should have 5 right ads" do
      subject.ads_right.size.should == 5
    end

    it "has an array of hashes with the required keys as the result of ads_right" do
      subject.ads_right.class.should == Array
      subject.ads_right.each do |ad_right|
        ad_right.should have_key(:rank)
        ad_right.should have_key(:host)
        ad_right.should have_key(:href)
        ad_right.should have_key(:text)
      end
    end
  end

  describe '#ads_bottom' do
    it "should have 3 bottom ads" do
      subject.ads_bottom.size.should == 3
    end

    it "has an array of hashes with the required keys as the result of ads_bottom" do
      subject.ads_bottom.class.should == Array
      subject.ads_bottom.each do |ad_bottom|
        ad_bottom.should have_key(:rank)
        ad_bottom.should have_key(:host)
        ad_bottom.should have_key(:href)
        ad_bottom.should have_key(:text)
      end
    end
  end
end
|
89
|
+
__END__
|
90
|
+
describe Query::Engine::Baidu do
|
91
|
+
page = Query::Engine::Baidu.query '百度'
|
92
|
+
|
93
|
+
it "should return Query::Result::Baidu" do
|
94
|
+
page.class.should == Query::Result::Baidu
|
95
|
+
end
|
96
|
+
|
97
|
+
it "should return 100,000,000" do
|
98
|
+
page.count.should > 100000
|
99
|
+
end
|
100
|
+
it "should return 1" do
|
101
|
+
page.rank('www.baidu.com').should == 1
|
102
|
+
end
|
103
|
+
|
104
|
+
it "should return Query::Result::Baidu" do
|
105
|
+
page.next.class.should == Query::Result::Baidu
|
106
|
+
end
|
107
|
+
|
108
|
+
it "should return true" do
|
109
|
+
bool = Query::Engine::Baidu.popular?'百度'
|
110
|
+
bool.should == true
|
111
|
+
end
|
112
|
+
|
113
|
+
it "should return false" do
|
114
|
+
bool = Query::Engine::Baidu.popular?'lavataliuming'
|
115
|
+
bool.should == false
|
116
|
+
end
|
117
|
+
|
118
|
+
it "should return over 5 words beginning with the query_word" do
|
119
|
+
query_word = '为'
|
120
|
+
suggestions = Query::Engine::Baidu.suggestions(query_word)
|
121
|
+
suggestions.size.should > 5
|
122
|
+
suggestions.each do |suggestion|
|
123
|
+
suggestion[0].should == query_word
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
it "should return 100,000,000" do
|
128
|
+
result = baidu.pages('baidu.com')
|
129
|
+
result.class.should == Query::Result::Baidu
|
130
|
+
result.count.should == 100000000
|
131
|
+
end
|
132
|
+
|
133
|
+
it "should return 100,000,000" do
|
134
|
+
result = baidu.links('baidu.com')
|
135
|
+
result.class.should == Query::Result::Baidu
|
136
|
+
result.count.should == 100000000
|
137
|
+
end
|
138
|
+
it "should return 100,000,000" do
|
139
|
+
result = baidu.pages_with('baidu.com','baidu.com')
|
140
|
+
result.class.should == Query::Result::Baidu
|
141
|
+
result.count.should == 100000000
|
142
|
+
end
|
143
|
+
it "查询已经被收录的页面收录情况时,应返回true" do
|
144
|
+
baidu.indexed?('http://www.baidu.com').should == true
|
145
|
+
end
|
146
|
+
it "查询一个不存在的页面收录情况时,应返回true" do
|
147
|
+
baidu.indexed?('http://zxv.not-exists.com').should == false
|
148
|
+
end
|
149
|
+
page1 = Query::Engine::Baidu.query('seoaqua.com')
|
150
|
+
it "查询结果应该都能拿到title,href,host" do
|
151
|
+
page1.seo_ranks.each do |id,rank|
|
152
|
+
rank['href'].should_not == nil
|
153
|
+
rank['text'].should_not == nil
|
154
|
+
rank['host'].should_not == nil
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
data/spec/baidu2_spec.rb
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
#coding:UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
# Parser expectations for the second saved Baidu sample page ($sample_baidu2).
# Example descriptions were corrected to state the values actually asserted.
describe Query::Result::Baidu do
  subject{Query::Result::Baidu.new(File.read($sample_baidu2))}

  it "can click the next page button" do
    subject.next_url.should == '/s?wd=%E9%85%92%E5%BA%97%E9%A2%84%E8%AE%A2%E7%BD%91&pn=10&tn=baiduhome_pg&ie=utf-8&f=3&usm=1&rsv_page=1'
  end

  it "have 100000000 results" do
    subject.count.should == 100000000
  end

  describe '#seo_ranks' do
    it "should put hotel.qunar.com to be on first" do
      subject.seo_ranks.first[:host].should == 'hotel.qunar.com'
    end

    it "should put 北京酒店预订_8371家特惠酒店_百度品质保证_去哪儿网提供 to be the first title" do
      subject.seo_ranks.first[:text].should == '北京酒店预订_8371家特惠酒店_百度品质保证_去哪儿网提供'
    end

    it "should put the Baidu redirect link to be the second url" do
      subject.seo_ranks[1][:href].should == 'http://www.baidu.com/link?url=ZpGwUrZ8xUUgBQofg1TiNH1n_Ki3QWE62jvjkGvwwZ70wQPxFJxSD1uunh0uDwLM'
    end

    it "should have href,text,host elements for each seo result" do
      subject.seo_ranks.each do |seo_rank|
        seo_rank[:href].should_not == nil
        seo_rank[:text].should_not == nil
        seo_rank[:host].should_not == nil
      end
    end
  end

  describe '#ads_top' do
    it "should have 4 top ads" do
      subject.ads_top.size.should == 4
    end

    it "should find www.agoda.com at the first position in the top ads" do
      subject.ads_top[0][:host].should == 'www.agoda.com'
    end

    it "has an array of hashes with the required keys as the result of ads_top" do
      subject.ads_top.class.should == Array
      subject.ads_top.each do |ad_top|
        ad_top.should have_key(:rank)
        ad_top.should have_key(:host)
        ad_top.should have_key(:href)
        ad_top.should have_key(:text)
      end
    end
  end

  describe '#ads_right' do
    it "should have 8 right ads" do
      subject.ads_right.size.should == 8
    end

    it "has an array of hashes with the required keys as the result of ads_right" do
      subject.ads_right.class.should == Array
      subject.ads_right.each do |ad_right|
        ad_right.should have_key(:rank)
        ad_right.should have_key(:host)
        ad_right.should have_key(:href)
        ad_right.should have_key(:text)
      end
    end
  end

  describe '#ads_bottom' do
    it "should have zero bottom ads" do
      subject.ads_bottom.size.should == 0
    end

    it "has an array of hashes with the required keys as the result of ads_bottom" do
      subject.ads_bottom.class.should == Array
      subject.ads_bottom.each do |ad_bottom|
        ad_bottom.should have_key(:rank)
        ad_bottom.should have_key(:host)
        ad_bottom.should have_key(:href)
        ad_bottom.should have_key(:text)
      end
    end
  end
end
|
88
|
+
__END__
|
89
|
+
describe Query::Engine::Baidu do
|
90
|
+
page = Query::Engine::Baidu.query '百度'
|
91
|
+
|
92
|
+
it "should return Query::Result::Baidu" do
|
93
|
+
page.class.should == Query::Result::Baidu
|
94
|
+
end
|
95
|
+
|
96
|
+
it "should return 100,000,000" do
|
97
|
+
page.count.should > 100000
|
98
|
+
end
|
99
|
+
it "should return 1" do
|
100
|
+
page.rank('www.baidu.com').should == 1
|
101
|
+
end
|
102
|
+
|
103
|
+
it "should return Query::Result::Baidu" do
|
104
|
+
page.next.class.should == Query::Result::Baidu
|
105
|
+
end
|
106
|
+
|
107
|
+
it "should return true" do
|
108
|
+
bool = Query::Engine::Baidu.popular?'百度'
|
109
|
+
bool.should == true
|
110
|
+
end
|
111
|
+
|
112
|
+
it "should return false" do
|
113
|
+
bool = Query::Engine::Baidu.popular?'lavataliuming'
|
114
|
+
bool.should == false
|
115
|
+
end
|
116
|
+
|
117
|
+
it "should return over 5 words beginning with the query_word" do
|
118
|
+
query_word = '为'
|
119
|
+
suggestions = Query::Engine::Baidu.suggestions(query_word)
|
120
|
+
suggestions.size.should > 5
|
121
|
+
suggestions.each do |suggestion|
|
122
|
+
suggestion[0].should == query_word
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
it "should return 100,000,000" do
|
127
|
+
result = baidu.pages('baidu.com')
|
128
|
+
result.class.should == Query::Result::Baidu
|
129
|
+
result.count.should == 100000000
|
130
|
+
end
|
131
|
+
|
132
|
+
it "should return 100,000,000" do
|
133
|
+
result = baidu.links('baidu.com')
|
134
|
+
result.class.should == Query::Result::Baidu
|
135
|
+
result.count.should == 100000000
|
136
|
+
end
|
137
|
+
it "should return 100,000,000" do
|
138
|
+
result = baidu.pages_with('baidu.com','baidu.com')
|
139
|
+
result.class.should == Query::Result::Baidu
|
140
|
+
result.count.should == 100000000
|
141
|
+
end
|
142
|
+
it "查询已经被收录的页面收录情况时,应返回true" do
|
143
|
+
baidu.indexed?('http://www.baidu.com').should == true
|
144
|
+
end
|
145
|
+
it "查询一个不存在的页面收录情况时,应返回true" do
|
146
|
+
baidu.indexed?('http://zxv.not-exists.com').should == false
|
147
|
+
end
|
148
|
+
page1 = Query::Engine::Baidu.query('seoaqua.com')
|
149
|
+
it "查询结果应该都能拿到title,href,host" do
|
150
|
+
page1.seo_ranks.each do |id,rank|
|
151
|
+
rank['href'].should_not == nil
|
152
|
+
rank['text'].should_not == nil
|
153
|
+
rank['host'].should_not == nil
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|