query 0.0.1 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +4 -1
- data/Gemfile +3 -1
- data/README.md +6 -1
- data/lib/query/engine/baidu.rb +12 -8
- data/lib/query/engine/baidu_mobile.rb +4 -4
- data/lib/query/engine/{qihoo.rb → qihu.rb} +8 -3
- data/lib/query/engine/{qihoo_mobile.rb → qihu_mobile.rb} +0 -0
- data/lib/query/engine/sogou.rb +45 -0
- data/lib/query/engine/sogou_mobile.rb +21 -0
- data/lib/query/engine.rb +11 -4
- data/lib/query/result/baidu.rb +57 -91
- data/lib/query/result/baidu_mobile.rb +49 -93
- data/lib/query/result/qihu.rb +66 -0
- data/lib/query/result/{qihoo_mobile.rb → qihu_mobile.rb} +1 -1
- data/lib/query/result/sogou.rb +103 -0
- data/lib/query/result/sogou_mobile.rb +51 -0
- data/lib/query/result.rb +47 -4
- data/lib/query/version.rb +1 -1
- data/lib/query.rb +6 -8
- data/query.gemspec +2 -3
- data/spec/baidu1_spec.rb +157 -0
- data/spec/baidu2_spec.rb +156 -0
- data/spec/mbaidu1_spec.rb +167 -0
- data/spec/msogou_spec.rb +91 -0
- data/spec/qihu_spec.rb +87 -0
- data/spec/samples/baidu1.html +521 -0
- data/spec/samples/baidu2.html +662 -0
- data/spec/samples/mbaidu1.html +2 -0
- data/spec/samples/mbaidu2.html +2 -0
- data/spec/samples/msogou.html +474 -0
- data/spec/samples/qihu.html +506 -0
- data/spec/samples/sogou.html +629 -0
- data/spec/sogou_mobile_spec.rb +86 -0
- data/spec/sogou_spec.rb +107 -0
- data/spec/spec_helper.rb +12 -1
- metadata +56 -31
- data/lib/query/engine/base.rb +0 -16
- data/lib/query/result/base.rb +0 -50
- data/lib/query/result/qihoo.rb +0 -75
- data/spec/baidu_mobile_spec.rb +0 -19
- data/spec/baidu_spec.rb +0 -73
- data/spec/qihoo_spec.rb +0 -27
@@ -0,0 +1,86 @@
|
|
1
|
+
#coding:UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
describe Query::Result::SogouMobile do
|
4
|
+
subject{Query::Engine::SogouMobile.query('酒店预订')}
|
5
|
+
it "is an instance of #{Query::Result::SogouMobile}" do
|
6
|
+
subject.class.should == Query::Result::SogouMobile
|
7
|
+
end
|
8
|
+
|
9
|
+
it "has an array of hashes with the required keys as the result of ads_top" do
|
10
|
+
subject.ads_top.class.should == Array
|
11
|
+
subject.ads_top.each do |ad_top|
|
12
|
+
ad_top.should have_key(:rank)
|
13
|
+
ad_top.should have_key(:host)
|
14
|
+
ad_top.should have_key(:href)
|
15
|
+
ad_top.should have_key(:text)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
it "has an array of hashes with the required keys as the result of ads_right" do
|
20
|
+
subject.ads_right.class.should == Array
|
21
|
+
subject.ads_right.each do |ad_right|
|
22
|
+
ad_right.should have_key(:rank)
|
23
|
+
ad_right.should have_key(:host)
|
24
|
+
ad_right.should have_key(:href)
|
25
|
+
ad_right.should have_key(:text)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
it "has an array of hashes with the required keys as the result of ads_bottom" do
|
30
|
+
subject.ads_bottom.class.should == Array
|
31
|
+
subject.ads_bottom.each do |ad_bottom|
|
32
|
+
ad_bottom.should have_key(:rank)
|
33
|
+
ad_bottom.should have_key(:host)
|
34
|
+
ad_bottom.should have_key(:href)
|
35
|
+
ad_bottom.should have_key(:text)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
result = Query::Engine::SogouMobile.query('酒店预订')
|
41
|
+
ads_top = result.ads_top
|
42
|
+
describe "types check" do
|
43
|
+
it "should return Query::Result::SogouMobile" do
|
44
|
+
result.class.should == Query::Result::SogouMobile
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should return Array" do
|
48
|
+
ads_top.class.should == Array
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should has keys" do
|
52
|
+
ads_top[0].should have_key(:rank)
|
53
|
+
ads_top[0].has_key?(:domain)
|
54
|
+
ads_top[0].has_key?(:host)
|
55
|
+
ads_top[0].has_key?(:href)
|
56
|
+
ads_top[0].has_key?(:title)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
describe Query::Result::SogouMobile do
|
61
|
+
subject{Query::Engine::SogouMobile.query '中华人民共和国中央人民政府'}
|
62
|
+
|
63
|
+
it "should be an instance of Query::Result::Sogou" do
|
64
|
+
subject.class.should == Query::Result::SogouMobile
|
65
|
+
end
|
66
|
+
|
67
|
+
it "'s next page is another instance of Query::Result::Sogou" do
|
68
|
+
subject.next.class.should == Query::Result::SogouMobile
|
69
|
+
end
|
70
|
+
|
71
|
+
it "have over 1000 results" do
|
72
|
+
subject.count.should be_nil
|
73
|
+
end
|
74
|
+
|
75
|
+
it "puts www.gov.cn to the first place of seo_ranks" do
|
76
|
+
subject.rank('www.gov.cn')[0].should == 1
|
77
|
+
end
|
78
|
+
|
79
|
+
it "should have href,text,host elements for each seo result" do
|
80
|
+
subject.seo_ranks.each do |seo_rank|
|
81
|
+
seo_rank[:href].should_not == nil
|
82
|
+
seo_rank[:text].should_not == nil
|
83
|
+
seo_rank[:host].should_not == nil
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
data/spec/sogou_spec.rb
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
#coding:UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Query::Result::Sogou do
|
5
|
+
subject{Query::Result::Sogou.new(File.read($sample_sogou))}
|
6
|
+
|
7
|
+
it "has an array of hashes with the required keys as the result of ads_top" do
|
8
|
+
subject.ads_top.class.should == Array
|
9
|
+
subject.ads_top.each do |ad_top|
|
10
|
+
ad_top.should have_key(:rank)
|
11
|
+
ad_top.should have_key(:host)
|
12
|
+
ad_top.should have_key(:href)
|
13
|
+
ad_top.should have_key(:text)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
it "has an array of hashes with the required keys as the result of ads_right" do
|
18
|
+
subject.ads_right.class.should == Array
|
19
|
+
subject.ads_right.each do |ad_right|
|
20
|
+
ad_right.should have_key(:rank)
|
21
|
+
ad_right.should have_key(:host)
|
22
|
+
ad_right.should have_key(:href)
|
23
|
+
ad_right.should have_key(:text)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
it "has an array of hashes with the required keys as the result of ads_bottom" do
|
28
|
+
subject.ads_bottom.class.should == Array
|
29
|
+
subject.ads_bottom.each do |ad_bottom|
|
30
|
+
ad_bottom.should have_key(:rank)
|
31
|
+
ad_bottom.should have_key(:host)
|
32
|
+
ad_bottom.should have_key(:href)
|
33
|
+
ad_bottom.should have_key(:text)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
describe Query::Result::Sogou do
|
40
|
+
subject{Query::Engine::Sogou.query '中华人民共和国中央人民政府'}
|
41
|
+
|
42
|
+
it "should be an instance of Query::Result::Sogou" do
|
43
|
+
subject.class.should == Query::Result::Sogou
|
44
|
+
end
|
45
|
+
|
46
|
+
it "'s next page is another instance of Query::Result::Sogou" do
|
47
|
+
subject.next.class.should == Query::Result::Sogou
|
48
|
+
end
|
49
|
+
|
50
|
+
it "have over 1000 results" do
|
51
|
+
subject.count.should > 1000
|
52
|
+
end
|
53
|
+
|
54
|
+
it "puts www.gov.cn to the first place of seo_ranks" do
|
55
|
+
subject.rank('www.gov.cn')[0].should == 1
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should have href,text,host elements for each seo result" do
|
59
|
+
subject.seo_ranks.each do |seo_rank|
|
60
|
+
seo_rank[:href].should_not == nil
|
61
|
+
seo_rank[:text].should_not == nil
|
62
|
+
seo_rank[:host].should_not == nil
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
describe Query::Engine::Sogou do
|
68
|
+
subject{Query::Engine::Sogou}
|
69
|
+
|
70
|
+
it "have more than 1 million baidu.com pages indexed" do
|
71
|
+
result = subject.pages('baidu.com')
|
72
|
+
result.class.should == Query::Result::Sogou
|
73
|
+
result.count.should > 1000000
|
74
|
+
end
|
75
|
+
|
76
|
+
it "have more than 100 links to baidu.com" do
|
77
|
+
result = subject.links('baidu.com')
|
78
|
+
result.class.should == Query::Result::Sogou
|
79
|
+
result.count.should > 100
|
80
|
+
end
|
81
|
+
|
82
|
+
it "查询已经被收录的页面收录情况时,应返回true" do
|
83
|
+
pending
|
84
|
+
subject.indexed?('http://www.baidu.com').should == true
|
85
|
+
end
|
86
|
+
|
87
|
+
it "查询一个不存在的页面收录情况时,应返回true" do
|
88
|
+
pending
|
89
|
+
subject.indexed?('http://zxv.not-exists.com').should == false
|
90
|
+
end
|
91
|
+
|
92
|
+
describe '#suggestions' do
|
93
|
+
query = '搜狗'
|
94
|
+
subject{Query::Engine::Sogou.suggestions(query)}
|
95
|
+
it 'should have more than one suggestions' do
|
96
|
+
subject.size.should > 1
|
97
|
+
end
|
98
|
+
|
99
|
+
it 'gives all suggestions with the query word at the start' do
|
100
|
+
subject.each do |suggestion|
|
101
|
+
suggestion.should start_with query
|
102
|
+
# (suggestion.start_with?query).should_be true
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
data/spec/spec_helper.rb
CHANGED
@@ -1 +1,12 @@
|
|
1
|
-
require 'query'
|
1
|
+
require 'query'
|
2
|
+
require 'pathname'
|
3
|
+
require 'awesome_print'
|
4
|
+
path_root = Pathname.new(__dir__)
|
5
|
+
path_samples = File.join(path_root,'samples')
|
6
|
+
$sample_qihu = File.join(path_samples,'qihu.html')
|
7
|
+
$sample_sogou = File.join(path_samples,'sogou.html')
|
8
|
+
$sample_msogou = File.join(path_samples,'msogou.html')
|
9
|
+
$sample_baidu1 = File.join(path_samples,'baidu1.html')
|
10
|
+
$sample_baidu2 = File.join(path_samples,'baidu2.html')
|
11
|
+
$sample_mbaidu1 = File.join(path_samples,'mbaidu1.html')
|
12
|
+
$sample_mbaidu2 = File.join(path_samples,'mbaidu2.html')
|
metadata
CHANGED
@@ -1,94 +1,94 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: query
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- seoaqua
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.3'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: nokogiri
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: addressable
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: httparty
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
-
description: This GEM is designed to work for SEOers who need to fetch query
|
84
|
-
results from all kinds of search engines
|
83
|
+
description: This GEM is designed to work for Chinese SEOers who need to fetch query
|
84
|
+
and parse results from all kinds of search engines
|
85
85
|
email:
|
86
86
|
- seoaqua@me.com
|
87
87
|
executables: []
|
88
88
|
extensions: []
|
89
89
|
extra_rdoc_files: []
|
90
90
|
files:
|
91
|
-
- .gitignore
|
91
|
+
- ".gitignore"
|
92
92
|
- Gemfile
|
93
93
|
- LICENSE
|
94
94
|
- LICENSE.txt
|
@@ -98,20 +98,33 @@ files:
|
|
98
98
|
- lib/query/engine.rb
|
99
99
|
- lib/query/engine/baidu.rb
|
100
100
|
- lib/query/engine/baidu_mobile.rb
|
101
|
-
- lib/query/engine/base.rb
|
102
|
-
- lib/query/engine/qihoo.rb
|
103
|
-
- lib/query/engine/qihoo_mobile.rb
|
101
|
+
- lib/query/engine/qihu.rb
|
102
|
+
- lib/query/engine/qihu_mobile.rb
|
103
|
+
- lib/query/engine/sogou.rb
|
104
|
+
- lib/query/engine/sogou_mobile.rb
|
104
105
|
- lib/query/result.rb
|
105
106
|
- lib/query/result/baidu.rb
|
106
107
|
- lib/query/result/baidu_mobile.rb
|
107
|
-
- lib/query/result/base.rb
|
108
|
-
- lib/query/result/qihoo.rb
|
109
|
-
- lib/query/result/qihoo_mobile.rb
|
108
|
+
- lib/query/result/qihu.rb
|
109
|
+
- lib/query/result/qihu_mobile.rb
|
110
|
+
- lib/query/result/sogou.rb
|
111
|
+
- lib/query/result/sogou_mobile.rb
|
110
112
|
- lib/query/version.rb
|
111
113
|
- query.gemspec
|
112
|
-
- spec/baidu_mobile_spec.rb
|
113
|
-
- spec/baidu_spec.rb
|
114
|
-
- spec/qihoo_spec.rb
|
114
|
+
- spec/baidu1_spec.rb
|
115
|
+
- spec/baidu2_spec.rb
|
116
|
+
- spec/mbaidu1_spec.rb
|
117
|
+
- spec/msogou_spec.rb
|
118
|
+
- spec/qihu_spec.rb
|
119
|
+
- spec/samples/baidu1.html
|
120
|
+
- spec/samples/baidu2.html
|
121
|
+
- spec/samples/mbaidu1.html
|
122
|
+
- spec/samples/mbaidu2.html
|
123
|
+
- spec/samples/msogou.html
|
124
|
+
- spec/samples/qihu.html
|
125
|
+
- spec/samples/sogou.html
|
126
|
+
- spec/sogou_mobile_spec.rb
|
127
|
+
- spec/sogou_spec.rb
|
115
128
|
- spec/spec_helper.rb
|
116
129
|
homepage: https://github.com/seoaqua/query
|
117
130
|
licenses:
|
@@ -123,22 +136,34 @@ require_paths:
|
|
123
136
|
- lib
|
124
137
|
required_ruby_version: !ruby/object:Gem::Requirement
|
125
138
|
requirements:
|
126
|
-
- -
|
139
|
+
- - ">="
|
127
140
|
- !ruby/object:Gem::Version
|
128
141
|
version: '0'
|
129
142
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
143
|
requirements:
|
131
|
-
- -
|
144
|
+
- - ">="
|
132
145
|
- !ruby/object:Gem::Version
|
133
146
|
version: '0'
|
134
147
|
requirements: []
|
135
148
|
rubyforge_project:
|
136
|
-
rubygems_version: 2.1
|
149
|
+
rubygems_version: 2.2.1
|
137
150
|
signing_key:
|
138
151
|
specification_version: 4
|
139
|
-
summary:
|
152
|
+
summary: I dont have time to write the document yet. Usage is almost within rspec
|
153
|
+
tests. Any questions,pls contact me with QQ628552
|
140
154
|
test_files:
|
141
|
-
- spec/baidu_mobile_spec.rb
|
142
|
-
- spec/baidu_spec.rb
|
143
|
-
- spec/qihoo_spec.rb
|
155
|
+
- spec/baidu1_spec.rb
|
156
|
+
- spec/baidu2_spec.rb
|
157
|
+
- spec/mbaidu1_spec.rb
|
158
|
+
- spec/msogou_spec.rb
|
159
|
+
- spec/qihu_spec.rb
|
160
|
+
- spec/samples/baidu1.html
|
161
|
+
- spec/samples/baidu2.html
|
162
|
+
- spec/samples/mbaidu1.html
|
163
|
+
- spec/samples/mbaidu2.html
|
164
|
+
- spec/samples/msogou.html
|
165
|
+
- spec/samples/qihu.html
|
166
|
+
- spec/samples/sogou.html
|
167
|
+
- spec/sogou_mobile_spec.rb
|
168
|
+
- spec/sogou_spec.rb
|
144
169
|
- spec/spec_helper.rb
|
data/lib/query/engine/base.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
module Query
|
2
|
-
module Engine
|
3
|
-
class Base
|
4
|
-
attr_accessor :perpage
|
5
|
-
#是否收录
|
6
|
-
# def initialize(perpage = 100)
|
7
|
-
# @perpage = perpage#只允许10或100
|
8
|
-
# end
|
9
|
-
def indexed?(url)
|
10
|
-
URI(url)
|
11
|
-
result = query(url)
|
12
|
-
return result.has_result?
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
data/lib/query/result/base.rb
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
module Query
|
2
|
-
module Result
|
3
|
-
class Base
|
4
|
-
attr_accessor :baseuri,:pagenumber,:perpage
|
5
|
-
def initialize(page)
|
6
|
-
@page = Nokogiri::HTML page
|
7
|
-
@pagenumber = 1
|
8
|
-
end
|
9
|
-
# def initialize(page,baseuri,pagenumber=1,perpage=100)
|
10
|
-
# @page = Nokogiri::HTML page
|
11
|
-
# @baseuri = baseuri
|
12
|
-
# # @host = URI(baseuri).host
|
13
|
-
# @pagenumber = pagenumber
|
14
|
-
# @perpage = perpage
|
15
|
-
# end
|
16
|
-
def whole
|
17
|
-
{
|
18
|
-
'ads_top'=>ads_top,
|
19
|
-
'ads_right'=>ads_right,
|
20
|
-
'ads_bottom'=>ads_bottom,
|
21
|
-
'ranks'=>ranks
|
22
|
-
}
|
23
|
-
end
|
24
|
-
#返回当前页中host满足条件的结果
|
25
|
-
def ranks_for(specific_host)
|
26
|
-
host_ranks = Hash.new
|
27
|
-
ranks.each do |id,line|
|
28
|
-
if specific_host.class == Regexp
|
29
|
-
host_ranks[id] = line if line['host'] =~ specific_host
|
30
|
-
elsif specific_host.class == String
|
31
|
-
host_ranks[id] = line if line['host'] == specific_host
|
32
|
-
end
|
33
|
-
end
|
34
|
-
host_ranks
|
35
|
-
end
|
36
|
-
#return the top rank number from @ranks with the input host
|
37
|
-
def rank(host)#on base of ranks
|
38
|
-
ranks.each do |id,line|
|
39
|
-
id = id.to_i
|
40
|
-
if host.class == Regexp
|
41
|
-
return id if line['host'] =~ host
|
42
|
-
elsif host.class == String
|
43
|
-
return id if line['host'] == host
|
44
|
-
end
|
45
|
-
end
|
46
|
-
return nil
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
data/lib/query/result/qihoo.rb
DELETED
@@ -1,75 +0,0 @@
|
|
1
|
-
module Query
|
2
|
-
module Result
|
3
|
-
class Qihoo < Base
|
4
|
-
# include Query::Result
|
5
|
-
Host = 'www.so.com'
|
6
|
-
#返回所有当前页的排名结果
|
7
|
-
def ranks
|
8
|
-
return @ranks unless @ranks.nil?
|
9
|
-
@ranks = Hash.new
|
10
|
-
# id = (@pagenumber - 1) * 10
|
11
|
-
id = 0
|
12
|
-
@page.search('//li[@class="res-list"]').each do |li|
|
13
|
-
a = li.search("h3/a").first
|
14
|
-
url = li.search("cite")
|
15
|
-
next if a['data-pos'].nil?
|
16
|
-
id += 1
|
17
|
-
text = a.text.strip
|
18
|
-
href = a['href']
|
19
|
-
url = url.first.text
|
20
|
-
host = Addressable::URI.parse(URI.encode("http://#{url}")).host
|
21
|
-
@ranks[id.to_s] = {'href'=>a['href'],'text'=>text,'host'=>host}
|
22
|
-
end
|
23
|
-
@ranks
|
24
|
-
end
|
25
|
-
def ads_top
|
26
|
-
id = 0
|
27
|
-
result = []
|
28
|
-
@page.search("//ul[@id='djbox']/li").each do |li|
|
29
|
-
id += 1
|
30
|
-
title = li.search("a").first.text
|
31
|
-
href = li.search("cite").first.text.downcase
|
32
|
-
host = Addressable::URI.parse(URI.encode(href)).host
|
33
|
-
result[id] = {'title'=>title,'href'=>href,'host'=>host}
|
34
|
-
end
|
35
|
-
result
|
36
|
-
end
|
37
|
-
def ads_bottom
|
38
|
-
[]
|
39
|
-
end
|
40
|
-
def ads_right
|
41
|
-
id = 0
|
42
|
-
result = []
|
43
|
-
@page.search("//ul[@id='rightbox']/li").each do |li|
|
44
|
-
id += 1
|
45
|
-
title = li.search("a").first.text
|
46
|
-
href = li.search("cite").first.text.downcase
|
47
|
-
host = Addressable::URI.parse(URI.encode(href)).host
|
48
|
-
result[id] = {'title'=>title,'href'=>href,'host'=>host}
|
49
|
-
end
|
50
|
-
result
|
51
|
-
end
|
52
|
-
def related_keywords
|
53
|
-
[]
|
54
|
-
end
|
55
|
-
#下一页
|
56
|
-
def next
|
57
|
-
next_href = @page.xpath('//a[@id="snext"]')
|
58
|
-
return false if next_href.empty?
|
59
|
-
next_href = next_href.first['href']
|
60
|
-
next_href = URI.join(@baseuri,next_href).to_s
|
61
|
-
# next_href = URI.join("http://#{@host}",next_href).to_s
|
62
|
-
next_page = HTTParty.get(next_href).next
|
63
|
-
r =Query::Result::Qihoo.new(next_page)
|
64
|
-
r.baseuri=next_href
|
65
|
-
r.pagenumber=@pagenumber+1
|
66
|
-
r
|
67
|
-
#@page = MbaiduResult.new(Mechanize.new.click(@page.link_with(:text=>/下一页/))) unless @page.link_with(:text=>/下一页/).nil?
|
68
|
-
end
|
69
|
-
#有结果
|
70
|
-
def has_result?
|
71
|
-
!@page.search('//div[@id="main"]/h3').text().include?'没有找到该URL'
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
data/spec/baidu_mobile_spec.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
#coding:UTF-8
|
2
|
-
require 'spec_helper'
|
3
|
-
describe Query::Engine::BaiduMobile do
|
4
|
-
mbaidu = Query::Engine::BaiduMobile.new
|
5
|
-
page = mbaidu.query '百度'
|
6
|
-
it "应返回#{Query::Engine::BaiduMobile}" do
|
7
|
-
page.class.should == Query::Result::BaiduMobile
|
8
|
-
end
|
9
|
-
it "下一页也应是Query::Engine::BaiduMobile" do
|
10
|
-
page.next.class.should == Query::Result::BaiduMobile
|
11
|
-
page.next.next.class.should == Query::Result::BaiduMobile
|
12
|
-
end
|
13
|
-
it "百度百科域名应该大于1" do
|
14
|
-
page.rank('wapbaike.baidu.com').should > 1
|
15
|
-
end
|
16
|
-
it "百度无线域名应该在10以内" do
|
17
|
-
page.rank('m.baidu.com').should < 11
|
18
|
-
end
|
19
|
-
end
|
data/spec/baidu_spec.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
#coding:UTF-8
|
2
|
-
require 'spec_helper'
|
3
|
-
describe Query::Engine::Baidu do
|
4
|
-
baidu = Query::Engine::Baidu.new
|
5
|
-
page = baidu.query '百度'
|
6
|
-
|
7
|
-
it "should return Query::Result::Baidu" do
|
8
|
-
page.class.should == Query::Result::Baidu
|
9
|
-
end
|
10
|
-
|
11
|
-
it "should return 100,000,000" do
|
12
|
-
page.count.should > 100000
|
13
|
-
end
|
14
|
-
it "should return 1" do
|
15
|
-
page.rank('www.baidu.com').should == 1
|
16
|
-
end
|
17
|
-
|
18
|
-
it "should return Query::Result::Baidu" do
|
19
|
-
page.next.class.should == Query::Result::Baidu
|
20
|
-
end
|
21
|
-
|
22
|
-
it "should return true" do
|
23
|
-
bool = Query::Engine::Baidu.popular?'百度'
|
24
|
-
bool.should == true
|
25
|
-
end
|
26
|
-
|
27
|
-
it "should return false" do
|
28
|
-
bool = Query::Engine::Baidu.popular?'lavataliuming'
|
29
|
-
bool.should == false
|
30
|
-
end
|
31
|
-
|
32
|
-
it "should return over 5 words beginning with the query_word" do
|
33
|
-
query_word = '为'
|
34
|
-
suggestions = Query::Engine::Baidu.suggestions(query_word)
|
35
|
-
suggestions.size.should > 5
|
36
|
-
suggestions.each do |suggestion|
|
37
|
-
suggestion[0].should == query_word
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
it "should return 100,000,000" do
|
42
|
-
result = baidu.pages('baidu.com')
|
43
|
-
result.class.should == Query::Result::Baidu
|
44
|
-
result.count.should == 100000000
|
45
|
-
end
|
46
|
-
|
47
|
-
it "should return 100,000,000" do
|
48
|
-
result = baidu.links('baidu.com')
|
49
|
-
result.class.should == Query::Result::Baidu
|
50
|
-
result.count.should == 100000000
|
51
|
-
end
|
52
|
-
it "should return 100,000,000" do
|
53
|
-
result = baidu.pages_with('baidu.com','baidu.com')
|
54
|
-
result.class.should == Query::Result::Baidu
|
55
|
-
result.count.should == 100000000
|
56
|
-
end
|
57
|
-
it "查询已经被收录的页面收录情况时,应返回true" do
|
58
|
-
baidu.indexed?('http://www.baidu.com').should == true
|
59
|
-
end
|
60
|
-
it "查询一个不存在的页面收录情况时,应返回true" do
|
61
|
-
baidu.indexed?('http://zxv.not-exists.com').should == false
|
62
|
-
end
|
63
|
-
page1 = baidu.query('seoaqua.com')
|
64
|
-
it "查询结果应该都能拿到title,href,host" do
|
65
|
-
page1.ranks.each do |id,rank|
|
66
|
-
rank['href'].should_not == nil
|
67
|
-
rank['text'].should_not == nil
|
68
|
-
rank['host'].should_not == nil
|
69
|
-
end
|
70
|
-
end
|
71
|
-
# ads_page = baidu.query '减肥药'
|
72
|
-
|
73
|
-
end
|
data/spec/qihoo_spec.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
#coding:UTF-8
|
2
|
-
require 'spec_helper'
|
3
|
-
describe Query::Engine::Qihoo do
|
4
|
-
qihoo = Query::Engine::Qihoo.new
|
5
|
-
page = qihoo.query '奇虎'
|
6
|
-
page2 = page.next
|
7
|
-
page3 = page2.next
|
8
|
-
it "查询关键词'奇虎'后,应返回正确的实例" do
|
9
|
-
page.class.should == Query::Result::Qihoo
|
10
|
-
end
|
11
|
-
it "查询关键词'奇虎'后,下一页也应是MbaiduResult的实例" do
|
12
|
-
page2.class.should == Query::Result::Qihoo
|
13
|
-
end
|
14
|
-
it "查询关键词'奇虎'后,下一页,再下一页也应是MbaiduResult的实例" do
|
15
|
-
page3.class.should == Query::Result::Qihoo
|
16
|
-
end
|
17
|
-
|
18
|
-
it "查询关键词'奇虎'后,奇虎首页域名应该等于1" do
|
19
|
-
page.rank('www.qihoo.com').should == 1
|
20
|
-
end
|
21
|
-
it "查询已经被收录的页面收录情况时,应返回true" do
|
22
|
-
qihoo.indexed?('http://www.360.cn').should == true
|
23
|
-
end
|
24
|
-
it "查询一个不存在的页面收录情况时,应返回true" do
|
25
|
-
qihoo.indexed?('http://zxv.not-exists.com').should == false
|
26
|
-
end
|
27
|
-
end
|