query 0.0.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -1
- data/Gemfile +3 -1
- data/README.md +6 -1
- data/lib/query/engine/baidu.rb +12 -8
- data/lib/query/engine/baidu_mobile.rb +4 -4
- data/lib/query/engine/{qihoo.rb → qihu.rb} +8 -3
- data/lib/query/engine/{qihoo_mobile.rb → qihu_mobile.rb} +0 -0
- data/lib/query/engine/sogou.rb +45 -0
- data/lib/query/engine/sogou_mobile.rb +21 -0
- data/lib/query/engine.rb +11 -4
- data/lib/query/result/baidu.rb +57 -91
- data/lib/query/result/baidu_mobile.rb +49 -93
- data/lib/query/result/qihu.rb +66 -0
- data/lib/query/result/{qihoo_mobile.rb → qihu_mobile.rb} +1 -1
- data/lib/query/result/sogou.rb +103 -0
- data/lib/query/result/sogou_mobile.rb +51 -0
- data/lib/query/result.rb +47 -4
- data/lib/query/version.rb +1 -1
- data/lib/query.rb +6 -8
- data/query.gemspec +2 -3
- data/spec/baidu1_spec.rb +157 -0
- data/spec/baidu2_spec.rb +156 -0
- data/spec/mbaidu1_spec.rb +167 -0
- data/spec/msogou_spec.rb +91 -0
- data/spec/qihu_spec.rb +87 -0
- data/spec/samples/baidu1.html +521 -0
- data/spec/samples/baidu2.html +662 -0
- data/spec/samples/mbaidu1.html +2 -0
- data/spec/samples/mbaidu2.html +2 -0
- data/spec/samples/msogou.html +474 -0
- data/spec/samples/qihu.html +506 -0
- data/spec/samples/sogou.html +629 -0
- data/spec/sogou_mobile_spec.rb +86 -0
- data/spec/sogou_spec.rb +107 -0
- data/spec/spec_helper.rb +12 -1
- metadata +56 -31
- data/lib/query/engine/base.rb +0 -16
- data/lib/query/result/base.rb +0 -50
- data/lib/query/result/qihoo.rb +0 -75
- data/spec/baidu_mobile_spec.rb +0 -19
- data/spec/baidu_spec.rb +0 -73
- data/spec/qihoo_spec.rb +0 -27
@@ -0,0 +1,86 @@
|
|
1
|
+
#coding:UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
describe Query::Result::SogouMobile do
|
4
|
+
subject{Query::Engine::SogouMobile.query('酒店预订')}
|
5
|
+
it "is an instance of #{Query::Result::SogouMobile}" do
|
6
|
+
subject.class.should == Query::Result::SogouMobile
|
7
|
+
end
|
8
|
+
|
9
|
+
it "has an array of hashes with the required keys as the result of ads_top" do
|
10
|
+
subject.ads_top.class.should == Array
|
11
|
+
subject.ads_top.each do |ad_top|
|
12
|
+
ad_top.should have_key(:rank)
|
13
|
+
ad_top.should have_key(:host)
|
14
|
+
ad_top.should have_key(:href)
|
15
|
+
ad_top.should have_key(:text)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
it "has an array of hashes with the required keys as the result of ads_right" do
|
20
|
+
subject.ads_right.class.should == Array
|
21
|
+
subject.ads_right.each do |ad_right|
|
22
|
+
ad_right.should have_key(:rank)
|
23
|
+
ad_right.should have_key(:host)
|
24
|
+
ad_right.should have_key(:href)
|
25
|
+
ad_right.should have_key(:text)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
it "has an array of hashes with the required keys as the result of ads_bottom" do
|
30
|
+
subject.ads_bottom.class.should == Array
|
31
|
+
subject.ads_bottom.each do |ad_bottom|
|
32
|
+
ad_bottom.should have_key(:rank)
|
33
|
+
ad_bottom.should have_key(:host)
|
34
|
+
ad_bottom.should have_key(:href)
|
35
|
+
ad_bottom.should have_key(:text)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
result = Query::Engine::SogouMobile.query('酒店预订')
|
41
|
+
ads_top = result.ads_top
|
42
|
+
describe "types check" do
|
43
|
+
it "should return Query::Result::SogouMobile" do
|
44
|
+
result.class.should == Query::Result::SogouMobile
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should return Array" do
|
48
|
+
ads_top.class.should == Array
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should has keys" do
|
52
|
+
ads_top[0].should have_key(:rank)
|
53
|
+
ads_top[0].has_key?(:domain)
|
54
|
+
ads_top[0].has_key?(:host)
|
55
|
+
ads_top[0].has_key?(:href)
|
56
|
+
ads_top[0].has_key?(:title)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
describe Query::Result::SogouMobile do
|
61
|
+
subject{Query::Engine::SogouMobile.query '中华人民共和国中央人民政府'}
|
62
|
+
|
63
|
+
it "should be an instance of Query::Result::Sogou" do
|
64
|
+
subject.class.should == Query::Result::SogouMobile
|
65
|
+
end
|
66
|
+
|
67
|
+
it "'s next page is another instance of Query::Result::Sogou" do
|
68
|
+
subject.next.class.should == Query::Result::SogouMobile
|
69
|
+
end
|
70
|
+
|
71
|
+
it "have over 1000 results" do
|
72
|
+
subject.count.should be_nil
|
73
|
+
end
|
74
|
+
|
75
|
+
it "puts www.gov.cn to the first place of seo_ranks" do
|
76
|
+
subject.rank('www.gov.cn')[0].should == 1
|
77
|
+
end
|
78
|
+
|
79
|
+
it "should have href,text,host elements for each seo result" do
|
80
|
+
subject.seo_ranks.each do |seo_rank|
|
81
|
+
seo_rank[:href].should_not == nil
|
82
|
+
seo_rank[:text].should_not == nil
|
83
|
+
seo_rank[:host].should_not == nil
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
data/spec/sogou_spec.rb
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
#coding:UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Query::Result::Sogou do
|
5
|
+
subject{Query::Result::Sogou.new(File.read($sample_sogou))}
|
6
|
+
|
7
|
+
it "has an array of hashes with the required keys as the result of ads_top" do
|
8
|
+
subject.ads_top.class.should == Array
|
9
|
+
subject.ads_top.each do |ad_top|
|
10
|
+
ad_top.should have_key(:rank)
|
11
|
+
ad_top.should have_key(:host)
|
12
|
+
ad_top.should have_key(:href)
|
13
|
+
ad_top.should have_key(:text)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
it "has an array of hashes with the required keys as the result of ads_right" do
|
18
|
+
subject.ads_right.class.should == Array
|
19
|
+
subject.ads_right.each do |ad_right|
|
20
|
+
ad_right.should have_key(:rank)
|
21
|
+
ad_right.should have_key(:host)
|
22
|
+
ad_right.should have_key(:href)
|
23
|
+
ad_right.should have_key(:text)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
it "has an array of hashes with the required keys as the result of ads_bottom" do
|
28
|
+
subject.ads_bottom.class.should == Array
|
29
|
+
subject.ads_bottom.each do |ad_bottom|
|
30
|
+
ad_bottom.should have_key(:rank)
|
31
|
+
ad_bottom.should have_key(:host)
|
32
|
+
ad_bottom.should have_key(:href)
|
33
|
+
ad_bottom.should have_key(:text)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
describe Query::Result::Sogou do
|
40
|
+
subject{Query::Engine::Sogou.query '中华人民共和国中央人民政府'}
|
41
|
+
|
42
|
+
it "should be an instance of Query::Result::Sogou" do
|
43
|
+
subject.class.should == Query::Result::Sogou
|
44
|
+
end
|
45
|
+
|
46
|
+
it "'s next page is another instance of Query::Result::Sogou" do
|
47
|
+
subject.next.class.should == Query::Result::Sogou
|
48
|
+
end
|
49
|
+
|
50
|
+
it "have over 1000 results" do
|
51
|
+
subject.count.should > 1000
|
52
|
+
end
|
53
|
+
|
54
|
+
it "puts www.gov.cn to the first place of seo_ranks" do
|
55
|
+
subject.rank('www.gov.cn')[0].should == 1
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should have href,text,host elements for each seo result" do
|
59
|
+
subject.seo_ranks.each do |seo_rank|
|
60
|
+
seo_rank[:href].should_not == nil
|
61
|
+
seo_rank[:text].should_not == nil
|
62
|
+
seo_rank[:host].should_not == nil
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
describe Query::Engine::Sogou do
|
68
|
+
subject{Query::Engine::Sogou}
|
69
|
+
|
70
|
+
it "have more than 1 million baidu.com pages indexed" do
|
71
|
+
result = subject.pages('baidu.com')
|
72
|
+
result.class.should == Query::Result::Sogou
|
73
|
+
result.count.should > 1000000
|
74
|
+
end
|
75
|
+
|
76
|
+
it "have more than 100 links to baidu.com" do
|
77
|
+
result = subject.links('baidu.com')
|
78
|
+
result.class.should == Query::Result::Sogou
|
79
|
+
result.count.should > 100
|
80
|
+
end
|
81
|
+
|
82
|
+
it "查询已经被收录的页面收录情况时,应返回true" do
|
83
|
+
pending
|
84
|
+
subject.indexed?('http://www.baidu.com').should == true
|
85
|
+
end
|
86
|
+
|
87
|
+
it "查询一个不存在的页面收录情况时,应返回true" do
|
88
|
+
pending
|
89
|
+
subject.indexed?('http://zxv.not-exists.com').should == false
|
90
|
+
end
|
91
|
+
|
92
|
+
describe '#suggestions' do
|
93
|
+
query = '搜狗'
|
94
|
+
subject{Query::Engine::Sogou.suggestions(query)}
|
95
|
+
it 'should have more than one suggestions' do
|
96
|
+
subject.size.should > 1
|
97
|
+
end
|
98
|
+
|
99
|
+
it 'gives all suggestions with the query word at the start' do
|
100
|
+
subject.each do |suggestion|
|
101
|
+
suggestion.should start_with query
|
102
|
+
# (suggestion.start_with?query).should_be true
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
data/spec/spec_helper.rb
CHANGED
@@ -1 +1,12 @@
|
|
1
|
-
require 'query'
|
1
|
+
require 'query'
|
2
|
+
require 'pathname'
|
3
|
+
require 'awesome_print'
|
4
|
+
path_root = Pathname.new(__dir__)
|
5
|
+
path_samples = File.join(path_root,'samples')
|
6
|
+
$sample_qihu = File.join(path_samples,'qihu.html')
|
7
|
+
$sample_sogou = File.join(path_samples,'sogou.html')
|
8
|
+
$sample_msogou = File.join(path_samples,'msogou.html')
|
9
|
+
$sample_baidu1 = File.join(path_samples,'baidu1.html')
|
10
|
+
$sample_baidu2 = File.join(path_samples,'baidu2.html')
|
11
|
+
$sample_mbaidu1 = File.join(path_samples,'mbaidu1.html')
|
12
|
+
$sample_mbaidu2 = File.join(path_samples,'mbaidu2.html')
|
metadata
CHANGED
@@ -1,94 +1,94 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: query
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- seoaqua
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.3'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: nokogiri
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: addressable
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: httparty
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
-
description: This GEM is designed to work for SEOers who need to fetch query
|
84
|
-
results from all kinds of search engines
|
83
|
+
description: This GEM is designed to work for Chinese SEOers who need to fetch query
|
84
|
+
and parse results from all kinds of search engines
|
85
85
|
email:
|
86
86
|
- seoaqua@me.com
|
87
87
|
executables: []
|
88
88
|
extensions: []
|
89
89
|
extra_rdoc_files: []
|
90
90
|
files:
|
91
|
-
- .gitignore
|
91
|
+
- ".gitignore"
|
92
92
|
- Gemfile
|
93
93
|
- LICENSE
|
94
94
|
- LICENSE.txt
|
@@ -98,20 +98,33 @@ files:
|
|
98
98
|
- lib/query/engine.rb
|
99
99
|
- lib/query/engine/baidu.rb
|
100
100
|
- lib/query/engine/baidu_mobile.rb
|
101
|
-
- lib/query/engine/base.rb
|
102
|
-
- lib/query/engine/qihoo.rb
|
103
|
-
- lib/query/engine/qihoo_mobile.rb
|
101
|
+
- lib/query/engine/qihu.rb
|
102
|
+
- lib/query/engine/qihu_mobile.rb
|
103
|
+
- lib/query/engine/sogou.rb
|
104
|
+
- lib/query/engine/sogou_mobile.rb
|
104
105
|
- lib/query/result.rb
|
105
106
|
- lib/query/result/baidu.rb
|
106
107
|
- lib/query/result/baidu_mobile.rb
|
107
|
-
- lib/query/result/base.rb
|
108
|
-
- lib/query/result/qihoo.rb
|
109
|
-
- lib/query/result/qihoo_mobile.rb
|
108
|
+
- lib/query/result/qihu.rb
|
109
|
+
- lib/query/result/qihu_mobile.rb
|
110
|
+
- lib/query/result/sogou.rb
|
111
|
+
- lib/query/result/sogou_mobile.rb
|
110
112
|
- lib/query/version.rb
|
111
113
|
- query.gemspec
|
112
|
-
- spec/baidu_mobile_spec.rb
|
113
|
-
- spec/baidu_spec.rb
|
114
|
-
- spec/qihoo_spec.rb
|
114
|
+
- spec/baidu1_spec.rb
|
115
|
+
- spec/baidu2_spec.rb
|
116
|
+
- spec/mbaidu1_spec.rb
|
117
|
+
- spec/msogou_spec.rb
|
118
|
+
- spec/qihu_spec.rb
|
119
|
+
- spec/samples/baidu1.html
|
120
|
+
- spec/samples/baidu2.html
|
121
|
+
- spec/samples/mbaidu1.html
|
122
|
+
- spec/samples/mbaidu2.html
|
123
|
+
- spec/samples/msogou.html
|
124
|
+
- spec/samples/qihu.html
|
125
|
+
- spec/samples/sogou.html
|
126
|
+
- spec/sogou_mobile_spec.rb
|
127
|
+
- spec/sogou_spec.rb
|
115
128
|
- spec/spec_helper.rb
|
116
129
|
homepage: https://github.com/seoaqua/query
|
117
130
|
licenses:
|
@@ -123,22 +136,34 @@ require_paths:
|
|
123
136
|
- lib
|
124
137
|
required_ruby_version: !ruby/object:Gem::Requirement
|
125
138
|
requirements:
|
126
|
-
- -
|
139
|
+
- - ">="
|
127
140
|
- !ruby/object:Gem::Version
|
128
141
|
version: '0'
|
129
142
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
143
|
requirements:
|
131
|
-
- -
|
144
|
+
- - ">="
|
132
145
|
- !ruby/object:Gem::Version
|
133
146
|
version: '0'
|
134
147
|
requirements: []
|
135
148
|
rubyforge_project:
|
136
|
-
rubygems_version: 2.1
|
149
|
+
rubygems_version: 2.2.1
|
137
150
|
signing_key:
|
138
151
|
specification_version: 4
|
139
|
-
summary:
|
152
|
+
summary: I dont have time to write the document yet. Usage is almost within rspec
|
153
|
+
tests. Any questions,pls contact me with QQ628552
|
140
154
|
test_files:
|
141
|
-
- spec/baidu_mobile_spec.rb
|
142
|
-
- spec/baidu_spec.rb
|
143
|
-
- spec/qihoo_spec.rb
|
155
|
+
- spec/baidu1_spec.rb
|
156
|
+
- spec/baidu2_spec.rb
|
157
|
+
- spec/mbaidu1_spec.rb
|
158
|
+
- spec/msogou_spec.rb
|
159
|
+
- spec/qihu_spec.rb
|
160
|
+
- spec/samples/baidu1.html
|
161
|
+
- spec/samples/baidu2.html
|
162
|
+
- spec/samples/mbaidu1.html
|
163
|
+
- spec/samples/mbaidu2.html
|
164
|
+
- spec/samples/msogou.html
|
165
|
+
- spec/samples/qihu.html
|
166
|
+
- spec/samples/sogou.html
|
167
|
+
- spec/sogou_mobile_spec.rb
|
168
|
+
- spec/sogou_spec.rb
|
144
169
|
- spec/spec_helper.rb
|
data/lib/query/engine/base.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
module Query
|
2
|
-
module Engine
|
3
|
-
class Base
|
4
|
-
attr_accessor :perpage
|
5
|
-
#是否收录
|
6
|
-
# def initialize(perpage = 100)
|
7
|
-
# @perpage = perpage#只允许10或100
|
8
|
-
# end
|
9
|
-
def indexed?(url)
|
10
|
-
URI(url)
|
11
|
-
result = query(url)
|
12
|
-
return result.has_result?
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
data/lib/query/result/base.rb
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
module Query
|
2
|
-
module Result
|
3
|
-
class Base
|
4
|
-
attr_accessor :baseuri,:pagenumber,:perpage
|
5
|
-
def initialize(page)
|
6
|
-
@page = Nokogiri::HTML page
|
7
|
-
@pagenumber = 1
|
8
|
-
end
|
9
|
-
# def initialize(page,baseuri,pagenumber=1,perpage=100)
|
10
|
-
# @page = Nokogiri::HTML page
|
11
|
-
# @baseuri = baseuri
|
12
|
-
# # @host = URI(baseuri).host
|
13
|
-
# @pagenumber = pagenumber
|
14
|
-
# @perpage = perpage
|
15
|
-
# end
|
16
|
-
def whole
|
17
|
-
{
|
18
|
-
'ads_top'=>ads_top,
|
19
|
-
'ads_right'=>ads_right,
|
20
|
-
'ads_bottom'=>ads_bottom,
|
21
|
-
'ranks'=>ranks
|
22
|
-
}
|
23
|
-
end
|
24
|
-
#返回当前页中host满足条件的结果
|
25
|
-
def ranks_for(specific_host)
|
26
|
-
host_ranks = Hash.new
|
27
|
-
ranks.each do |id,line|
|
28
|
-
if specific_host.class == Regexp
|
29
|
-
host_ranks[id] = line if line['host'] =~ specific_host
|
30
|
-
elsif specific_host.class == String
|
31
|
-
host_ranks[id] = line if line['host'] == specific_host
|
32
|
-
end
|
33
|
-
end
|
34
|
-
host_ranks
|
35
|
-
end
|
36
|
-
#return the top rank number from @ranks with the input host
|
37
|
-
def rank(host)#on base of ranks
|
38
|
-
ranks.each do |id,line|
|
39
|
-
id = id.to_i
|
40
|
-
if host.class == Regexp
|
41
|
-
return id if line['host'] =~ host
|
42
|
-
elsif host.class == String
|
43
|
-
return id if line['host'] == host
|
44
|
-
end
|
45
|
-
end
|
46
|
-
return nil
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
data/lib/query/result/qihoo.rb
DELETED
@@ -1,75 +0,0 @@
|
|
1
|
-
module Query
|
2
|
-
module Result
|
3
|
-
class Qihoo < Base
|
4
|
-
# include Query::Result
|
5
|
-
Host = 'www.so.com'
|
6
|
-
#返回所有当前页的排名结果
|
7
|
-
def ranks
|
8
|
-
return @ranks unless @ranks.nil?
|
9
|
-
@ranks = Hash.new
|
10
|
-
# id = (@pagenumber - 1) * 10
|
11
|
-
id = 0
|
12
|
-
@page.search('//li[@class="res-list"]').each do |li|
|
13
|
-
a = li.search("h3/a").first
|
14
|
-
url = li.search("cite")
|
15
|
-
next if a['data-pos'].nil?
|
16
|
-
id += 1
|
17
|
-
text = a.text.strip
|
18
|
-
href = a['href']
|
19
|
-
url = url.first.text
|
20
|
-
host = Addressable::URI.parse(URI.encode("http://#{url}")).host
|
21
|
-
@ranks[id.to_s] = {'href'=>a['href'],'text'=>text,'host'=>host}
|
22
|
-
end
|
23
|
-
@ranks
|
24
|
-
end
|
25
|
-
def ads_top
|
26
|
-
id = 0
|
27
|
-
result = []
|
28
|
-
@page.search("//ul[@id='djbox']/li").each do |li|
|
29
|
-
id += 1
|
30
|
-
title = li.search("a").first.text
|
31
|
-
href = li.search("cite").first.text.downcase
|
32
|
-
host = Addressable::URI.parse(URI.encode(href)).host
|
33
|
-
result[id] = {'title'=>title,'href'=>href,'host'=>host}
|
34
|
-
end
|
35
|
-
result
|
36
|
-
end
|
37
|
-
def ads_bottom
|
38
|
-
[]
|
39
|
-
end
|
40
|
-
def ads_right
|
41
|
-
id = 0
|
42
|
-
result = []
|
43
|
-
@page.search("//ul[@id='rightbox']/li").each do |li|
|
44
|
-
id += 1
|
45
|
-
title = li.search("a").first.text
|
46
|
-
href = li.search("cite").first.text.downcase
|
47
|
-
host = Addressable::URI.parse(URI.encode(href)).host
|
48
|
-
result[id] = {'title'=>title,'href'=>href,'host'=>host}
|
49
|
-
end
|
50
|
-
result
|
51
|
-
end
|
52
|
-
def related_keywords
|
53
|
-
[]
|
54
|
-
end
|
55
|
-
#下一页
|
56
|
-
def next
|
57
|
-
next_href = @page.xpath('//a[@id="snext"]')
|
58
|
-
return false if next_href.empty?
|
59
|
-
next_href = next_href.first['href']
|
60
|
-
next_href = URI.join(@baseuri,next_href).to_s
|
61
|
-
# next_href = URI.join("http://#{@host}",next_href).to_s
|
62
|
-
next_page = HTTParty.get(next_href).next
|
63
|
-
r =Query::Result::Qihoo.new(next_page)
|
64
|
-
r.baseuri=next_href
|
65
|
-
r.pagenumber=@pagenumber+1
|
66
|
-
r
|
67
|
-
#@page = MbaiduResult.new(Mechanize.new.click(@page.link_with(:text=>/下一页/))) unless @page.link_with(:text=>/下一页/).nil?
|
68
|
-
end
|
69
|
-
#有结果
|
70
|
-
def has_result?
|
71
|
-
!@page.search('//div[@id="main"]/h3').text().include?'没有找到该URL'
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
data/spec/baidu_mobile_spec.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
#coding:UTF-8
|
2
|
-
require 'spec_helper'
|
3
|
-
describe Query::Engine::BaiduMobile do
|
4
|
-
mbaidu = Query::Engine::BaiduMobile.new
|
5
|
-
page = mbaidu.query '百度'
|
6
|
-
it "应返回#{Query::Engine::BaiduMobile}" do
|
7
|
-
page.class.should == Query::Result::BaiduMobile
|
8
|
-
end
|
9
|
-
it "下一页也应是Query::Engine::BaiduMobile" do
|
10
|
-
page.next.class.should == Query::Result::BaiduMobile
|
11
|
-
page.next.next.class.should == Query::Result::BaiduMobile
|
12
|
-
end
|
13
|
-
it "百度百科域名应该大于1" do
|
14
|
-
page.rank('wapbaike.baidu.com').should > 1
|
15
|
-
end
|
16
|
-
it "百度无线域名应该在10以内" do
|
17
|
-
page.rank('m.baidu.com').should < 11
|
18
|
-
end
|
19
|
-
end
|
data/spec/baidu_spec.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
#coding:UTF-8
|
2
|
-
require 'spec_helper'
|
3
|
-
describe Query::Engine::Baidu do
|
4
|
-
baidu = Query::Engine::Baidu.new
|
5
|
-
page = baidu.query '百度'
|
6
|
-
|
7
|
-
it "should return Query::Result::Baidu" do
|
8
|
-
page.class.should == Query::Result::Baidu
|
9
|
-
end
|
10
|
-
|
11
|
-
it "should return 100,000,000" do
|
12
|
-
page.count.should > 100000
|
13
|
-
end
|
14
|
-
it "should return 1" do
|
15
|
-
page.rank('www.baidu.com').should == 1
|
16
|
-
end
|
17
|
-
|
18
|
-
it "should return Query::Result::Baidu" do
|
19
|
-
page.next.class.should == Query::Result::Baidu
|
20
|
-
end
|
21
|
-
|
22
|
-
it "should return true" do
|
23
|
-
bool = Query::Engine::Baidu.popular?'百度'
|
24
|
-
bool.should == true
|
25
|
-
end
|
26
|
-
|
27
|
-
it "should return false" do
|
28
|
-
bool = Query::Engine::Baidu.popular?'lavataliuming'
|
29
|
-
bool.should == false
|
30
|
-
end
|
31
|
-
|
32
|
-
it "should return over 5 words beginning with the query_word" do
|
33
|
-
query_word = '为'
|
34
|
-
suggestions = Query::Engine::Baidu.suggestions(query_word)
|
35
|
-
suggestions.size.should > 5
|
36
|
-
suggestions.each do |suggestion|
|
37
|
-
suggestion[0].should == query_word
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
it "should return 100,000,000" do
|
42
|
-
result = baidu.pages('baidu.com')
|
43
|
-
result.class.should == Query::Result::Baidu
|
44
|
-
result.count.should == 100000000
|
45
|
-
end
|
46
|
-
|
47
|
-
it "should return 100,000,000" do
|
48
|
-
result = baidu.links('baidu.com')
|
49
|
-
result.class.should == Query::Result::Baidu
|
50
|
-
result.count.should == 100000000
|
51
|
-
end
|
52
|
-
it "should return 100,000,000" do
|
53
|
-
result = baidu.pages_with('baidu.com','baidu.com')
|
54
|
-
result.class.should == Query::Result::Baidu
|
55
|
-
result.count.should == 100000000
|
56
|
-
end
|
57
|
-
it "查询已经被收录的页面收录情况时,应返回true" do
|
58
|
-
baidu.indexed?('http://www.baidu.com').should == true
|
59
|
-
end
|
60
|
-
it "查询一个不存在的页面收录情况时,应返回true" do
|
61
|
-
baidu.indexed?('http://zxv.not-exists.com').should == false
|
62
|
-
end
|
63
|
-
page1 = baidu.query('seoaqua.com')
|
64
|
-
it "查询结果应该都能拿到title,href,host" do
|
65
|
-
page1.ranks.each do |id,rank|
|
66
|
-
rank['href'].should_not == nil
|
67
|
-
rank['text'].should_not == nil
|
68
|
-
rank['host'].should_not == nil
|
69
|
-
end
|
70
|
-
end
|
71
|
-
# ads_page = baidu.query '减肥药'
|
72
|
-
|
73
|
-
end
|
data/spec/qihoo_spec.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
#coding:UTF-8
|
2
|
-
require 'spec_helper'
|
3
|
-
describe Query::Engine::Qihoo do
|
4
|
-
qihoo = Query::Engine::Qihoo.new
|
5
|
-
page = qihoo.query '奇虎'
|
6
|
-
page2 = page.next
|
7
|
-
page3 = page2.next
|
8
|
-
it "查询关键词'奇虎'后,应返回正确的实例" do
|
9
|
-
page.class.should == Query::Result::Qihoo
|
10
|
-
end
|
11
|
-
it "查询关键词'奇虎'后,下一页也应是MbaiduResult的实例" do
|
12
|
-
page2.class.should == Query::Result::Qihoo
|
13
|
-
end
|
14
|
-
it "查询关键词'奇虎'后,下一页,再下一页也应是MbaiduResult的实例" do
|
15
|
-
page3.class.should == Query::Result::Qihoo
|
16
|
-
end
|
17
|
-
|
18
|
-
it "查询关键词'奇虎'后,奇虎首页域名应该等于1" do
|
19
|
-
page.rank('www.qihoo.com').should == 1
|
20
|
-
end
|
21
|
-
it "查询已经被收录的页面收录情况时,应返回true" do
|
22
|
-
qihoo.indexed?('http://www.360.cn').should == true
|
23
|
-
end
|
24
|
-
it "查询一个不存在的页面收录情况时,应返回true" do
|
25
|
-
qihoo.indexed?('http://zxv.not-exists.com').should == false
|
26
|
-
end
|
27
|
-
end
|