query 0.1.4 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/README.md +4 -1
- data/lib/query/result/baidu.rb +76 -76
- data/lib/query/result/qihu.rb +2 -1
- data/lib/query/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fd353429ae373984a436cf6ac00061be274feca7
|
4
|
+
data.tar.gz: 9d7505c017f842ea9aa22398dfd871b0431b0d72
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66b91a869fedb625008905b11e6c960bab5057e6689b4268219ce66a3446fd4b09873b0d91d3b48c54514918cec7fd2ea7fe3a8e37fefd35e9e0f2197787675e
|
7
|
+
data.tar.gz: 3d324814ca1997e351ca2d36ade7f7a4d287e8c78b8f13020b09e6b0080793b4fc91ab98c1a394154b3633ec551eae05f99adc957dbfdcca587537ee8c32fb9b
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -22,7 +22,10 @@ end
|
|
22
22
|
|
23
23
|
# to get the top rank of host "www.abc.com.cn" by querying "abc"
|
24
24
|
|
25
|
-
|
25
|
+
```ruby
|
26
|
+
puts Query::Engine::Baidu.new.query("abc").rank("www.abc.com.cn")
|
27
|
+
#[3,1,2,4] => [rank_seo, rank_top_ads, rank_right_ads, rank_bottom_ads]
|
28
|
+
```
|
26
29
|
|
27
30
|
TODO:
|
28
31
|
查询结果不多,翻页不存在时的处理,及rspec
|
data/lib/query/result/baidu.rb
CHANGED
@@ -1,87 +1,87 @@
|
|
1
1
|
module Query
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
2
|
+
module Result
|
3
|
+
class Baidu
|
4
|
+
include Query::Result
|
5
|
+
def seo_ranks
|
6
|
+
return @ranks unless @ranks.nil?
|
7
|
+
@page.search("//*[@class='result']|//*[@class='result-op']|//*[@class='result-op c-container']").map.with_index do |table,index|
|
8
|
+
parse_seo(table).merge({:rank => index + 1})
|
9
|
+
end
|
10
|
+
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
12
|
+
def ads_top
|
13
|
+
@page.search("//*[@class='result']/preceding-sibling::*[contains(@class,'EC_result')]").map.with_index do |div, index|
|
14
|
+
parse_ad(div).merge(:rank => index + 1)
|
15
|
+
end
|
16
|
+
end
|
17
17
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
18
|
+
def ads_bottom
|
19
|
+
@page.search("//*[@class='result']/following-sibling::*[contains(@class,'EC_result')]").map.with_index do |div,index|
|
20
|
+
parse_ad(div).merge(:rank => index + 1)
|
21
|
+
end
|
22
|
+
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
24
|
+
def ads_right
|
25
|
+
@page.search("//div[@id='ec_im_container']/div[@id]").map.with_index do |div,index|
|
26
|
+
a = div.search('a').first
|
27
|
+
url = div.search("*[@class='EC_url']").first.text
|
28
|
+
url = "http://#{url}"
|
29
|
+
{
|
30
|
+
:rank => index + 1,
|
31
|
+
:text => a.text.strip,
|
32
|
+
:href => a['href'].strip,
|
33
|
+
:host => Addressable::URI.parse(URI.encode(url)).host
|
34
|
+
}
|
35
|
+
end
|
36
|
+
end
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
38
|
+
def count
|
39
|
+
@count ||= @page.search("//span[@class='nums']").map{|num|num.content.gsub(/\D/,'').to_i unless num.nil?}.first
|
40
|
+
end
|
41
41
|
|
42
|
-
|
43
|
-
|
44
|
-
|
42
|
+
def related_keywords
|
43
|
+
@related_keywords ||= @page.search("//div[@id=\"rs\"]//tr//a").map{|keyword| keyword.text}
|
44
|
+
end
|
45
45
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
def has_result?
|
47
|
+
submit = @page.search('//a[text()="提交网址"]').first
|
48
|
+
return false if submit and submit['href'].include?'sitesubmit'
|
49
|
+
return true
|
50
|
+
end
|
51
51
|
|
52
|
-
|
53
|
-
|
54
|
-
|
52
|
+
def next_url
|
53
|
+
@page.search("//a[text()='下一页>']").first['href']
|
54
|
+
end
|
55
55
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
56
|
+
private
|
57
|
+
def parse_ad(div)
|
58
|
+
#@todo should be :
|
59
|
+
#title = div.xpath("*[contains(@class,'ec_title')]",MyFilter.new).first
|
60
|
+
title = div.xpath("//*[contains(@class,'ec_title')]",MyFilter.new).first
|
61
|
+
url = %w( span[@class='ec_url'] a[@class='EC_url'] ).map do |xpath|
|
62
|
+
node = div.search(xpath).first
|
63
|
+
node.text if node
|
64
|
+
end.compact.first
|
65
|
+
url = "http://" + url
|
66
|
+
{
|
67
|
+
:text => title.text,
|
68
|
+
:href => title['href'],
|
69
|
+
:host => Addressable::URI.parse(URI.encode(url)).host
|
70
|
+
}
|
71
|
+
end
|
72
72
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
end
|
73
|
+
def parse_seo(table)
|
74
|
+
url = %w( span[@class="g"] span[@class="c-showurl"] div[@class="op_zhidao_showurl"]).map do |xpath|
|
75
|
+
span = table.search(xpath).first
|
76
|
+
span.text.sub(/\d{4}-\d{1,2}-\d{1,2}/,'').strip if span
|
77
|
+
end.compact.first
|
78
|
+
host = Addressable::URI.parse(URI.encode("http://#{url}")).host
|
79
|
+
{
|
80
|
+
:text => table.search("h3").first.text.strip,
|
81
|
+
:href => table.search('a').first['href'].strip,
|
82
|
+
:host => host
|
83
|
+
}
|
84
|
+
end
|
86
85
|
end
|
87
|
-
end
|
86
|
+
end
|
87
|
+
end
|
data/lib/query/result/qihu.rb
CHANGED
@@ -34,6 +34,7 @@ module Query
|
|
34
34
|
def ads_right
|
35
35
|
@page.search("//ul[@id='rightbox']/li").map.with_index do |li,index|
|
36
36
|
a = li.search('a').first
|
37
|
+
next unless a['_cs']
|
37
38
|
href = CGI.parse(URI(a['_cs']).query)['aurl'].first
|
38
39
|
host = Addressable::URI.parse(URI.encode(href)).host
|
39
40
|
{
|
@@ -42,7 +43,7 @@ module Query
|
|
42
43
|
:href => href,
|
43
44
|
:host => host
|
44
45
|
}
|
45
|
-
end
|
46
|
+
end.compact
|
46
47
|
end
|
47
48
|
|
48
49
|
def related_keywords
|
data/lib/query/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: query
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- seoaqua
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -146,7 +146,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
146
146
|
version: '0'
|
147
147
|
requirements: []
|
148
148
|
rubyforge_project:
|
149
|
-
rubygems_version: 2.2.
|
149
|
+
rubygems_version: 2.2.2
|
150
150
|
signing_key:
|
151
151
|
specification_version: 4
|
152
152
|
summary: I dont have time to write the document yet. Usage is almost within rspec
|