query 0.1.4 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/README.md +4 -1
- data/lib/query/result/baidu.rb +76 -76
- data/lib/query/result/qihu.rb +2 -1
- data/lib/query/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fd353429ae373984a436cf6ac00061be274feca7
|
4
|
+
data.tar.gz: 9d7505c017f842ea9aa22398dfd871b0431b0d72
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66b91a869fedb625008905b11e6c960bab5057e6689b4268219ce66a3446fd4b09873b0d91d3b48c54514918cec7fd2ea7fe3a8e37fefd35e9e0f2197787675e
|
7
|
+
data.tar.gz: 3d324814ca1997e351ca2d36ade7f7a4d287e8c78b8f13020b09e6b0080793b4fc91ab98c1a394154b3633ec551eae05f99adc957dbfdcca587537ee8c32fb9b
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -22,7 +22,10 @@ end
|
|
22
22
|
|
23
23
|
# to get the top rank of host "www.abc.com.cn" by querying "abc"
|
24
24
|
|
25
|
-
|
25
|
+
```ruby
|
26
|
+
puts Query::Engine::Baidu.new.query("abc").rank("www.abc.com.cn")
|
27
|
+
#[3,1,2,4] => [rank_seo, rank_top_ads, rank_right_ads, rank_bottom_ads]
|
28
|
+
```
|
26
29
|
|
27
30
|
TODO:
|
28
31
|
查询结果不多,翻页不存在时的处理,及rspec
|
data/lib/query/result/baidu.rb
CHANGED
@@ -1,87 +1,87 @@
|
|
1
1
|
module Query
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
2
|
+
module Result
|
3
|
+
class Baidu
|
4
|
+
include Query::Result
|
5
|
+
def seo_ranks
|
6
|
+
return @ranks unless @ranks.nil?
|
7
|
+
@page.search("//*[@class='result']|//*[@class='result-op']|//*[@class='result-op c-container']").map.with_index do |table,index|
|
8
|
+
parse_seo(table).merge({:rank => index + 1})
|
9
|
+
end
|
10
|
+
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
12
|
+
def ads_top
|
13
|
+
@page.search("//*[@class='result']/preceding-sibling::*[contains(@class,'EC_result')]").map.with_index do |div, index|
|
14
|
+
parse_ad(div).merge(:rank => index + 1)
|
15
|
+
end
|
16
|
+
end
|
17
17
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
18
|
+
def ads_bottom
|
19
|
+
@page.search("//*[@class='result']/following-sibling::*[contains(@class,'EC_result')]").map.with_index do |div,index|
|
20
|
+
parse_ad(div).merge(:rank => index + 1)
|
21
|
+
end
|
22
|
+
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
24
|
+
def ads_right
|
25
|
+
@page.search("//div[@id='ec_im_container']/div[@id]").map.with_index do |div,index|
|
26
|
+
a = div.search('a').first
|
27
|
+
url = div.search("*[@class='EC_url']").first.text
|
28
|
+
url = "http://#{url}"
|
29
|
+
{
|
30
|
+
:rank => index + 1,
|
31
|
+
:text => a.text.strip,
|
32
|
+
:href => a['href'].strip,
|
33
|
+
:host => Addressable::URI.parse(URI.encode(url)).host
|
34
|
+
}
|
35
|
+
end
|
36
|
+
end
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
38
|
+
def count
|
39
|
+
@count ||= @page.search("//span[@class='nums']").map{|num|num.content.gsub(/\D/,'').to_i unless num.nil?}.first
|
40
|
+
end
|
41
41
|
|
42
|
-
|
43
|
-
|
44
|
-
|
42
|
+
def related_keywords
|
43
|
+
@related_keywords ||= @page.search("//div[@id=\"rs\"]//tr//a").map{|keyword| keyword.text}
|
44
|
+
end
|
45
45
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
def has_result?
|
47
|
+
submit = @page.search('//a[text()="提交网址"]').first
|
48
|
+
return false if submit and submit['href'].include?'sitesubmit'
|
49
|
+
return true
|
50
|
+
end
|
51
51
|
|
52
|
-
|
53
|
-
|
54
|
-
|
52
|
+
def next_url
|
53
|
+
@page.search("//a[text()='下一页>']").first['href']
|
54
|
+
end
|
55
55
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
56
|
+
private
|
57
|
+
def parse_ad(div)
|
58
|
+
#@todo should be :
|
59
|
+
#title = div.xpath("*[contains(@class,'ec_title')]",MyFilter.new).first
|
60
|
+
title = div.xpath("//*[contains(@class,'ec_title')]",MyFilter.new).first
|
61
|
+
url = %w( span[@class='ec_url'] a[@class='EC_url'] ).map do |xpath|
|
62
|
+
node = div.search(xpath).first
|
63
|
+
node.text if node
|
64
|
+
end.compact.first
|
65
|
+
url = "http://" + url
|
66
|
+
{
|
67
|
+
:text => title.text,
|
68
|
+
:href => title['href'],
|
69
|
+
:host => Addressable::URI.parse(URI.encode(url)).host
|
70
|
+
}
|
71
|
+
end
|
72
72
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
end
|
73
|
+
def parse_seo(table)
|
74
|
+
url = %w( span[@class="g"] span[@class="c-showurl"] div[@class="op_zhidao_showurl"]).map do |xpath|
|
75
|
+
span = table.search(xpath).first
|
76
|
+
span.text.sub(/\d{4}-\d{1,2}-\d{1,2}/,'').strip if span
|
77
|
+
end.compact.first
|
78
|
+
host = Addressable::URI.parse(URI.encode("http://#{url}")).host
|
79
|
+
{
|
80
|
+
:text => table.search("h3").first.text.strip,
|
81
|
+
:href => table.search('a').first['href'].strip,
|
82
|
+
:host => host
|
83
|
+
}
|
84
|
+
end
|
86
85
|
end
|
87
|
-
end
|
86
|
+
end
|
87
|
+
end
|
data/lib/query/result/qihu.rb
CHANGED
@@ -34,6 +34,7 @@ module Query
|
|
34
34
|
def ads_right
|
35
35
|
@page.search("//ul[@id='rightbox']/li").map.with_index do |li,index|
|
36
36
|
a = li.search('a').first
|
37
|
+
next unless a['_cs']
|
37
38
|
href = CGI.parse(URI(a['_cs']).query)['aurl'].first
|
38
39
|
host = Addressable::URI.parse(URI.encode(href)).host
|
39
40
|
{
|
@@ -42,7 +43,7 @@ module Query
|
|
42
43
|
:href => href,
|
43
44
|
:host => host
|
44
45
|
}
|
45
|
-
end
|
46
|
+
end.compact
|
46
47
|
end
|
47
48
|
|
48
49
|
def related_keywords
|
data/lib/query/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: query
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- seoaqua
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -146,7 +146,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
146
146
|
version: '0'
|
147
147
|
requirements: []
|
148
148
|
rubyforge_project:
|
149
|
-
rubygems_version: 2.2.
|
149
|
+
rubygems_version: 2.2.2
|
150
150
|
signing_key:
|
151
151
|
specification_version: 4
|
152
152
|
summary: I dont have time to write the document yet. Usage is almost within rspec
|