query 0.1.4 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e5043b0180a473ab3d213136dfe9ab55ccb4a6d9
4
- data.tar.gz: dc0b8b1ee15dc3f4437439712904de92b838520e
3
+ metadata.gz: fd353429ae373984a436cf6ac00061be274feca7
4
+ data.tar.gz: 9d7505c017f842ea9aa22398dfd871b0431b0d72
5
5
  SHA512:
6
- metadata.gz: baa8ca09bc7bfd19f1eb3e9dffa24cf0cc28aba704d8671769bc26a79c68f10c01d57df4f76d4e6558ea638fd6c211111ca21d1883675450971256fc8369bc2a
7
- data.tar.gz: 804e6685b6d7d49e563318a9150eae1a655c52cb6f6ca9084c7fbb0c908fde92a44025761668abe2b7aa4b21ba31fbb0c9ebcbc24a6d3c61fd2415e5344b9fd4
6
+ metadata.gz: 66b91a869fedb625008905b11e6c960bab5057e6689b4268219ce66a3446fd4b09873b0d91d3b48c54514918cec7fd2ea7fe3a8e37fefd35e9e0f2197787675e
7
+ data.tar.gz: 3d324814ca1997e351ca2d36ade7f7a4d287e8c78b8f13020b09e6b0080793b4fc91ab98c1a394154b3633ec551eae05f99adc957dbfdcca587537ee8c32fb9b
data/.gitignore CHANGED
@@ -24,3 +24,9 @@ doc/
24
24
  lib/query/.DS_Store
25
25
 
26
26
  lib/query/.DS_Store
27
+
28
+ .gemtags
29
+
30
+ .tags
31
+
32
+ .tags_sorted_by_file
data/README.md CHANGED
@@ -22,7 +22,10 @@ end
22
22
 
23
23
  # to get the top rank of host "www.abc.com.cn" by querying "abc"
24
24
 
25
- Query::Engine::Baidu.new.query("abc").rank("www.abc.com.cn")
25
+ ```ruby
26
+ puts Query::Engine::Baidu.new.query("abc").rank("www.abc.com.cn")
27
+ #[3,1,2,4] => [rank_seo, rank_top_ads, rank_right_ads, rank_bottom_ads]
28
+ ```
26
29
 
27
30
  TODO:
28
31
  查询结果不多,翻页不存在时的处理,及rspec
@@ -1,87 +1,87 @@
1
1
  module Query
2
- module Result
3
- class Baidu
4
- include Query::Result
5
- def seo_ranks
6
- return @ranks unless @ranks.nil?
7
- @page.search("//*[@class='result']|//*[@class='result-op']|//*[@class='result-op c-container']").map.with_index do |table,index|
8
- parse_seo(table).merge({:rank => index + 1})
9
- end
10
- end
2
+ module Result
3
+ class Baidu
4
+ include Query::Result
5
+ def seo_ranks
6
+ return @ranks unless @ranks.nil?
7
+ @page.search("//*[@class='result']|//*[@class='result-op']|//*[@class='result-op c-container']").map.with_index do |table,index|
8
+ parse_seo(table).merge({:rank => index + 1})
9
+ end
10
+ end
11
11
 
12
- def ads_top
13
- @page.search("//*[@class='result']/preceding-sibling::*[contains(@class,'EC_result')]").map.with_index do |div, index|
14
- parse_ad(div).merge(:rank => index + 1)
15
- end
16
- end
12
+ def ads_top
13
+ @page.search("//*[@class='result']/preceding-sibling::*[contains(@class,'EC_result')]").map.with_index do |div, index|
14
+ parse_ad(div).merge(:rank => index + 1)
15
+ end
16
+ end
17
17
 
18
- def ads_bottom
19
- @page.search("//*[@class='result']/following-sibling::*[contains(@class,'EC_result')]").map.with_index do |div,index|
20
- parse_ad(div).merge(:rank => index + 1)
21
- end
22
- end
18
+ def ads_bottom
19
+ @page.search("//*[@class='result']/following-sibling::*[contains(@class,'EC_result')]").map.with_index do |div,index|
20
+ parse_ad(div).merge(:rank => index + 1)
21
+ end
22
+ end
23
23
 
24
- def ads_right
25
- @page.search("//div[@id='ec_im_container']/div[@id]").map.with_index do |div,index|
26
- a = div.search('a').first
27
- url = div.search("*[@class='EC_url']").first.text
28
- url = "http://#{url}"
29
- {
30
- :rank => index + 1,
31
- :text => a.text.strip,
32
- :href => a['href'].strip,
33
- :host => Addressable::URI.parse(URI.encode(url)).host
34
- }
35
- end
36
- end
24
+ def ads_right
25
+ @page.search("//div[@id='ec_im_container']/div[@id]").map.with_index do |div,index|
26
+ a = div.search('a').first
27
+ url = div.search("*[@class='EC_url']").first.text
28
+ url = "http://#{url}"
29
+ {
30
+ :rank => index + 1,
31
+ :text => a.text.strip,
32
+ :href => a['href'].strip,
33
+ :host => Addressable::URI.parse(URI.encode(url)).host
34
+ }
35
+ end
36
+ end
37
37
 
38
- def count
39
- @count ||= @page.search("//span[@class='nums']").map{|num|num.content.gsub(/\D/,'').to_i unless num.nil?}.first
40
- end
38
+ def count
39
+ @count ||= @page.search("//span[@class='nums']").map{|num|num.content.gsub(/\D/,'').to_i unless num.nil?}.first
40
+ end
41
41
 
42
- def related_keywords
43
- @related_keywords ||= @page.search("//div[@id=\"rs\"]//tr//a").map{|keyword| keyword.text}
44
- end
42
+ def related_keywords
43
+ @related_keywords ||= @page.search("//div[@id=\"rs\"]//tr//a").map{|keyword| keyword.text}
44
+ end
45
45
 
46
- def has_result?
47
- submit = @page.search('//a[text()="提交网址"]').first
48
- return false if submit and submit['href'].include?'sitesubmit'
49
- return true
50
- end
46
+ def has_result?
47
+ submit = @page.search('//a[text()="提交网址"]').first
48
+ return false if submit and submit['href'].include?'sitesubmit'
49
+ return true
50
+ end
51
51
 
52
- def next_url
53
- @page.search("//a[text()='下一页>']").first['href']
54
- end
52
+ def next_url
53
+ @page.search("//a[text()='下一页>']").first['href']
54
+ end
55
55
 
56
- private
57
- def parse_ad(div)
58
- #@todo should be :
59
- #title = div.xpath("*[contains(@class,'ec_title')]",MyFilter.new).first
60
- title = div.xpath("//*[contains(@class,'ec_title')]",MyFilter.new).first
61
- url = %w( span[@class='ec_url'] a[@class='EC_url'] ).map do |xpath|
62
- node = div.search(xpath).first
63
- node.text if node
64
- end.compact.first
65
- url = "http://" + url
66
- {
67
- :text => title.text,
68
- :href => title['href'],
69
- :host => Addressable::URI.parse(URI.encode(url)).host
70
- }
71
- end
56
+ private
57
+ def parse_ad(div)
58
+ #@todo should be :
59
+ #title = div.xpath("*[contains(@class,'ec_title')]",MyFilter.new).first
60
+ title = div.xpath("//*[contains(@class,'ec_title')]",MyFilter.new).first
61
+ url = %w( span[@class='ec_url'] a[@class='EC_url'] ).map do |xpath|
62
+ node = div.search(xpath).first
63
+ node.text if node
64
+ end.compact.first
65
+ url = "http://" + url
66
+ {
67
+ :text => title.text,
68
+ :href => title['href'],
69
+ :host => Addressable::URI.parse(URI.encode(url)).host
70
+ }
71
+ end
72
72
 
73
- def parse_seo(table)
74
- url = %w( span[@class="g"] span[@class="c-showurl"] div[@class="op_zhidao_showurl"]).map do |xpath|
75
- span = table.search(xpath).first
76
- span.text.sub(/\d{4}-\d{1,2}-\d{1,2}/,'').strip if span
77
- end.compact.first
78
- host = Addressable::URI.parse(URI.encode("http://#{url}")).host
79
- {
80
- :text => table.search("h3").first.text.strip,
81
- :href => table.search('a').first['href'].strip,
82
- :host => host
83
- }
84
- end
85
- end
73
+ def parse_seo(table)
74
+ url = %w( span[@class="g"] span[@class="c-showurl"] div[@class="op_zhidao_showurl"]).map do |xpath|
75
+ span = table.search(xpath).first
76
+ span.text.sub(/\d{4}-\d{1,2}-\d{1,2}/,'').strip if span
77
+ end.compact.first
78
+ host = Addressable::URI.parse(URI.encode("http://#{url}")).host
79
+ {
80
+ :text => table.search("h3").first.text.strip,
81
+ :href => table.search('a').first['href'].strip,
82
+ :host => host
83
+ }
84
+ end
86
85
  end
87
- end
86
+ end
87
+ end
@@ -34,6 +34,7 @@ module Query
34
34
  def ads_right
35
35
  @page.search("//ul[@id='rightbox']/li").map.with_index do |li,index|
36
36
  a = li.search('a').first
37
+ next unless a['_cs']
37
38
  href = CGI.parse(URI(a['_cs']).query)['aurl'].first
38
39
  host = Addressable::URI.parse(URI.encode(href)).host
39
40
  {
@@ -42,7 +43,7 @@ module Query
42
43
  :href => href,
43
44
  :host => host
44
45
  }
45
- end
46
+ end.compact
46
47
  end
47
48
 
48
49
  def related_keywords
data/lib/query/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Query
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.7"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: query
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - seoaqua
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-07 00:00:00.000000000 Z
11
+ date: 2014-02-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -146,7 +146,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
146
146
  version: '0'
147
147
  requirements: []
148
148
  rubyforge_project:
149
- rubygems_version: 2.2.1
149
+ rubygems_version: 2.2.2
150
150
  signing_key:
151
151
  specification_version: 4
152
152
  summary: I dont have time to write the document yet. Usage is almost within rspec