query 0.1.4 → 0.1.7

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e5043b0180a473ab3d213136dfe9ab55ccb4a6d9
4
- data.tar.gz: dc0b8b1ee15dc3f4437439712904de92b838520e
3
+ metadata.gz: fd353429ae373984a436cf6ac00061be274feca7
4
+ data.tar.gz: 9d7505c017f842ea9aa22398dfd871b0431b0d72
5
5
  SHA512:
6
- metadata.gz: baa8ca09bc7bfd19f1eb3e9dffa24cf0cc28aba704d8671769bc26a79c68f10c01d57df4f76d4e6558ea638fd6c211111ca21d1883675450971256fc8369bc2a
7
- data.tar.gz: 804e6685b6d7d49e563318a9150eae1a655c52cb6f6ca9084c7fbb0c908fde92a44025761668abe2b7aa4b21ba31fbb0c9ebcbc24a6d3c61fd2415e5344b9fd4
6
+ metadata.gz: 66b91a869fedb625008905b11e6c960bab5057e6689b4268219ce66a3446fd4b09873b0d91d3b48c54514918cec7fd2ea7fe3a8e37fefd35e9e0f2197787675e
7
+ data.tar.gz: 3d324814ca1997e351ca2d36ade7f7a4d287e8c78b8f13020b09e6b0080793b4fc91ab98c1a394154b3633ec551eae05f99adc957dbfdcca587537ee8c32fb9b
data/.gitignore CHANGED
@@ -24,3 +24,9 @@ doc/
24
24
  lib/query/.DS_Store
25
25
 
26
26
  lib/query/.DS_Store
27
+
28
+ .gemtags
29
+
30
+ .tags
31
+
32
+ .tags_sorted_by_file
data/README.md CHANGED
@@ -22,7 +22,10 @@ end
22
22
 
23
23
  # to get the top rank of host "www.abc.com.cn" by querying "abc"
24
24
 
25
- Query::Engine::Baidu.new.query("abc").rank("www.abc.com.cn")
25
+ ```ruby
26
+ puts Query::Engine::Baidu.new.query("abc").rank("www.abc.com.cn")
27
+ #[3,1,2,4] => [rank_seo, rank_top_ads, rank_right_ads, rank_bottom_ads]
28
+ ```
26
29
 
27
30
  TODO:
28
31
  查询结果不多,翻页不存在时的处理,及rspec
@@ -1,87 +1,87 @@
1
1
  module Query
2
- module Result
3
- class Baidu
4
- include Query::Result
5
- def seo_ranks
6
- return @ranks unless @ranks.nil?
7
- @page.search("//*[@class='result']|//*[@class='result-op']|//*[@class='result-op c-container']").map.with_index do |table,index|
8
- parse_seo(table).merge({:rank => index + 1})
9
- end
10
- end
2
+ module Result
3
+ class Baidu
4
+ include Query::Result
5
+ def seo_ranks
6
+ return @ranks unless @ranks.nil?
7
+ @page.search("//*[@class='result']|//*[@class='result-op']|//*[@class='result-op c-container']").map.with_index do |table,index|
8
+ parse_seo(table).merge({:rank => index + 1})
9
+ end
10
+ end
11
11
 
12
- def ads_top
13
- @page.search("//*[@class='result']/preceding-sibling::*[contains(@class,'EC_result')]").map.with_index do |div, index|
14
- parse_ad(div).merge(:rank => index + 1)
15
- end
16
- end
12
+ def ads_top
13
+ @page.search("//*[@class='result']/preceding-sibling::*[contains(@class,'EC_result')]").map.with_index do |div, index|
14
+ parse_ad(div).merge(:rank => index + 1)
15
+ end
16
+ end
17
17
 
18
- def ads_bottom
19
- @page.search("//*[@class='result']/following-sibling::*[contains(@class,'EC_result')]").map.with_index do |div,index|
20
- parse_ad(div).merge(:rank => index + 1)
21
- end
22
- end
18
+ def ads_bottom
19
+ @page.search("//*[@class='result']/following-sibling::*[contains(@class,'EC_result')]").map.with_index do |div,index|
20
+ parse_ad(div).merge(:rank => index + 1)
21
+ end
22
+ end
23
23
 
24
- def ads_right
25
- @page.search("//div[@id='ec_im_container']/div[@id]").map.with_index do |div,index|
26
- a = div.search('a').first
27
- url = div.search("*[@class='EC_url']").first.text
28
- url = "http://#{url}"
29
- {
30
- :rank => index + 1,
31
- :text => a.text.strip,
32
- :href => a['href'].strip,
33
- :host => Addressable::URI.parse(URI.encode(url)).host
34
- }
35
- end
36
- end
24
+ def ads_right
25
+ @page.search("//div[@id='ec_im_container']/div[@id]").map.with_index do |div,index|
26
+ a = div.search('a').first
27
+ url = div.search("*[@class='EC_url']").first.text
28
+ url = "http://#{url}"
29
+ {
30
+ :rank => index + 1,
31
+ :text => a.text.strip,
32
+ :href => a['href'].strip,
33
+ :host => Addressable::URI.parse(URI.encode(url)).host
34
+ }
35
+ end
36
+ end
37
37
 
38
- def count
39
- @count ||= @page.search("//span[@class='nums']").map{|num|num.content.gsub(/\D/,'').to_i unless num.nil?}.first
40
- end
38
+ def count
39
+ @count ||= @page.search("//span[@class='nums']").map{|num|num.content.gsub(/\D/,'').to_i unless num.nil?}.first
40
+ end
41
41
 
42
- def related_keywords
43
- @related_keywords ||= @page.search("//div[@id=\"rs\"]//tr//a").map{|keyword| keyword.text}
44
- end
42
+ def related_keywords
43
+ @related_keywords ||= @page.search("//div[@id=\"rs\"]//tr//a").map{|keyword| keyword.text}
44
+ end
45
45
 
46
- def has_result?
47
- submit = @page.search('//a[text()="提交网址"]').first
48
- return false if submit and submit['href'].include?'sitesubmit'
49
- return true
50
- end
46
+ def has_result?
47
+ submit = @page.search('//a[text()="提交网址"]').first
48
+ return false if submit and submit['href'].include?'sitesubmit'
49
+ return true
50
+ end
51
51
 
52
- def next_url
53
- @page.search("//a[text()='下一页>']").first['href']
54
- end
52
+ def next_url
53
+ @page.search("//a[text()='下一页>']").first['href']
54
+ end
55
55
 
56
- private
57
- def parse_ad(div)
58
- #@todo should be :
59
- #title = div.xpath("*[contains(@class,'ec_title')]",MyFilter.new).first
60
- title = div.xpath("//*[contains(@class,'ec_title')]",MyFilter.new).first
61
- url = %w( span[@class='ec_url'] a[@class='EC_url'] ).map do |xpath|
62
- node = div.search(xpath).first
63
- node.text if node
64
- end.compact.first
65
- url = "http://" + url
66
- {
67
- :text => title.text,
68
- :href => title['href'],
69
- :host => Addressable::URI.parse(URI.encode(url)).host
70
- }
71
- end
56
+ private
57
+ def parse_ad(div)
58
+ #@todo should be :
59
+ #title = div.xpath("*[contains(@class,'ec_title')]",MyFilter.new).first
60
+ title = div.xpath("//*[contains(@class,'ec_title')]",MyFilter.new).first
61
+ url = %w( span[@class='ec_url'] a[@class='EC_url'] ).map do |xpath|
62
+ node = div.search(xpath).first
63
+ node.text if node
64
+ end.compact.first
65
+ url = "http://" + url
66
+ {
67
+ :text => title.text,
68
+ :href => title['href'],
69
+ :host => Addressable::URI.parse(URI.encode(url)).host
70
+ }
71
+ end
72
72
 
73
- def parse_seo(table)
74
- url = %w( span[@class="g"] span[@class="c-showurl"] div[@class="op_zhidao_showurl"]).map do |xpath|
75
- span = table.search(xpath).first
76
- span.text.sub(/\d{4}-\d{1,2}-\d{1,2}/,'').strip if span
77
- end.compact.first
78
- host = Addressable::URI.parse(URI.encode("http://#{url}")).host
79
- {
80
- :text => table.search("h3").first.text.strip,
81
- :href => table.search('a').first['href'].strip,
82
- :host => host
83
- }
84
- end
85
- end
73
+ def parse_seo(table)
74
+ url = %w( span[@class="g"] span[@class="c-showurl"] div[@class="op_zhidao_showurl"]).map do |xpath|
75
+ span = table.search(xpath).first
76
+ span.text.sub(/\d{4}-\d{1,2}-\d{1,2}/,'').strip if span
77
+ end.compact.first
78
+ host = Addressable::URI.parse(URI.encode("http://#{url}")).host
79
+ {
80
+ :text => table.search("h3").first.text.strip,
81
+ :href => table.search('a').first['href'].strip,
82
+ :host => host
83
+ }
84
+ end
86
85
  end
87
- end
86
+ end
87
+ end
@@ -34,6 +34,7 @@ module Query
34
34
  def ads_right
35
35
  @page.search("//ul[@id='rightbox']/li").map.with_index do |li,index|
36
36
  a = li.search('a').first
37
+ next unless a['_cs']
37
38
  href = CGI.parse(URI(a['_cs']).query)['aurl'].first
38
39
  host = Addressable::URI.parse(URI.encode(href)).host
39
40
  {
@@ -42,7 +43,7 @@ module Query
42
43
  :href => href,
43
44
  :host => host
44
45
  }
45
- end
46
+ end.compact
46
47
  end
47
48
 
48
49
  def related_keywords
data/lib/query/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Query
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.7"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: query
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - seoaqua
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-07 00:00:00.000000000 Z
11
+ date: 2014-02-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -146,7 +146,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
146
146
  version: '0'
147
147
  requirements: []
148
148
  rubyforge_project:
149
- rubygems_version: 2.2.1
149
+ rubygems_version: 2.2.2
150
150
  signing_key:
151
151
  specification_version: 4
152
152
  summary: I dont have time to write the document yet. Usage is almost within rspec