query 0.1.25 → 0.1.28

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7e5167ec8d897abe4697f19e8bfc2d43bc569c4e
4
- data.tar.gz: 8d043368a21d1d9ad618cf5b41966ab3f256c641
3
+ metadata.gz: e8d3d289a2e63a1c88194c7b13a954927c5c5d99
4
+ data.tar.gz: 6efec7b1990e7216d9bcfd2a049f3f58b4e3e5a6
5
5
  SHA512:
6
- metadata.gz: 78e8dae9bb5ce55a90d3b2b1ec0825bf91153245d64d6f95d1e97bdae55b52ec52fe57ce989aa3d58897f09c7399ce3e2c9a24eb3e4c34dad64f837cd7215256
7
- data.tar.gz: 5b41c3903724261500b4bf2a8d036b9f1d1ed067e4bba8625cd3262acd4d3a7fa96fe7ff7a75156b33336e49335f711a7290f3c0dfe5a787796b87544b1443f5
6
+ metadata.gz: c1083ec4211a68b2311831c01b9e22e4ada3de8f4119a9d2da43d3c44407f69b2902ab7321b69902d32f8f98aa0921a240588533156ef1cc2976937a13f90bc3
7
+ data.tar.gz: 9cbe62910dc172dd29af51e6ddc74ec36d36ee1b20201a9cbc00961467a745c51f49400ced9a62818ad0304bc9e1475764d80312c4a848184e47ad423f1fd8f7
data/Gemfile CHANGED
@@ -1,4 +1,3 @@
1
-
2
- source 'http://ruby.taobao.org'
1
+ source 'https://ruby.taobao.org'
3
2
  # Specify your gem's dependencies in query.gemspec
4
3
  gemspec
data/LICENSE CHANGED
@@ -1,20 +1,21 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2013 seoaqua
3
+ Copyright (c) 2015 Warriors Of the Night
4
4
 
5
- Permission is hereby granted, free of charge, to any person obtaining a copy of
6
- this software and associated documentation files (the "Software"), to deal in
7
- the Software without restriction, including without limitation the rights to
8
- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
- the Software, and to permit persons to whom the Software is furnished to do so,
10
- subject to the following conditions:
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
11
 
12
12
  The above copyright notice and this permission notice shall be included in all
13
13
  copies or substantial portions of the Software.
14
14
 
15
15
  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
- FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
- COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
- IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md CHANGED
@@ -13,31 +13,34 @@ opt = {
13
13
  puts ::Query::Engine::Baidu.suggestions('abc',opt)
14
14
  ```
15
15
 
16
- #to get the result list by querying "abc"
16
+ #### to get the result list by querying "abc"
17
17
 
18
+ ```ruby
18
19
  Query::Engine::Baidu.new.query("abc").ranks().each do |id,value|
19
20
  puts id,value
20
21
  end
22
+ ```
23
+ #### to get the result list with host "www.abc.com.cn" by querying "abc"
21
24
 
22
- #to get the result list with host "www.abc.com.cn" by querying "abc"
23
-
25
+ ```ruby
24
26
  Query::Engine::Baidu.new.query("abc").ranks("www.abc.com.cn").each do |id,value|
25
27
  puts id,value
26
28
  end
29
+ ```
30
+ #### to get the result list for hosts matching the regex /com.cn/ by querying "abc"
27
31
 
28
- #to get the result list with host which fit the regex /com.cn/ by querying "abc"
29
-
32
+ ```ruby
30
33
  Query::Engine::Baidu.new.query("abc").ranks(/com.cn/).each do |id,value|
31
34
  puts id,value
32
35
  end
33
-
34
- # to get the top rank of host "www.abc.com.cn" by querying "abc"
36
+ ```
37
+ #### to get the top rank of host "www.abc.com.cn" by querying "abc"
35
38
 
36
39
  ```ruby
37
40
  puts Query::Engine::Baidu.new.query("abc").rank("www.abc.com.cn")
38
- #[3,1,2,4] => [rank_seo, rank_top_ads, rank_right_ads, rank_bottom_ads]
41
+ # [3,1,2,4] => [rank_seo, rank_top_ads, rank_right_ads, rank_bottom_ads]
39
42
  ```
40
43
 
41
- TODO:
42
- 查询结果不多,翻页不存在时的处理,及rspec
43
- 增加其他搜索引擎
44
+ #### TODO:
45
+ * 查询结果不多,翻页不存在时的处理,及rspec
46
+ * 增加其他搜索引擎
data/lib/query.rb CHANGED
@@ -4,11 +4,14 @@ class MyFilter
4
4
  # set.any? { |x| x.to_s.downcase == str.downcase}
5
5
  end
6
6
  end
7
- require 'query/result'
8
- require 'query/engine'
7
+
8
+ require 'require_all'
9
+ require 'uri'
10
+ require 'httparty'
11
+ require_all "#{__dir__}/query"
9
12
 
10
13
  module Query
11
14
  def self.get_redirect_url(url)
12
- Net::HTTP.get_response(URI(url)).response['location']
15
+ Net::HTTP.get_response(URI(url)).response['location'] || url
13
16
  end
14
17
  end
data/lib/query/engine.rb CHANGED
@@ -8,10 +8,3 @@ module Query
8
8
  end
9
9
  end
10
10
  end
11
- require 'httparty'
12
- require 'query/engine/baidu'
13
- require 'query/engine/baidu_mobile'
14
- require 'query/engine/qihu'
15
- require 'query/engine/qihu_mobile'
16
- require 'query/engine/sogou'
17
- require 'query/engine/sogou_mobile'
@@ -8,7 +8,7 @@ module Query
8
8
  :headers => {"User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'}
9
9
  }
10
10
 
11
- def self.suggestions(query,options = {})
11
+ def self.suggestions(query,options = [])
12
12
  require 'json'
13
13
  query = URI.encode(query)
14
14
  suggestions = HTTParty.get("https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=#{query}&json=1&p=3&sid=&req=2&csor=0&cb=jQuery1102036467162938788533_1437556180622&_=#{(Time.now.to_f*1000).to_i}",options)
@@ -1,26 +1,41 @@
1
1
  module Query
2
- module Engine
3
- class BaiduMobile
4
- include Query::Engine
5
- BaseUri = 'http://m.baidu.com/s?'
6
- Options = {
7
- :headers => {"User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'}
8
- }
2
+ module Engine
3
+ class BaiduMobile
4
+ include Query::Engine
5
+ Host = 'm.baidu.com'
6
+ Options = {
7
+ :headers => {"User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'}
8
+ }
9
9
 
10
- #基本查询,相当于从搜索框直接输入关键词查询
11
- def query(wd)
12
- queryStr = "word=#{wd}"
13
- uri = URI.encode((BaseUri + queryStr))
14
- # begin
15
- res = HTTParty.get(uri,Options)
16
- r = Query::Result::BaiduMobile.new(res)
17
- r.baseuri = uri
18
- r
19
- # rescue Exception => e
20
- # warn "#{__FILE__} #{__LINE__} #{uri} fetch error: #{e.to_s}"
21
- # return false
22
- # end
23
- end
10
+ #基本查询,相当于从搜索框直接输入关键词查询
11
+ def self.query(wd, params={})
12
+ q = Array.new
13
+ q << "word=#{URI.encode(wd)}"
14
+ q << "rn=#{@perpage.to_i}" if @perpage
15
+ # Join arguments
16
+ params.each do |k, v|
17
+ q << "#{k.to_s}=#{v.to_s}"
24
18
  end
19
+ uri = URI::HTTP.build(:host=>Host,:path=>'/s',:query=>q.join('&'))
20
+ # begin
21
+ res = HTTParty.get(uri, Options)
22
+ r = Query::Result::BaiduMobile.new(res)
23
+ r.baseuri, r.options = uri, Options
24
+ r
25
+ # rescue Exception => e
26
+ # warn "#{__FILE__} #{__LINE__} #{uri} fetch error: #{e.to_s}"
27
+ # return false
28
+ # end
29
+ end
30
+
31
+ def self.suggestions(query,options = {})
32
+ require 'json'
33
+ query = URI.encode(query)
34
+
35
+ suggestions = HTTParty.get("https://m.baidu.com/su?callback=jsonp11&wd=#{query}",options).to_s
36
+ suggestions = suggestions.force_encoding('GB18030').encode('UTF-8')
37
+ suggestions = suggestions.split('s:[')[1].delete(']});').split(',').uniq
38
+ end
25
39
  end
40
+ end
26
41
  end
@@ -1,2 +1,24 @@
1
1
  module Query
2
- end
2
+ module Engine
3
+ class QihuMobile
4
+ Host = "m.haosou.com"
5
+ Options = {
6
+ :headers => {"User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'}
7
+ }
8
+ def self.query(wd, params={})
9
+ q = Array.new
10
+ q << "q=#{URI.encode(wd)}"
11
+ #q << "rn=#{@perpage.to_i}" if @perpage
12
+ # Join arguments
13
+ params.each do |k, v|
14
+ q << "#{k.to_s}=#{v.to_s}"
15
+ end
16
+ uri = URI::HTTP.build(:host=>Host,:path=>'/s',:query=>q.join('&'))
17
+ res = HTTParty.get(uri, Options)
18
+ r = Query::Result::QihuMobile.new(res)
19
+ r.baseuri, r.options = uri, Options
20
+ r
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,32 @@
1
+ module Query
2
+ module Engine
3
+ class SMobile
4
+ include Query::Engine
5
+ Host = 'm.sm.cn'
6
+ Options = {
7
+ :headers => {"User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'}
8
+ }
9
+
10
+ #基本查询,相当于从搜索框直接输入关键词查询
11
+ def self.query(wd, params={})
12
+ q = Array.new
13
+ q << "q=#{URI.encode(wd)}"
14
+ #q << "rn=#{@perpage.to_i}" if @perpage
15
+ # Join arguments
16
+ params.each do |k, v|
17
+ q << "#{k.to_s}=#{v.to_s}"
18
+ end
19
+ uri = URI::HTTP.build(:host=>Host,:path=>'/s',:query=>q.join('&'))
20
+ # begin
21
+ res = HTTParty.get(uri, Options)
22
+ r = Query::Result::SMobile.new(res)
23
+ r.baseuri, r.options = uri, Options
24
+ r
25
+ # rescue Exception => e
26
+ # warn "#{__FILE__} #{__LINE__} #{uri} fetch error: #{e.to_s}"
27
+ # return false
28
+ # end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -1,21 +1,25 @@
1
1
  module Query
2
- module Engine
3
- class SogouMobile
4
- include Query::Engine
5
- BaseUri = 'http://wap.sogou.com/web/searchList.jsp'
6
- Options = {
7
- :headers => {"User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'}
8
- }
9
- class << self
10
- def query(wd)
11
- queryStr = "keyword=#{wd}"
12
- uri = URI.encode(BaseUri + "?" + queryStr)
13
- res = HTTParty.get(uri,Options)
14
- r = Query::Result::SogouMobile.new(res)
15
- r.baseuri = uri
16
- r
17
- end
18
- end
19
- end
20
- end
2
+ module Engine
3
+ class SogouMobile
4
+ include Query::Engine
5
+ Host = 'wap.sogou.com'
6
+ Options = {
7
+ :headers => {"User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'}
8
+ }
9
+ def self.query(wd, params={})
10
+ q = Array.new
11
+ q << "keyword=#{URI.encode(wd)}"
12
+ #q << "rn=#{@perpage.to_i}" if @perpage
13
+ # Join arguments
14
+ params.each do |k, v|
15
+ q << "#{k.to_s}=#{v.to_s}"
16
+ end
17
+ uri = URI::HTTP.build(:host=>Host,:path=>'/web/searchList.jsp',:query=>q.join('&'))
18
+ res = HTTParty.get(uri, Options)
19
+ r = Query::Result::SogouMobile.new(res)
20
+ r.baseuri, r.options = uri, Options
21
+ r
22
+ end
23
+ end
24
+ end
21
25
  end
data/lib/query/result.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module Query
2
2
  module Result
3
- attr_accessor :baseuri,:pagenumber,:perpage
3
+ attr_accessor :baseuri,:pagenumber,:perpage,:options
4
4
  def initialize(page)
5
5
  @page = Nokogiri::HTML page
6
6
  @pagenumber = 1
@@ -32,24 +32,13 @@ module Query
32
32
 
33
33
  def next
34
34
  return false unless next_url
35
- @next_url = URI.join(@baseuri,next_url).to_s
36
- next_page = HTTParty.get @next_url
37
- next_page = self.class.new(next_page)
38
- next_page.baseuri = @next_url
39
- next_page.pagenumber = @pagenumber + 1
40
- next_page.perpage = @perpage
41
- r = next_page
42
- r.baseuri = @next_url
35
+ @next_url = URI.join(@baseuri, next_url)
36
+ next_page = HTTParty.get(@next_url, @options)
37
+ r = self.class.new(next_page)
38
+ r.pagenumber, r.perpage, r.options, r.baseuri = @pagenumber + 1, @perpage, @options, @baseuri
43
39
  r
44
40
  end
45
41
  end
46
42
  end
47
- require 'nokogiri'
48
- require "addressable/uri"
49
- require 'query/result/baidu'
50
- require 'query/result/baidu_mobile'
51
- require 'query/result/baidu_mobile_api'
52
- require 'query/result/qihu'
53
- require 'query/result/qihu_mobile'
54
- require 'query/result/sogou'
55
- require 'query/result/sogou_mobile'
43
+ require "nokogiri"
44
+ require "addressable/uri"
@@ -2,54 +2,44 @@ module Query
2
2
  module Result
3
3
  class Baidu
4
4
  include Query::Result
5
- def seo_ranks
6
- return @ranks unless @ranks.nil?
7
- @page.search("//*[@class='result']|//*[@class='result-op']|//*[@class='result-op c-container']|//*[@class='result c-container']").map.with_index do |table,index|
8
- parse_seo(table).merge({:rank => index + 1})
9
- end
5
+
6
+ def html
7
+ @page.to_html
10
8
  end
11
9
 
12
- # def ads_top
13
- # @page.search("//*[@class='result']/preceding-sibling::*[contains(@class,'EC_result')]").map.with_index do |div, index|
14
- # parse_ad(div).merge(:rank => index + 1)
15
- # end
16
- # end
10
+ def relatives
11
+ @page.search("//div[@id='rs']/table/tr/th/a").map{|tag| tag.text}
12
+ end
17
13
 
18
- def ads_left
19
- @page.xpath("//div[@id='content_left']//*[contains(@class,'EC_result')]",MyFilter.new).map.with_index do |div,index|
20
- parse_ad(div)#.merge(:rank => index + 1)
14
+ def seo_ranks
15
+ return @ranks unless @ranks.nil?
16
+ @page.search("//div[@id='content_left']/*[contains(@class, 'result')]").map.with_index do |div,index|
17
+ parse_seo(div).merge(:rank => (index + 1) + (@pagenumber -1) * 10)
21
18
  end
22
19
  end
23
20
 
24
21
  def ads_top
25
- ads_left.uniq.map.with_index do |ad,index|
26
- ad.merge(:rank => index + 1)
22
+ @page.search("//div[@id='content_left']/*[not(contains(@class, 'result') or contains(@class, 'leftBlock') or name()='br' or @id='rs_top_new' or @id='super_se_tip' or @class='rs') and position()<=7]").map.with_index do |div, index|
23
+ parse_ad(div).merge(:rank => (index + 1) + (@pagenumber -1) * 10)
27
24
  end
28
25
  end
29
26
 
30
27
  def ads_bottom
31
- # @page.search("//*[@class='result']/following-sibling::*[contains(@class,'EC_result')]").map.with_index do |div,index|
32
- # parse_ad(div)#.merge(:rank => index + 1)
33
- # end
34
- ads_top
28
+ @page.search("//div[@id='content_left']/*[not(contains(@class, 'result') or contains(@class, 'leftBlock') or name()='br' or @id='rs_top_new' or @id='super_se_tip' or @class='rs') and position()>=11]").map.with_index do |div, index|
29
+ parse_ad(div).merge(:rank => (index + 1) + (@pagenumber -1) * 10)
30
+ end
35
31
  end
36
32
 
37
33
  def ads_right
38
- @page.search("//div[@id='ec_im_container']/div[@id]").map.with_index do |div,index|
39
- a = div.search('a').first
40
- url = div.search("*[@class='EC_url']").first.text
41
- url = "http://#{url}"
42
- {
43
- :rank => index + 1,
44
- :text => a.text.strip,
45
- :href => a['href'].strip,
46
- :host => Addressable::URI.parse(URI.encode(url)).host
47
- }
34
+ @page.search("//div[@id='ec_im_container']/div[contains(@class, 'EC_idea')]").map.with_index do |div,index|
35
+ parse_ad(div).merge(:rank => (index + 1) + (@pagenumber -1) * 10)
48
36
  end
49
37
  end
50
38
 
51
39
  def count
52
- @count ||= @page.search("//span[@class='nums']").map{|num|num.content.gsub(/\D/,'').to_i unless num.nil?}.first
40
+ node = @page.search("//div[@class='nums']") + @page.search("//span[@class='nums']")
41
+ @count ||= node.map{|num|num.content.gsub(/\D/,'').to_i unless num.nil?}.first
42
+ @count
53
43
  end
54
44
 
55
45
  def related_keywords
@@ -71,36 +61,42 @@ module Query
71
61
  private
72
62
  def parse_ad(div)
73
63
  #@todo should be :
74
- #title = div.xpath("*[contains(@class,'ec_title')]",MyFilter.new).first
75
- title = div.xpath(".//*[contains(@class,'ec_title')]",MyFilter.new).first
76
- url = div.xpath(".//*[contains(@class,'ec_url')]",MyFilter.new).first
64
+ title = %w(div[1]/h3/a tbody/tr[2]/td/a[1] a[1]).inject(nil){|ans, xpath| ans || div.xpath(xpath).first}
65
+ url = %w(div[3]/span tbody/tr[2]/td/a[2] a[3]/font[last()]).inject(nil){|ans, xpath| ans || div.xpath(xpath).first}
77
66
  url = url.nil? ? 'www.baidu.com' : url.text
78
67
  url = "http://" + url
79
- {
80
- :text => title.text,
81
- :href => title['href'],
82
- :host => Addressable::URI.parse(URI.encode(url)).host
83
- }
84
- end
85
68
 
86
- def parse_seo(table)
87
- url = %w( span[@class="g"] span[@class="c-showurl"] span[@class="op_wiseapp_showurl"] div[@class="op_zhidao_showurl"]).map do |xpath|
88
- span = table.search(xpath).first
89
- span.text.sub(/\d{4}-\d{1,2}-\d{1,2}/,'').strip if span
90
- end.compact.first
91
- if url and !url.empty?
92
- host = Addressable::URI.parse(URI.encode("http://#{url}")).host
93
- else
94
- host = nil
69
+ begin
70
+ {
71
+ :text => title.text.strip,
72
+ :href => title['href'].to_s.strip,
73
+ :host => Addressable::URI.parse(URI.encode(url)).host
74
+ }
75
+ rescue Exception => e
76
+ warn "Error in parse_ad method : " + e.message
77
+ {}
95
78
  end
96
- href = table.search('a').first['href']
97
- href = href.strip if href
79
+ end
98
80
 
99
- {
100
- :text => table.search("h3").first.text.strip,
101
- :href => href,
102
- :host => host
103
- }
81
+ def parse_seo(div)
82
+ title = %w(div[1]/h3/a h3/a div/div[1]/div[1]/div tr[2]/td/table/tr/td/h3/a).inject(nil){|ans, xpath| ans || div.xpath(xpath).first}
83
+ url = %w(span[@class="g"] span[@class="c-showurl"]/span[@class="c-showurl"] span[@class="c-showurl"] span[@class="op_wiseapp_showurl"] div[@class="op_zhidao_showurl"]).inject(nil){|ans, xpath| ans || div.search(xpath).first}
84
+ url = url.nil? ? 'www.baidu.com' : url.text.sub(/\d{4}-\d{1,2}-\d{1,2}/,'').strip
85
+ url = "http://" + url
86
+ # url = Query::get_redirect_url(title['href'].to_s.strip) if url.include?('elong.com') && title['href']
87
+ # url = 'http://www.baidu.com' if url.empty?
88
+
89
+ begin
90
+ {
91
+ :is_vr=> div['class'].include?("result-op"),
92
+ :text => title.text.strip,
93
+ :href => title['href'].to_s.strip,
94
+ :host => Addressable::URI.parse(URI.encode(url)).host
95
+ }
96
+ rescue Exception => e
97
+ warn "Error in parse_seo method : " + e.message
98
+ {}
99
+ end
104
100
  end
105
101
  end
106
102
  end