query 0.1.25 → 0.1.28
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -2
- data/LICENSE +13 -12
- data/README.md +14 -11
- data/lib/query.rb +6 -3
- data/lib/query/engine.rb +0 -7
- data/lib/query/engine/baidu.rb +1 -1
- data/lib/query/engine/baidu_mobile.rb +36 -21
- data/lib/query/engine/qihu_mobile.rb +23 -1
- data/lib/query/engine/sm_mobile.rb +32 -0
- data/lib/query/engine/sogou_mobile.rb +23 -19
- data/lib/query/result.rb +7 -18
- data/lib/query/result/baidu.rb +51 -55
- data/lib/query/result/baidu_mobile.rb +114 -56
- data/lib/query/result/qihu_mobile.rb +48 -3
- data/lib/query/result/sm_mobile.rb +95 -0
- data/lib/query/result/sogou.rb +2 -5
- data/lib/query/result/sogou_mobile.rb +79 -39
- data/query.gemspec +7 -8
- data/spec/mbaidu_spec.rb +62 -0
- data/spec/qihu_mobile_spec.rb +33 -0
- data/spec/samples/mbaidu.html +3 -0
- data/spec/sm_mobile_spec.rb +94 -0
- data/spec/sogou_mobile_spec.rb +0 -2
- data/spec/spec_helper.rb +1 -1
- metadata +40 -31
- data/lib/query/version.rb +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8d3d289a2e63a1c88194c7b13a954927c5c5d99
|
4
|
+
data.tar.gz: 6efec7b1990e7216d9bcfd2a049f3f58b4e3e5a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c1083ec4211a68b2311831c01b9e22e4ada3de8f4119a9d2da43d3c44407f69b2902ab7321b69902d32f8f98aa0921a240588533156ef1cc2976937a13f90bc3
|
7
|
+
data.tar.gz: 9cbe62910dc172dd29af51e6ddc74ec36d36ee1b20201a9cbc00961467a745c51f49400ced9a62818ad0304bc9e1475764d80312c4a848184e47ad423f1fd8f7
|
data/Gemfile
CHANGED
data/LICENSE
CHANGED
@@ -1,20 +1,21 @@
|
|
1
1
|
The MIT License (MIT)
|
2
2
|
|
3
|
-
Copyright (c)
|
3
|
+
Copyright (c) 2015 Warriors Of the Night
|
4
4
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
-
this software and associated documentation files (the "Software"), to deal
|
7
|
-
the Software without restriction, including without limitation the rights
|
8
|
-
use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
-
the Software, and to permit persons to whom the Software is
|
10
|
-
subject to the following conditions:
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
11
|
|
12
12
|
The above copyright notice and this permission notice shall be included in all
|
13
13
|
copies or substantial portions of the Software.
|
14
14
|
|
15
15
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
-
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
-
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
-
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
-
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
CHANGED
@@ -13,31 +13,34 @@ opt = {
|
|
13
13
|
puts ::Query::Engine::Baidu.suggestions('abc',opt)
|
14
14
|
```
|
15
15
|
|
16
|
-
|
16
|
+
#### to get the result list by querying "abc"
|
17
17
|
|
18
|
+
```ruby
|
18
19
|
Query::Engine::Baidu.new.query("abc").ranks().each do |id,value|
|
19
20
|
puts id,value
|
20
21
|
end
|
22
|
+
```
|
23
|
+
#### to get the result list with host "www.abc.com.cn" by querying "abc"
|
21
24
|
|
22
|
-
|
23
|
-
|
25
|
+
```ruby
|
24
26
|
Query::Engine::Baidu.new.query("abc").ranks("www.abc.com.cn").each do |id,value|
|
25
27
|
puts id,value
|
26
28
|
end
|
29
|
+
```
|
30
|
+
#### to get the result list for hosts that match the regex /com.cn/ by querying "abc"
|
27
31
|
|
28
|
-
|
29
|
-
|
32
|
+
```ruby
|
30
33
|
Query::Engine::Baidu.new.query("abc").ranks(/com.cn/).each do |id,value|
|
31
34
|
puts id,value
|
32
35
|
end
|
33
|
-
|
34
|
-
|
36
|
+
```
|
37
|
+
#### to get the top rank of host "www.abc.com.cn" by querying "abc"
|
35
38
|
|
36
39
|
```ruby
|
37
40
|
puts Query::Engine::Baidu.new.query("abc").rank("www.abc.com.cn")
|
38
|
-
|
41
|
+
# => [3,1,2,4], i.e. [rank_seo, rank_top_ads, rank_right_ads, rank_bottom_ads]
|
39
42
|
```
|
40
43
|
|
41
|
-
TODO:
|
42
|
-
查询结果不多,翻页不存在时的处理,及rspec
|
43
|
-
增加其他搜索引擎
|
44
|
+
#### TODO:
|
45
|
+
* 查询结果不多,翻页不存在时的处理,及rspec
|
46
|
+
* 增加其他搜索引擎
|
data/lib/query.rb
CHANGED
@@ -4,11 +4,14 @@ class MyFilter
|
|
4
4
|
# set.any? { |x| x.to_s.downcase == str.downcase}
|
5
5
|
end
|
6
6
|
end
|
7
|
-
|
8
|
-
require '
|
7
|
+
|
8
|
+
require 'require_all'
|
9
|
+
require 'uri'
|
10
|
+
require 'httparty'
|
11
|
+
require_all "#{__dir__}/query"
|
9
12
|
|
10
13
|
module Query
|
11
14
|
def self.get_redirect_url(url)
|
12
|
-
Net::HTTP.get_response(URI(url)).response['location']
|
15
|
+
Net::HTTP.get_response(URI(url)).response['location'] || url
|
13
16
|
end
|
14
17
|
end
|
data/lib/query/engine.rb
CHANGED
@@ -8,10 +8,3 @@ module Query
|
|
8
8
|
end
|
9
9
|
end
|
10
10
|
end
|
11
|
-
require 'httparty'
|
12
|
-
require 'query/engine/baidu'
|
13
|
-
require 'query/engine/baidu_mobile'
|
14
|
-
require 'query/engine/qihu'
|
15
|
-
require 'query/engine/qihu_mobile'
|
16
|
-
require 'query/engine/sogou'
|
17
|
-
require 'query/engine/sogou_mobile'
|
data/lib/query/engine/baidu.rb
CHANGED
@@ -8,7 +8,7 @@ module Query
|
|
8
8
|
:headers => {"User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'}
|
9
9
|
}
|
10
10
|
|
11
|
-
def self.suggestions(query,options =
|
11
|
+
def self.suggestions(query,options = [])
|
12
12
|
require 'json'
|
13
13
|
query = URI.encode(query)
|
14
14
|
suggestions = HTTParty.get("https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=#{query}&json=1&p=3&sid=&req=2&csor=0&cb=jQuery1102036467162938788533_1437556180622&_=#{(Time.now.to_f*1000).to_i}",options)
|
@@ -1,26 +1,41 @@
|
|
1
1
|
module Query
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
2
|
+
module Engine
|
3
|
+
class BaiduMobile
|
4
|
+
include Query::Engine
|
5
|
+
Host = 'm.baidu.com'
|
6
|
+
Options = {
|
7
|
+
:headers => {"User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'}
|
8
|
+
}
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
r
|
19
|
-
# rescue Exception => e
|
20
|
-
# warn "#{__FILE__} #{__LINE__} #{uri} fetch error: #{e.to_s}"
|
21
|
-
# return false
|
22
|
-
# end
|
23
|
-
end
|
10
|
+
#基本查询,相当于从搜索框直接输入关键词查询
|
11
|
+
def self.query(wd, params={})
|
12
|
+
q = Array.new
|
13
|
+
q << "word=#{URI.encode(wd)}"
|
14
|
+
q << "rn=#{@perpage.to_i}" if @perpage
|
15
|
+
# Join arguments
|
16
|
+
params.each do |k, v|
|
17
|
+
q << "#{k.to_s}=#{v.to_s}"
|
24
18
|
end
|
19
|
+
uri = URI::HTTP.build(:host=>Host,:path=>'/s',:query=>q.join('&'))
|
20
|
+
# begin
|
21
|
+
res = HTTParty.get(uri, Options)
|
22
|
+
r = Query::Result::BaiduMobile.new(res)
|
23
|
+
r.baseuri, r.options = uri, Options
|
24
|
+
r
|
25
|
+
# rescue Exception => e
|
26
|
+
# warn "#{__FILE__} #{__LINE__} #{uri} fetch error: #{e.to_s}"
|
27
|
+
# return false
|
28
|
+
# end
|
29
|
+
end
|
30
|
+
|
31
|
+
def self.suggestions(query,options = {})
|
32
|
+
require 'json'
|
33
|
+
query = URI.encode(query)
|
34
|
+
|
35
|
+
suggestions = HTTParty.get("https://m.baidu.com/su?callback=jsonp11&wd=#{query}",options).to_s
|
36
|
+
suggestions = suggestions.force_encoding('GB18030').encode('UTF-8')
|
37
|
+
suggestions = suggestions.split('s:[')[1].delete(']});').split(',').uniq
|
38
|
+
end
|
25
39
|
end
|
40
|
+
end
|
26
41
|
end
|
@@ -1,2 +1,24 @@
|
|
1
1
|
module Query
|
2
|
-
|
2
|
+
module Engine
|
3
|
+
class QihuMobile
|
4
|
+
Host = "m.haosou.com"
|
5
|
+
Options = {
|
6
|
+
:headers => {"User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'}
|
7
|
+
}
|
8
|
+
def self.query(wd, params={})
|
9
|
+
q = Array.new
|
10
|
+
q << "q=#{URI.encode(wd)}"
|
11
|
+
#q << "rn=#{@perpage.to_i}" if @perpage
|
12
|
+
# Join arguments
|
13
|
+
params.each do |k, v|
|
14
|
+
q << "#{k.to_s}=#{v.to_s}"
|
15
|
+
end
|
16
|
+
uri = URI::HTTP.build(:host=>Host,:path=>'/s',:query=>q.join('&'))
|
17
|
+
res = HTTParty.get(uri, Options)
|
18
|
+
r = Query::Result::QihuMobile.new(res)
|
19
|
+
r.baseuri, r.options = uri, Options
|
20
|
+
r
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Query
|
2
|
+
module Engine
|
3
|
+
class SMobile
|
4
|
+
include Query::Engine
|
5
|
+
Host = 'm.sm.cn'
|
6
|
+
Options = {
|
7
|
+
:headers => {"User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'}
|
8
|
+
}
|
9
|
+
|
10
|
+
#基本查询,相当于从搜索框直接输入关键词查询
|
11
|
+
def self.query(wd, params={})
|
12
|
+
q = Array.new
|
13
|
+
q << "q=#{URI.encode(wd)}"
|
14
|
+
#q << "rn=#{@perpage.to_i}" if @perpage
|
15
|
+
# Join arguments
|
16
|
+
params.each do |k, v|
|
17
|
+
q << "#{k.to_s}=#{v.to_s}"
|
18
|
+
end
|
19
|
+
uri = URI::HTTP.build(:host=>Host,:path=>'/s',:query=>q.join('&'))
|
20
|
+
# begin
|
21
|
+
res = HTTParty.get(uri, Options)
|
22
|
+
r = Query::Result::SMobile.new(res)
|
23
|
+
r.baseuri, r.options = uri, Options
|
24
|
+
r
|
25
|
+
# rescue Exception => e
|
26
|
+
# warn "#{__FILE__} #{__LINE__} #{uri} fetch error: #{e.to_s}"
|
27
|
+
# return false
|
28
|
+
# end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -1,21 +1,25 @@
|
|
1
1
|
module Query
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
2
|
+
module Engine
|
3
|
+
class SogouMobile
|
4
|
+
include Query::Engine
|
5
|
+
Host = 'wap.sogou.com'
|
6
|
+
Options = {
|
7
|
+
:headers => {"User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'}
|
8
|
+
}
|
9
|
+
def self.query(wd, params={})
|
10
|
+
q = Array.new
|
11
|
+
q << "keyword=#{URI.encode(wd)}"
|
12
|
+
#q << "rn=#{@perpage.to_i}" if @perpage
|
13
|
+
# Join arguments
|
14
|
+
params.each do |k, v|
|
15
|
+
q << "#{k.to_s}=#{v.to_s}"
|
16
|
+
end
|
17
|
+
uri = URI::HTTP.build(:host=>Host,:path=>'/web/searchList.jsp',:query=>q.join('&'))
|
18
|
+
res = HTTParty.get(uri, Options)
|
19
|
+
r = Query::Result::SogouMobile.new(res)
|
20
|
+
r.baseuri, r.options = uri, Options
|
21
|
+
r
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
21
25
|
end
|
data/lib/query/result.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module Query
|
2
2
|
module Result
|
3
|
-
attr_accessor :baseuri,:pagenumber,:perpage
|
3
|
+
attr_accessor :baseuri,:pagenumber,:perpage,:options
|
4
4
|
def initialize(page)
|
5
5
|
@page = Nokogiri::HTML page
|
6
6
|
@pagenumber = 1
|
@@ -32,24 +32,13 @@ module Query
|
|
32
32
|
|
33
33
|
def next
|
34
34
|
return false unless next_url
|
35
|
-
@next_url = URI.join(@baseuri,next_url)
|
36
|
-
next_page = HTTParty.get @
|
37
|
-
|
38
|
-
|
39
|
-
next_page.pagenumber = @pagenumber + 1
|
40
|
-
next_page.perpage = @perpage
|
41
|
-
r = next_page
|
42
|
-
r.baseuri = @next_url
|
35
|
+
@next_url = URI.join(@baseuri, next_url)
|
36
|
+
next_page = HTTParty.get(@next_url, @options)
|
37
|
+
r = self.class.new(next_page)
|
38
|
+
r.pagenumber, r.perpage, r.options, r.baseuri = @pagenumber + 1, @perpage, @options, @baseuri
|
43
39
|
r
|
44
40
|
end
|
45
41
|
end
|
46
42
|
end
|
47
|
-
require
|
48
|
-
require "addressable/uri"
|
49
|
-
require 'query/result/baidu'
|
50
|
-
require 'query/result/baidu_mobile'
|
51
|
-
require 'query/result/baidu_mobile_api'
|
52
|
-
require 'query/result/qihu'
|
53
|
-
require 'query/result/qihu_mobile'
|
54
|
-
require 'query/result/sogou'
|
55
|
-
require 'query/result/sogou_mobile'
|
43
|
+
require "nokogiri"
|
44
|
+
require "addressable/uri"
|
data/lib/query/result/baidu.rb
CHANGED
@@ -2,54 +2,44 @@ module Query
|
|
2
2
|
module Result
|
3
3
|
class Baidu
|
4
4
|
include Query::Result
|
5
|
-
|
6
|
-
|
7
|
-
@page.
|
8
|
-
parse_seo(table).merge({:rank => index + 1})
|
9
|
-
end
|
5
|
+
|
6
|
+
def html
|
7
|
+
@page.to_html
|
10
8
|
end
|
11
9
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
# end
|
16
|
-
# end
|
10
|
+
def relatives
|
11
|
+
@page.search("//div[@id='rs']/table/tr/th/a").map{|tag| tag.text}
|
12
|
+
end
|
17
13
|
|
18
|
-
def
|
19
|
-
@
|
20
|
-
|
14
|
+
def seo_ranks
|
15
|
+
return @ranks unless @ranks.nil?
|
16
|
+
@page.search("//div[@id='content_left']/*[contains(@class, 'result')]").map.with_index do |div,index|
|
17
|
+
parse_seo(div).merge(:rank => (index + 1) + (@pagenumber -1) * 10)
|
21
18
|
end
|
22
19
|
end
|
23
20
|
|
24
21
|
def ads_top
|
25
|
-
|
26
|
-
|
22
|
+
@page.search("//div[@id='content_left']/*[not(contains(@class, 'result') or contains(@class, 'leftBlock') or name()='br' or @id='rs_top_new' or @id='super_se_tip' or @class='rs') and position()<=7]").map.with_index do |div, index|
|
23
|
+
parse_ad(div).merge(:rank => (index + 1) + (@pagenumber -1) * 10)
|
27
24
|
end
|
28
25
|
end
|
29
26
|
|
30
27
|
def ads_bottom
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
ads_top
|
28
|
+
@page.search("//div[@id='content_left']/*[not(contains(@class, 'result') or contains(@class, 'leftBlock') or name()='br' or @id='rs_top_new' or @id='super_se_tip' or @class='rs') and position()>=11]").map.with_index do |div, index|
|
29
|
+
parse_ad(div).merge(:rank => (index + 1) + (@pagenumber -1) * 10)
|
30
|
+
end
|
35
31
|
end
|
36
32
|
|
37
33
|
def ads_right
|
38
|
-
@page.search("//div[@id='ec_im_container']/div[@
|
39
|
-
|
40
|
-
url = div.search("*[@class='EC_url']").first.text
|
41
|
-
url = "http://#{url}"
|
42
|
-
{
|
43
|
-
:rank => index + 1,
|
44
|
-
:text => a.text.strip,
|
45
|
-
:href => a['href'].strip,
|
46
|
-
:host => Addressable::URI.parse(URI.encode(url)).host
|
47
|
-
}
|
34
|
+
@page.search("//div[@id='ec_im_container']/div[contains(@class, 'EC_idea')]").map.with_index do |div,index|
|
35
|
+
parse_ad(div).merge(:rank => (index + 1) + (@pagenumber -1) * 10)
|
48
36
|
end
|
49
37
|
end
|
50
38
|
|
51
39
|
def count
|
52
|
-
|
40
|
+
node = @page.search("//div[@class='nums']") + @page.search("//span[@class='nums']")
|
41
|
+
@count ||= node.map{|num|num.content.gsub(/\D/,'').to_i unless num.nil?}.first
|
42
|
+
@count
|
53
43
|
end
|
54
44
|
|
55
45
|
def related_keywords
|
@@ -71,36 +61,42 @@ module Query
|
|
71
61
|
private
|
72
62
|
def parse_ad(div)
|
73
63
|
#@todo should be :
|
74
|
-
|
75
|
-
|
76
|
-
url = div.xpath(".//*[contains(@class,'ec_url')]",MyFilter.new).first
|
64
|
+
title = %w(div[1]/h3/a tbody/tr[2]/td/a[1] a[1]).inject(nil){|ans, xpath| ans || div.xpath(xpath).first}
|
65
|
+
url = %w(div[3]/span tbody/tr[2]/td/a[2] a[3]/font[last()]).inject(nil){|ans, xpath| ans || div.xpath(xpath).first}
|
77
66
|
url = url.nil? ? 'www.baidu.com' : url.text
|
78
67
|
url = "http://" + url
|
79
|
-
{
|
80
|
-
:text => title.text,
|
81
|
-
:href => title['href'],
|
82
|
-
:host => Addressable::URI.parse(URI.encode(url)).host
|
83
|
-
}
|
84
|
-
end
|
85
68
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
69
|
+
begin
|
70
|
+
{
|
71
|
+
:text => title.text.strip,
|
72
|
+
:href => title['href'].to_s.strip,
|
73
|
+
:host => Addressable::URI.parse(URI.encode(url)).host
|
74
|
+
}
|
75
|
+
rescue Exception => e
|
76
|
+
warn "Error in parse_ad method : " + e.message
|
77
|
+
{}
|
95
78
|
end
|
96
|
-
|
97
|
-
href = href.strip if href
|
79
|
+
end
|
98
80
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
81
|
+
def parse_seo(div)
|
82
|
+
title = %w(div[1]/h3/a h3/a div/div[1]/div[1]/div tr[2]/td/table/tr/td/h3/a).inject(nil){|ans, xpath| ans || div.xpath(xpath).first}
|
83
|
+
url = %w(span[@class="g"] span[@class="c-showurl"]/span[@class="c-showurl"] span[@class="c-showurl"] span[@class="op_wiseapp_showurl"] div[@class="op_zhidao_showurl"]).inject(nil){|ans, xpath| ans || div.search(xpath).first}
|
84
|
+
url = url.nil? ? 'www.baidu.com' : url.text.sub(/\d{4}-\d{1,2}-\d{1,2}/,'').strip
|
85
|
+
url = "http://" + url
|
86
|
+
# url = Query::get_redirect_url(title['href'].to_s.strip) if url.include?('elong.com') && title['href']
|
87
|
+
# url = 'http://www.baidu.com' if url.empty?
|
88
|
+
|
89
|
+
begin
|
90
|
+
{
|
91
|
+
:is_vr=> div['class'].include?("result-op"),
|
92
|
+
:text => title.text.strip,
|
93
|
+
:href => title['href'].to_s.strip,
|
94
|
+
:host => Addressable::URI.parse(URI.encode(url)).host
|
95
|
+
}
|
96
|
+
rescue Exception => e
|
97
|
+
warn "Error in parse_seo method : " + e.message
|
98
|
+
{}
|
99
|
+
end
|
104
100
|
end
|
105
101
|
end
|
106
102
|
end
|