query 0.0.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.gitignore +4 -1
 - data/Gemfile +3 -1
 - data/README.md +6 -1
 - data/lib/query/engine/baidu.rb +12 -8
 - data/lib/query/engine/baidu_mobile.rb +4 -4
 - data/lib/query/engine/{qihoo.rb → qihu.rb} +8 -3
 - data/lib/query/engine/{qihoo_mobile.rb → qihu_mobile.rb} +0 -0
 - data/lib/query/engine/sogou.rb +45 -0
 - data/lib/query/engine/sogou_mobile.rb +21 -0
 - data/lib/query/engine.rb +11 -4
 - data/lib/query/result/baidu.rb +57 -91
 - data/lib/query/result/baidu_mobile.rb +49 -93
 - data/lib/query/result/qihu.rb +66 -0
 - data/lib/query/result/{qihoo_mobile.rb → qihu_mobile.rb} +1 -1
 - data/lib/query/result/sogou.rb +103 -0
 - data/lib/query/result/sogou_mobile.rb +51 -0
 - data/lib/query/result.rb +47 -4
 - data/lib/query/version.rb +1 -1
 - data/lib/query.rb +6 -8
 - data/query.gemspec +2 -3
 - data/spec/baidu1_spec.rb +157 -0
 - data/spec/baidu2_spec.rb +156 -0
 - data/spec/mbaidu1_spec.rb +167 -0
 - data/spec/msogou_spec.rb +91 -0
 - data/spec/qihu_spec.rb +87 -0
 - data/spec/samples/baidu1.html +521 -0
 - data/spec/samples/baidu2.html +662 -0
 - data/spec/samples/mbaidu1.html +2 -0
 - data/spec/samples/mbaidu2.html +2 -0
 - data/spec/samples/msogou.html +474 -0
 - data/spec/samples/qihu.html +506 -0
 - data/spec/samples/sogou.html +629 -0
 - data/spec/sogou_mobile_spec.rb +86 -0
 - data/spec/sogou_spec.rb +107 -0
 - data/spec/spec_helper.rb +12 -1
 - metadata +56 -31
 - data/lib/query/engine/base.rb +0 -16
 - data/lib/query/result/base.rb +0 -50
 - data/lib/query/result/qihoo.rb +0 -75
 - data/spec/baidu_mobile_spec.rb +0 -19
 - data/spec/baidu_spec.rb +0 -73
 - data/spec/qihoo_spec.rb +0 -27
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: e5043b0180a473ab3d213136dfe9ab55ccb4a6d9
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: dc0b8b1ee15dc3f4437439712904de92b838520e
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: baa8ca09bc7bfd19f1eb3e9dffa24cf0cc28aba704d8671769bc26a79c68f10c01d57df4f76d4e6558ea638fd6c211111ca21d1883675450971256fc8369bc2a
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 804e6685b6d7d49e563318a9150eae1a655c52cb6f6ca9084c7fbb0c908fde92a44025761668abe2b7aa4b21ba31fbb0c9ebcbc24a6d3c61fd2415e5344b9fd4
         
     | 
    
        data/.gitignore
    CHANGED
    
    
    
        data/Gemfile
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | 
         @@ -3,22 +3,27 @@ Query 
     | 
|
| 
       3 
3 
     | 
    
         | 
| 
       4 
4 
     | 
    
         | 
| 
       5 
5 
     | 
    
         
             
            #to get the result list by querying "abc"
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
       6 
7 
     | 
    
         
             
            Query::Engine::Baidu.new.query("abc").ranks().each do |id,value|
         
     | 
| 
       7 
8 
     | 
    
         
             
                puts id,value
         
     | 
| 
       8 
9 
     | 
    
         
             
            end
         
     | 
| 
       9 
10 
     | 
    
         | 
| 
       10 
11 
     | 
    
         
             
            #to get the result list with host "www.abc.com.cn" by querying "abc"
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
       11 
13 
     | 
    
         
             
            Query::Engine::Baidu.new.query("abc").ranks("www.abc.com.cn").each do |id,value|
         
     | 
| 
       12 
14 
     | 
    
         
             
                puts id,value
         
     | 
| 
       13 
15 
     | 
    
         
             
            end
         
     | 
| 
       14 
16 
     | 
    
         | 
| 
       15 
17 
     | 
    
         
             
            #to get the result list with host which fit the regex /com.cn/ by querying "abc"
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
       16 
19 
     | 
    
         
             
            Query::Engine::Baidu.new.query("abc").ranks(/com.cn/).each do |id,value|
         
     | 
| 
       17 
20 
     | 
    
         
             
                puts id,value
         
     | 
| 
       18 
21 
     | 
    
         
             
            end
         
     | 
| 
       19 
22 
     | 
    
         | 
| 
       20 
23 
     | 
    
         
             
            # to get the top rank of host "www.abc.com.cn" by querying "abc"
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
       21 
25 
     | 
    
         
             
            Query::Engine::Baidu.new.query("abc").rank("www.abc.com.cn")
         
     | 
| 
       22 
26 
     | 
    
         | 
| 
       23 
27 
     | 
    
         
             
            TODO:
         
     | 
| 
       24 
     | 
    
         
            -
            查询结果不多,翻页不存在时的处理,及rspec
         
     | 
| 
      
 28 
     | 
    
         
            +
            查询结果不多,翻页不存在时的处理,及rspec
         
     | 
| 
      
 29 
     | 
    
         
            +
            增加其他搜索引擎
         
     | 
    
        data/lib/query/engine/baidu.rb
    CHANGED
    
    | 
         @@ -1,7 +1,11 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Query
         
     | 
| 
       2 
2 
     | 
    
         
             
                module Engine
         
     | 
| 
       3 
     | 
    
         
            -
                    class Baidu 
     | 
| 
      
 3 
     | 
    
         
            +
                    class Baidu
         
     | 
| 
      
 4 
     | 
    
         
            +
                        include Query::Engine
         
     | 
| 
       4 
5 
     | 
    
         
             
                        BaseUri = 'http://www.baidu.com/s?'
         
     | 
| 
      
 6 
     | 
    
         
            +
                        Options = {
         
     | 
| 
      
 7 
     | 
    
         
            +
                            :headers => {"User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'}
         
     | 
| 
      
 8 
     | 
    
         
            +
                        }
         
     | 
| 
       5 
9 
     | 
    
         
             
                        def self.suggestions(wd)
         
     | 
| 
       6 
10 
     | 
    
         
             
                            require 'json'
         
     | 
| 
       7 
11 
     | 
    
         
             
                            json = HTTParty.get("http://suggestion.baidu.com/su?wd=#{URI.encode(wd)}&cb=callback").force_encoding('GBK').encode("UTF-8")
         
     | 
| 
         @@ -37,25 +41,25 @@ module Query 
     | 
|
| 
       37 
41 
     | 
    
         
             
                            return HTTParty.get("http://index.baidu.com/main/word.php?word=#{URI.encode(wd.encode("GBK"))}").include?"boxFlash"
         
     | 
| 
       38 
42 
     | 
    
         
             
                        end
         
     | 
| 
       39 
43 
     | 
    
         | 
| 
       40 
     | 
    
         
            -
                        def query(wd)
         
     | 
| 
      
 44 
     | 
    
         
            +
                        def self.query(wd)
         
     | 
| 
       41 
45 
     | 
    
         
             
                            q = Array.new
         
     | 
| 
       42 
46 
     | 
    
         
             
                            q << "wd=#{wd}"
         
     | 
| 
       43 
47 
     | 
    
         
             
                            q << "rn=#{@perpage.to_i}" if @perpage
         
     | 
| 
       44 
48 
     | 
    
         
             
                            queryStr = q.join("&")
         
     | 
| 
       45 
49 
     | 
    
         
             
                            #uri = URI.encode((BaseUri + queryStr).encode('GBK'))
         
     | 
| 
       46 
50 
     | 
    
         
             
                            uri = URI.encode((BaseUri + queryStr))
         
     | 
| 
       47 
     | 
    
         
            -
                            begin
         
     | 
| 
      
 51 
     | 
    
         
            +
                            # begin
         
     | 
| 
       48 
52 
     | 
    
         
             
                                # @page = @a.get uri
         
     | 
| 
       49 
     | 
    
         
            -
                                @page = HTTParty.get 
     | 
| 
      
 53 
     | 
    
         
            +
                                @page = HTTParty.get(uri,Options)
         
     | 
| 
       50 
54 
     | 
    
         
             
                                r = Query::Result::Baidu.new(@page)
         
     | 
| 
       51 
55 
     | 
    
         
             
                                r.baseuri = uri
         
     | 
| 
       52 
56 
     | 
    
         
             
                                r.pagenumber = 1
         
     | 
| 
       53 
57 
     | 
    
         
             
                                r.perpage = @perpage
         
     | 
| 
       54 
58 
     | 
    
         
             
                                r
         
     | 
| 
       55 
     | 
    
         
            -
                            rescue Exception => e
         
     | 
| 
       56 
     | 
    
         
            -
             
     | 
| 
       57 
     | 
    
         
            -
             
     | 
| 
       58 
     | 
    
         
            -
                            end
         
     | 
| 
      
 59 
     | 
    
         
            +
                            # rescue Exception => e
         
     | 
| 
      
 60 
     | 
    
         
            +
                            #     warn e.to_s
         
     | 
| 
      
 61 
     | 
    
         
            +
                            #     return false
         
     | 
| 
      
 62 
     | 
    
         
            +
                            # end
         
     | 
| 
       59 
63 
     | 
    
         
             
            =begin
         
     | 
| 
       60 
64 
     | 
    
         
             
                            query = "#{query}"
         
     | 
| 
       61 
65 
     | 
    
         
             
                            @uri = BaseUri+URI.encode(query.encode('GBK'))
         
     | 
| 
         @@ -1,11 +1,11 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Query
         
     | 
| 
       2 
2 
     | 
    
         
             
                module Engine
         
     | 
| 
       3 
     | 
    
         
            -
                    class BaiduMobile 
     | 
| 
      
 3 
     | 
    
         
            +
                    class BaiduMobile
         
     | 
| 
      
 4 
     | 
    
         
            +
                        include Query::Engine
         
     | 
| 
       4 
5 
     | 
    
         
             
                        BaseUri = 'http://m.baidu.com/s?'
         
     | 
| 
       5 
     | 
    
         
            -
                         
     | 
| 
       6 
     | 
    
         
            -
                            "User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'
         
     | 
| 
      
 6 
     | 
    
         
            +
                        Options = {
         
     | 
| 
      
 7 
     | 
    
         
            +
                            :headers => {"User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'}
         
     | 
| 
       7 
8 
     | 
    
         
             
                        }
         
     | 
| 
       8 
     | 
    
         
            -
                        Options = {:headers => headers}
         
     | 
| 
       9 
9 
     | 
    
         | 
| 
       10 
10 
     | 
    
         
             
                        #基本查询,相当于从搜索框直接输入关键词查询
         
     | 
| 
       11 
11 
     | 
    
         
             
                        def query(wd)
         
     | 
| 
         @@ -1,15 +1,20 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Query
         
     | 
| 
       2 
2 
     | 
    
         
             
                module Engine
         
     | 
| 
       3 
     | 
    
         
            -
                    class  
     | 
| 
      
 3 
     | 
    
         
            +
                    class Qihu
         
     | 
| 
      
 4 
     | 
    
         
            +
                        include Query::Engine
         
     | 
| 
       4 
5 
     | 
    
         
             
                        Host = 'www.so.com'
         
     | 
| 
      
 6 
     | 
    
         
            +
                        headers = {
         
     | 
| 
      
 7 
     | 
    
         
            +
                            "User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'
         
     | 
| 
      
 8 
     | 
    
         
            +
                        }
         
     | 
| 
      
 9 
     | 
    
         
            +
                        Options = {:headers => headers}
         
     | 
| 
       5 
10 
     | 
    
         
             
                        #基本查询, 相当于在搜索框直接数据关键词查询
         
     | 
| 
       6 
11 
     | 
    
         
             
                        def query(wd)
         
     | 
| 
       7 
12 
     | 
    
         
             
                            #用原始路径请求
         
     | 
| 
       8 
13 
     | 
    
         
             
                            uri = URI.join("http://#{Host}/",URI.encode('s?q='+wd)).to_s
         
     | 
| 
       9 
     | 
    
         
            -
                            page = HTTParty.get(uri)
         
     | 
| 
      
 14 
     | 
    
         
            +
                            page = HTTParty.get(uri,Options)
         
     | 
| 
       10 
15 
     | 
    
         
             
                            #如果请求地址被跳转,重新获取当前页的URI,可避免翻页错误
         
     | 
| 
       11 
16 
     | 
    
         
             
                            uri = URI.join("http://#{Host}/",page.request.path).to_s
         
     | 
| 
       12 
     | 
    
         
            -
                            r = Query::Result:: 
     | 
| 
      
 17 
     | 
    
         
            +
                            r = Query::Result::Qihu.new(page)
         
     | 
| 
       13 
18 
     | 
    
         
             
                            r.baseuri = uri
         
     | 
| 
       14 
19 
     | 
    
         
             
                            r
         
     | 
| 
       15 
20 
     | 
    
         
             
                        end
         
     | 
| 
         
            File without changes
         
     | 
| 
         @@ -0,0 +1,45 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Query
         
     | 
| 
      
 2 
     | 
    
         
            +
            	module Engine
         
     | 
| 
      
 3 
     | 
    
         
            +
            		class Sogou
         
     | 
| 
      
 4 
     | 
    
         
            +
            			include Query::Engine
         
     | 
| 
      
 5 
     | 
    
         
            +
            			BaseUri = 'http://www.sogou.com/web?'
         
     | 
| 
      
 6 
     | 
    
         
            +
                  Options = {
         
     | 
| 
      
 7 
     | 
    
         
            +
                      :headers => {"User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'}
         
     | 
| 
      
 8 
     | 
    
         
            +
                  }
         
     | 
| 
      
 9 
     | 
    
         
            +
            			class << self
         
     | 
| 
      
 10 
     | 
    
         
            +
            				def query(wd)
         
     | 
| 
      
 11 
     | 
    
         
            +
            					q = []
         
     | 
| 
      
 12 
     | 
    
         
            +
            					q << "query=#{wd}"
         
     | 
| 
      
 13 
     | 
    
         
            +
            					uri = URI.encode BaseUri+q.join('&')
         
     | 
| 
      
 14 
     | 
    
         
            +
            					page = HTTParty.get(uri,Options)
         
     | 
| 
      
 15 
     | 
    
         
            +
            					r = Query::Result::Sogou.new(page)
         
     | 
| 
      
 16 
     | 
    
         
            +
            					r.baseuri = uri
         
     | 
| 
      
 17 
     | 
    
         
            +
            					r.perpage = @perpage
         
     | 
| 
      
 18 
     | 
    
         
            +
            					r.pagenumber = 1
         
     | 
| 
      
 19 
     | 
    
         
            +
            					r
         
     | 
| 
      
 20 
     | 
    
         
            +
            				end
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
            				def suggestions(word)
         
     | 
| 
      
 23 
     | 
    
         
            +
            					suggestions = HTTParty.get "http://w.sugg.sogou.com/sugg/ajaj_json.jsp?key=#{URI.encode(word)}"
         
     | 
| 
      
 24 
     | 
    
         
            +
            					suggestions = suggestions.encode('utf-8').scan /#{word}[^"]+/
         
     | 
| 
      
 25 
     | 
    
         
            +
            					suggestions
         
     | 
| 
      
 26 
     | 
    
         
            +
            				end
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
                    #site:xxx.yyy.com
         
     | 
| 
      
 29 
     | 
    
         
            +
            				def pages(host)
         
     | 
| 
      
 30 
     | 
    
         
            +
            					query("site:#{host}")
         
     | 
| 
      
 31 
     | 
    
         
            +
            				end
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
                    #domain:xxx.yyy.com/path/file.html
         
     | 
| 
      
 34 
     | 
    
         
            +
            				def links(uri)
         
     | 
| 
      
 35 
     | 
    
         
            +
            					query("domain:\"#{uri}\"")
         
     | 
| 
      
 36 
     | 
    
         
            +
            				end
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
                    #site:xxx.yyy.com inurl:zzz
         
     | 
| 
      
 39 
     | 
    
         
            +
            				# def pages_with(host,string)
         
     | 
| 
      
 40 
     | 
    
         
            +
                #       query("site:#{host} inurl:#{string}")
         
     | 
| 
      
 41 
     | 
    
         
            +
            				# end
         
     | 
| 
      
 42 
     | 
    
         
            +
            			end
         
     | 
| 
      
 43 
     | 
    
         
            +
            		end
         
     | 
| 
      
 44 
     | 
    
         
            +
            	end
         
     | 
| 
      
 45 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,21 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Query
         
     | 
| 
      
 2 
     | 
    
         
            +
            	module Engine
         
     | 
| 
      
 3 
     | 
    
         
            +
            		class SogouMobile
         
     | 
| 
      
 4 
     | 
    
         
            +
            			include Query::Engine
         
     | 
| 
      
 5 
     | 
    
         
            +
            			BaseUri = 'http://wap.sogou.com/web/searchList.jsp'
         
     | 
| 
      
 6 
     | 
    
         
            +
            			Options = {
         
     | 
| 
      
 7 
     | 
    
         
            +
            				:headers => {"User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'}
         
     | 
| 
      
 8 
     | 
    
         
            +
            			}
         
     | 
| 
      
 9 
     | 
    
         
            +
            			class << self
         
     | 
| 
      
 10 
     | 
    
         
            +
            				def query(wd)
         
     | 
| 
      
 11 
     | 
    
         
            +
            				queryStr = "keyword=#{wd}"
         
     | 
| 
      
 12 
     | 
    
         
            +
            				uri = URI.encode(BaseUri + "?" + queryStr)
         
     | 
| 
      
 13 
     | 
    
         
            +
            				res = HTTParty.get(uri,Options)
         
     | 
| 
      
 14 
     | 
    
         
            +
            				r = Query::Result::SogouMobile.new(res)
         
     | 
| 
      
 15 
     | 
    
         
            +
            				r.baseuri = uri
         
     | 
| 
      
 16 
     | 
    
         
            +
            				r
         
     | 
| 
      
 17 
     | 
    
         
            +
            				end
         
     | 
| 
      
 18 
     | 
    
         
            +
            			end
         
     | 
| 
      
 19 
     | 
    
         
            +
            		end
         
     | 
| 
      
 20 
     | 
    
         
            +
            	end
         
     | 
| 
      
 21 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/query/engine.rb
    CHANGED
    
    | 
         @@ -1,10 +1,17 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Query
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Engine
         
     | 
| 
      
 3 
     | 
    
         
            +
                attr_accessor :perpage
         
     | 
| 
      
 4 
     | 
    
         
            +
                def self.indexed?(url)
         
     | 
| 
      
 5 
     | 
    
         
            +
                  URI(url)
         
     | 
| 
      
 6 
     | 
    
         
            +
                  result = query(url)
         
     | 
| 
      
 7 
     | 
    
         
            +
                  return result.has_result?
         
     | 
| 
      
 8 
     | 
    
         
            +
                end
         
     | 
| 
       3 
9 
     | 
    
         
             
              end
         
     | 
| 
       4 
10 
     | 
    
         
             
            end
         
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
            require 'query/engine/base'
         
     | 
| 
      
 11 
     | 
    
         
            +
            require 'httparty'
         
     | 
| 
       7 
12 
     | 
    
         
             
            require 'query/engine/baidu'
         
     | 
| 
       8 
13 
     | 
    
         
             
            require 'query/engine/baidu_mobile'
         
     | 
| 
       9 
     | 
    
         
            -
            require 'query/engine/ 
     | 
| 
       10 
     | 
    
         
            -
            require 'query/engine/ 
     | 
| 
      
 14 
     | 
    
         
            +
            require 'query/engine/qihu'
         
     | 
| 
      
 15 
     | 
    
         
            +
            require 'query/engine/qihu_mobile'
         
     | 
| 
      
 16 
     | 
    
         
            +
            require 'query/engine/sogou'
         
     | 
| 
      
 17 
     | 
    
         
            +
            require 'query/engine/sogou_mobile'
         
     | 
    
        data/lib/query/result/baidu.rb
    CHANGED
    
    | 
         @@ -1,94 +1,40 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Query
         
     | 
| 
       2 
2 
     | 
    
         
             
                module Result
         
     | 
| 
       3 
     | 
    
         
            -
                    class Baidu 
     | 
| 
       4 
     | 
    
         
            -
                         
     | 
| 
      
 3 
     | 
    
         
            +
                    class Baidu
         
     | 
| 
      
 4 
     | 
    
         
            +
                        include Query::Result
         
     | 
| 
      
 5 
     | 
    
         
            +
                        def seo_ranks
         
     | 
| 
       5 
6 
     | 
    
         
             
                            return @ranks unless @ranks.nil?
         
     | 
| 
       6 
     | 
    
         
            -
                            @ 
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
                                id = table['id']
         
     | 
| 
       9 
     | 
    
         
            -
                                # if @perpage == 10
         
     | 
| 
       10 
     | 
    
         
            -
                                #     id = table['id'][-1,1]
         
     | 
| 
       11 
     | 
    
         
            -
                                #     id = '10' if id == '0'
         
     | 
| 
       12 
     | 
    
         
            -
                                # end
         
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
                                @ranks[id] = Hash.new
         
     | 
| 
       15 
     | 
    
         
            -
                                url = table.search("[@class=\"g\"]").first
         
     | 
| 
       16 
     | 
    
         
            -
                                url = url.text unless url.nil?
         
     | 
| 
       17 
     | 
    
         
            -
                                a = table.search("h3").first
         
     | 
| 
       18 
     | 
    
         
            -
                                next if a.nil?
         
     | 
| 
       19 
     | 
    
         
            -
                                @ranks[id]['text'] = a.text
         
     | 
| 
       20 
     | 
    
         
            -
                                @ranks[id]['href'] = url #a.first['href'].sub('http://www.baidu.com/link?url=','').strip
         
     | 
| 
       21 
     | 
    
         
            -
                                unless url.nil?
         
     | 
| 
       22 
     | 
    
         
            -
                                    url = url.strip
         
     | 
| 
       23 
     | 
    
         
            -
                                    @ranks[id]['host'] = Addressable::URI.parse(URI.encode("http://#{url}")).host
         
     | 
| 
       24 
     | 
    
         
            -
                                else
         
     | 
| 
       25 
     | 
    
         
            -
                                    @ranks[id]['host'] = nil
         
     | 
| 
       26 
     | 
    
         
            -
                                end
         
     | 
| 
      
 7 
     | 
    
         
            +
                            @page.search("//*[@class='result']|//*[@class='result-op']|//*[@class='result-op c-container']").map.with_index do |table,index|
         
     | 
| 
      
 8 
     | 
    
         
            +
                                parse_seo(table).merge({:rank => index + 1})
         
     | 
| 
       27 
9 
     | 
    
         
             
                            end
         
     | 
| 
       28 
     | 
    
         
            -
                            #@page.search("//table[@class=\"result\"]").map{|table|@page.search("//table[@id=\"#{table['id']}\"]//span[@class=\"g\"]").first}.map{|rank|URI(URI.encode('http://'+rank.text.strip)).host unless rank.nil?}
         
     | 
| 
       29 
     | 
    
         
            -
                            @ranks
         
     | 
| 
       30 
10 
     | 
    
         
             
                        end
         
     | 
| 
       31 
11 
     | 
    
         | 
| 
       32 
     | 
    
         
            -
                        def ads_bottom
         
     | 
| 
       33 
     | 
    
         
            -
                            return {} if @page.search("//table[@bgcolor='f5f5f5']").empty?
         
     | 
| 
       34 
     | 
    
         
            -
                            return ads_top
         
     | 
| 
       35 
     | 
    
         
            -
                            # p @page.search("//table[@bgcolor='f5f5f5']").empty?
         
     | 
| 
       36 
     | 
    
         
            -
                        end
         
     | 
| 
       37 
12 
     | 
    
         
             
                        def ads_top
         
     | 
| 
       38 
     | 
    
         
            -
                             
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
                            @page.search("//table[@bgcolor='#f5f5f5']").each do |table|
         
     | 
| 
       41 
     | 
    
         
            -
                                id = table['id']
         
     | 
| 
       42 
     | 
    
         
            -
                                next if id.nil?
         
     | 
| 
       43 
     | 
    
         
            -
                                id = id[2,3].to_i.to_s
         
     | 
| 
       44 
     | 
    
         
            -
                                ads[id]= parse_ad(table)
         
     | 
| 
      
 13 
     | 
    
         
            +
                            @page.search("//*[@class='result']/preceding-sibling::*[contains(@class,'EC_result')]").map.with_index do |div, index|
         
     | 
| 
      
 14 
     | 
    
         
            +
                                parse_ad(div).merge(:rank => index + 1)
         
     | 
| 
       45 
15 
     | 
    
         
             
                            end
         
     | 
| 
       46 
     | 
    
         
            -
                            #白色底推广,只有上部分
         
     | 
| 
       47 
     | 
    
         
            -
                            if ads.empty?
         
     | 
| 
       48 
     | 
    
         
            -
                                @page.search("//table").each do |table|
         
     | 
| 
       49 
     | 
    
         
            -
                                    id = table['id']
         
     | 
| 
       50 
     | 
    
         
            -
                                    next if id.nil? or id.to_i<3000
         
     | 
| 
       51 
     | 
    
         
            -
                                    id = id[2,3].to_i.to_s
         
     | 
| 
       52 
     | 
    
         
            -
                                    ads[id]= parse_ad(table)
         
     | 
| 
       53 
     | 
    
         
            -
                                end
         
     | 
| 
       54 
     | 
    
         
            -
                            end
         
     | 
| 
       55 
     | 
    
         
            -
                            ads
         
     | 
| 
       56 
16 
     | 
    
         
             
                        end
         
     | 
| 
       57 
     | 
    
         
            -
             
     | 
| 
       58 
     | 
    
         
            -
             
     | 
| 
       59 
     | 
    
         
            -
                             
     | 
| 
       60 
     | 
    
         
            -
             
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
                        def ads_bottom
         
     | 
| 
      
 19 
     | 
    
         
            +
                            @page.search("//*[@class='result']/following-sibling::*[contains(@class,'EC_result')]").map.with_index do |div,index|
         
     | 
| 
      
 20 
     | 
    
         
            +
                                parse_ad(div).merge(:rank => index + 1)
         
     | 
| 
      
 21 
     | 
    
         
            +
                            end
         
     | 
| 
       61 
22 
     | 
    
         
             
                        end
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
       62 
24 
     | 
    
         
             
                        def ads_right
         
     | 
| 
       63 
     | 
    
         
            -
                             
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
                                 
     | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
                                     
     | 
| 
       69 
     | 
    
         
            -
                                     
     | 
| 
       70 
     | 
    
         
            -
                                     
     | 
| 
       71 
     | 
    
         
            -
                                     
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
                                    ads[id.to_s] = {'title'=>title,'href'=>url,'host'=>url}
         
     | 
| 
       74 
     | 
    
         
            -
                                end
         
     | 
| 
      
 25 
     | 
    
         
            +
                            @page.search("//div[@id='ec_im_container']/div[@id]").map.with_index do |div,index|
         
     | 
| 
      
 26 
     | 
    
         
            +
                                a = div.search('a').first
         
     | 
| 
      
 27 
     | 
    
         
            +
                                url = div.search("*[@class='EC_url']").first.text
         
     | 
| 
      
 28 
     | 
    
         
            +
                                url = "http://#{url}"
         
     | 
| 
      
 29 
     | 
    
         
            +
                                {
         
     | 
| 
      
 30 
     | 
    
         
            +
                                    :rank => index + 1,
         
     | 
| 
      
 31 
     | 
    
         
            +
                                    :text => a.text.strip,
         
     | 
| 
      
 32 
     | 
    
         
            +
                                    :href => a['href'].strip,
         
     | 
| 
      
 33 
     | 
    
         
            +
                                    :host => Addressable::URI.parse(URI.encode(url)).host
         
     | 
| 
      
 34 
     | 
    
         
            +
                                }
         
     | 
| 
       75 
35 
     | 
    
         
             
                            end
         
     | 
| 
       76 
     | 
    
         
            -
                            ads
         
     | 
| 
       77 
36 
     | 
    
         
             
                        end
         
     | 
| 
       78 
37 
     | 
    
         | 
| 
       79 
     | 
    
         
            -
                        #return the top rank number from @ranks with the input host
         
     | 
| 
       80 
     | 
    
         
            -
                        # def rank(host)#on base of ranks
         
     | 
| 
       81 
     | 
    
         
            -
                        #     ranks.each do |id,line|
         
     | 
| 
       82 
     | 
    
         
            -
                        #         id = id.to_i
         
     | 
| 
       83 
     | 
    
         
            -
                        #         if host.class == Regexp
         
     | 
| 
       84 
     | 
    
         
            -
                        #             return id if line['host'] =~ host
         
     | 
| 
       85 
     | 
    
         
            -
                        #         elsif host.class == String
         
     | 
| 
       86 
     | 
    
         
            -
                        #             return id if line['host'] == host
         
     | 
| 
       87 
     | 
    
         
            -
                        #         end
         
     | 
| 
       88 
     | 
    
         
            -
                        #     end
         
     | 
| 
       89 
     | 
    
         
            -
                        #     return nil
         
     | 
| 
       90 
     | 
    
         
            -
                        # end
         
     | 
| 
       91 
     | 
    
         
            -
             
     | 
| 
       92 
38 
     | 
    
         
             
                        def count
         
     | 
| 
       93 
39 
     | 
    
         
             
                            @count ||= @page.search("//span[@class='nums']").map{|num|num.content.gsub(/\D/,'').to_i unless num.nil?}.first
         
     | 
| 
       94 
40 
     | 
    
         
             
                        end
         
     | 
| 
         @@ -97,25 +43,45 @@ module Query 
     | 
|
| 
       97 
43 
     | 
    
         
             
                            @related_keywords ||= @page.search("//div[@id=\"rs\"]//tr//a").map{|keyword| keyword.text}
         
     | 
| 
       98 
44 
     | 
    
         
             
                        end
         
     | 
| 
       99 
45 
     | 
    
         | 
| 
       100 
     | 
    
         
            -
                        def next
         
     | 
| 
       101 
     | 
    
         
            -
                            url = @page.xpath('//a[text()="下一页>"]').first
         
     | 
| 
       102 
     | 
    
         
            -
                            return if url.nil?
         
     | 
| 
       103 
     | 
    
         
            -
                            url = url['href']
         
     | 
| 
       104 
     | 
    
         
            -
                            url = URI.join(@baseuri,url).to_s
         
     | 
| 
       105 
     | 
    
         
            -
                            page = HTTParty.get(url)
         
     | 
| 
       106 
     | 
    
         
            -
                            r = Query::Result::Baidu.new(page)
         
     | 
| 
       107 
     | 
    
         
            -
                            r.baseuri = url
         
     | 
| 
       108 
     | 
    
         
            -
                            r.pagenumber=@pagenumber+1
         
     | 
| 
       109 
     | 
    
         
            -
                            r.perpage=@perpage
         
     | 
| 
       110 
     | 
    
         
            -
                            r
         
     | 
| 
       111 
     | 
    
         
            -
             
     | 
| 
       112 
     | 
    
         
            -
                            # @page = BaiduResult.new(Mechanize.new.click(@page.link_with(:text=>/下一页/))) unless @page.link_with(:text=>/下一页/).nil?
         
     | 
| 
       113 
     | 
    
         
            -
                        end
         
     | 
| 
       114 
46 
     | 
    
         
             
                        def has_result?
         
     | 
| 
       115 
47 
     | 
    
         
             
                            submit = @page.search('//a[text()="提交网址"]').first
         
     | 
| 
       116 
48 
     | 
    
         
             
                            return false if submit and submit['href'].include?'sitesubmit'
         
     | 
| 
       117 
49 
     | 
    
         
             
                            return true
         
     | 
| 
       118 
50 
     | 
    
         
             
                        end
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
                        def next_url
         
     | 
| 
      
 53 
     | 
    
         
            +
                            @page.search("//a[text()='下一页>']").first['href']
         
     | 
| 
      
 54 
     | 
    
         
            +
                        end
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
                        private
         
     | 
| 
      
 57 
     | 
    
         
            +
                        def parse_ad(div)
         
     | 
| 
      
 58 
     | 
    
         
            +
                            #@todo  should be :
         
     | 
| 
      
 59 
     | 
    
         
            +
                            #title = div.xpath("*[contains(@class,'ec_title')]",MyFilter.new).first
         
     | 
| 
      
 60 
     | 
    
         
            +
                            title = div.xpath("//*[contains(@class,'ec_title')]",MyFilter.new).first
         
     | 
| 
      
 61 
     | 
    
         
            +
                            url = %w( span[@class='ec_url']  a[@class='EC_url'] ).map do |xpath|
         
     | 
| 
      
 62 
     | 
    
         
            +
                                node = div.search(xpath).first
         
     | 
| 
      
 63 
     | 
    
         
            +
                                node.text if node
         
     | 
| 
      
 64 
     | 
    
         
            +
                            end.compact.first
         
     | 
| 
      
 65 
     | 
    
         
            +
                            url = "http://" + url
         
     | 
| 
      
 66 
     | 
    
         
            +
                            {
         
     | 
| 
      
 67 
     | 
    
         
            +
                                :text => title.text,
         
     | 
| 
      
 68 
     | 
    
         
            +
                                :href => title['href'],
         
     | 
| 
      
 69 
     | 
    
         
            +
                                :host => Addressable::URI.parse(URI.encode(url)).host
         
     | 
| 
      
 70 
     | 
    
         
            +
                            }
         
     | 
| 
      
 71 
     | 
    
         
            +
                        end
         
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
      
 73 
     | 
    
         
            +
                        def parse_seo(table)
         
     | 
| 
      
 74 
     | 
    
         
            +
                            url = %w( span[@class="g"]  span[@class="c-showurl"] div[@class="op_zhidao_showurl"]).map do |xpath|
         
     | 
| 
      
 75 
     | 
    
         
            +
                                span = table.search(xpath).first
         
     | 
| 
      
 76 
     | 
    
         
            +
                                span.text.sub(/\d{4}-\d{1,2}-\d{1,2}/,'').strip if span
         
     | 
| 
      
 77 
     | 
    
         
            +
                            end.compact.first
         
     | 
| 
      
 78 
     | 
    
         
            +
                            host = Addressable::URI.parse(URI.encode("http://#{url}")).host
         
     | 
| 
      
 79 
     | 
    
         
            +
                            {
         
     | 
| 
      
 80 
     | 
    
         
            +
                                :text => table.search("h3").first.text.strip,
         
     | 
| 
      
 81 
     | 
    
         
            +
                                :href => table.search('a').first['href'].strip,
         
     | 
| 
      
 82 
     | 
    
         
            +
                                :host => host
         
     | 
| 
      
 83 
     | 
    
         
            +
                            }
         
     | 
| 
      
 84 
     | 
    
         
            +
                        end
         
     | 
| 
       119 
85 
     | 
    
         
             
                    end
         
     | 
| 
       120 
86 
     | 
    
         
             
                end
         
     | 
| 
       121 
87 
     | 
    
         
             
            end
         
     | 
| 
         @@ -1,113 +1,69 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Query
         
     | 
| 
       2 
2 
     | 
    
         
             
                module Result
         
     | 
| 
       3 
     | 
    
         
            -
                    class BaiduMobile 
     | 
| 
       4 
     | 
    
         
            -
                         
     | 
| 
       5 
     | 
    
         
            -
                        def ranks
         
     | 
| 
       6 
     | 
    
         
            -
                            #如果已经赋值说明解析过,不需要重新解析,直接返回结果
         
     | 
| 
       7 
     | 
    
         
            -
                            return @ranks unless @ranks.nil?
         
     | 
| 
       8 
     | 
    
         
            -
                            @ranks = Hash.new
         
     | 
| 
       9 
     | 
    
         
            -
                            @page.xpath('//div[@class="result"]').each do |result|
         
     | 
| 
       10 
     | 
    
         
            -
                                href,text,host,is_mobile = '','','',false
         
     | 
| 
       11 
     | 
    
         
            -
                                a = result.search("a").first
         
     | 
| 
       12 
     | 
    
         
            -
                                is_mobile = true unless a.search("img").empty?
         
     | 
| 
       13 
     | 
    
         
            -
                                host = result.search('[@class="site"]').first
         
     | 
| 
       14 
     | 
    
         
            -
                                next if host.nil?
         
     | 
| 
       15 
     | 
    
         
            -
                                host = host.text
         
     | 
| 
       16 
     | 
    
         
            -
                                href = a['href']
         
     | 
| 
       17 
     | 
    
         
            -
                                text = a.text
         
     | 
| 
       18 
     | 
    
         
            -
                                id = href.scan(/&order=(\d+)&/)
         
     | 
| 
       19 
     | 
    
         
            -
                                if id.empty?
         
     | 
| 
       20 
     | 
    
         
            -
                                    id = nil
         
     | 
| 
       21 
     | 
    
         
            -
                                else
         
     | 
| 
       22 
     | 
    
         
            -
                                    id = id.first.first.to_i
         
     | 
| 
       23 
     | 
    
         
            -
                                    # id = (@pagenumber-1)*10+id
         
     | 
| 
       24 
     | 
    
         
            -
                                end
         
     | 
| 
       25 
     | 
    
         
            -
            =begin
         
     | 
| 
       26 
     | 
    
         
            -
                                result.children.each do |elem|
         
     | 
| 
       27 
     | 
    
         
            -
                                    if elem.name == 'a'
         
     | 
| 
       28 
     | 
    
         
            -
                                        href = elem['href']
         
     | 
| 
       29 
     | 
    
         
            -
                                        id = elem.text.match(/^\d+/).to_s.to_i
         
     | 
| 
       30 
     | 
    
         
            -
                                        text = elem.text.sub(/^\d+/,'')
         
     | 
| 
       31 
     | 
    
         
            -
                                        text.sub!(/^\u00A0/,'')
         
     | 
| 
       32 
     | 
    
         
            -
                                    elsif elem['class'] == 'abs'
         
     | 
| 
       33 
     | 
    
         
            -
                                        elem.children.each do |elem2|
         
     | 
| 
       34 
     | 
    
         
            -
                                            if elem2['class'] == 'site'
         
     | 
| 
       35 
     | 
    
         
            -
                                                host = elem2.text
         
     | 
| 
       36 
     | 
    
         
            -
                                                break
         
     | 
| 
       37 
     | 
    
         
            -
                                            end
         
     | 
| 
       38 
     | 
    
         
            -
                                        end
         
     | 
| 
       39 
     | 
    
         
            -
                                    elsif elem['class'] == 'site'
         
     | 
| 
       40 
     | 
    
         
            -
                                        host == elem['href']
         
     | 
| 
       41 
     | 
    
         
            -
                                    end
         
     | 
| 
       42 
     | 
    
         
            -
                                end
         
     | 
| 
       43 
     | 
    
         
            -
            =end
         
     | 
| 
      
 3 
     | 
    
         
            +
                    class BaiduMobile
         
     | 
| 
      
 4 
     | 
    
         
            +
                        include Query::Result
         
     | 
| 
       44 
5 
     | 
    
         | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
      
 6 
     | 
    
         
            +
                        def seo_ranks
         
     | 
| 
      
 7 
     | 
    
         
            +
                            @seo_ranks ||= @page.search("//*[@class='result']|//*[@class='card-result wa-ue-card-result']|//*[@class='result card-result wma-card-box']").map.with_index do |div,index|
         
     | 
| 
      
 8 
     | 
    
         
            +
                                parse_seo(div).merge({:rank => index + 1})
         
     | 
| 
       46 
9 
     | 
    
         
             
                            end
         
     | 
| 
       47 
     | 
    
         
            -
                            @ranks
         
     | 
| 
       48 
10 
     | 
    
         
             
                        end
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
       49 
12 
     | 
    
         
             
                        def ads_top
         
     | 
| 
       50 
     | 
    
         
            -
                             
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
             
     | 
| 
       53 
     | 
    
         
            -
                                id += 1
         
     | 
| 
       54 
     | 
    
         
            -
                                href = div.search("span[@class='ec_site']").first.text
         
     | 
| 
       55 
     | 
    
         
            -
                                href = "http://#{href}"
         
     | 
| 
       56 
     | 
    
         
            -
                                title = div.search("a/text()").text.strip
         
     | 
| 
       57 
     | 
    
         
            -
                                host = Addressable::URI.parse(URI.encode(href)).host
         
     | 
| 
       58 
     | 
    
         
            -
                                result[id] = {'title'=>title,'href'=>href,'host'=>host}
         
     | 
| 
      
 13 
     | 
    
         
            +
                            @ads_top ||= @page.search("//*[@class='result']/preceding-sibling::div[@class='ec_wise_ad']/div").map.with_index do |div,index|
         
     | 
| 
      
 14 
     | 
    
         
            +
                                puts index
         
     | 
| 
      
 15 
     | 
    
         
            +
                                parse_ad(div).merge({:rank => index + 1})
         
     | 
| 
       59 
16 
     | 
    
         
             
                            end
         
     | 
| 
       60 
     | 
    
         
            -
                            result
         
     | 
| 
       61 
17 
     | 
    
         
             
                        end
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
       62 
19 
     | 
    
         
             
                        def ads_right
         
     | 
| 
       63 
20 
     | 
    
         
             
                            []
         
     | 
| 
       64 
21 
     | 
    
         
             
                        end
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
       65 
23 
     | 
    
         
             
                        def ads_bottom
         
     | 
| 
       66 
     | 
    
         
            -
                            []
         
     | 
| 
      
 24 
     | 
    
         
            +
                            @ads_bottom ||= @page.search("//*[@class='result']/following-sibling::div[@class='ec_wise_ad']/div/div").map.with_index do |div,index|
         
     | 
| 
      
 25 
     | 
    
         
            +
                                parse_ad(div).merge({:rank => index + 1})
         
     | 
| 
      
 26 
     | 
    
         
            +
                            end
         
     | 
| 
       67 
27 
     | 
    
         
             
                        end
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
                        #酒店预订 酒店英文 酒店团购 酒店管理 酒店招聘 快捷酒店 如家快捷酒店 五星级酒店
         
     | 
| 
       68 
30 
     | 
    
         
             
                        def related_keywords
         
     | 
| 
       69 
     | 
    
         
            -
                            @related_keywords ||= @page.search("div[@class=' 
     | 
| 
      
 31 
     | 
    
         
            +
                            @related_keywords ||= @page.search("div[@class='rw-list']/a").map{|a|a.text}
         
     | 
| 
       70 
32 
     | 
    
         
             
                        end
         
     | 
| 
       71 
     | 
    
         
            -
             
     | 
| 
       72 
     | 
    
         
            -
                         
     | 
| 
       73 
     | 
    
         
            -
             
     | 
| 
       74 
     | 
    
         
            -
                            host_ranks = Hash.new
         
     | 
| 
       75 
     | 
    
         
            -
                            ranks.each do |id,line|
         
     | 
| 
       76 
     | 
    
         
            -
                                if specific_host.class == Regexp
         
     | 
| 
       77 
     | 
    
         
            -
                                    host_ranks[id] = line if line['host'] =~ specific_host
         
     | 
| 
       78 
     | 
    
         
            -
                                elsif specific_host.class == String
         
     | 
| 
       79 
     | 
    
         
            -
                                    host_ranks[id] = line if line['host'] == specific_host
         
     | 
| 
       80 
     | 
    
         
            -
                                end
         
     | 
| 
       81 
     | 
    
         
            -
                            end
         
     | 
| 
       82 
     | 
    
         
            -
                            host_ranks
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                        def next_url
         
     | 
| 
      
 35 
     | 
    
         
            +
                           @next_url ||= @page.xpath('//a[contains(text(),"下一页")]').first['href']
         
     | 
| 
       83 
36 
     | 
    
         
             
                        end
         
     | 
| 
       84 
     | 
    
         
            -
             
     | 
| 
       85 
     | 
    
         
            -
                        def  
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
                                id = id.to_i
         
     | 
| 
       88 
     | 
    
         
            -
                                if host.class == Regexp
         
     | 
| 
       89 
     | 
    
         
            -
                                    return id if line['host'] =~ host
         
     | 
| 
       90 
     | 
    
         
            -
                                elsif host.class == String
         
     | 
| 
       91 
     | 
    
         
            -
                                    return id if line['host'] == host
         
     | 
| 
       92 
     | 
    
         
            -
                                end
         
     | 
| 
       93 
     | 
    
         
            -
                            end
         
     | 
| 
       94 
     | 
    
         
            -
                            return nil
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
                        def count
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
       95 
40 
     | 
    
         
             
                        end
         
     | 
| 
       96 
     | 
    
         
            -
             
     | 
| 
       97 
     | 
    
         
            -
                         
     | 
| 
       98 
     | 
    
         
            -
                        def  
     | 
| 
       99 
     | 
    
         
            -
                             
     | 
| 
       100 
     | 
    
         
            -
                             
     | 
| 
       101 
     | 
    
         
            -
                             
     | 
| 
       102 
     | 
    
         
            -
             
     | 
| 
       103 
     | 
    
         
            -
             
     | 
| 
       104 
     | 
    
         
            -
             
     | 
| 
       105 
     | 
    
         
            -
                             
     | 
| 
       106 
     | 
    
         
            -
             
     | 
| 
       107 
     | 
    
         
            -
             
     | 
| 
       108 
     | 
    
         
            -
             
     | 
| 
       109 
     | 
    
         
            -
                             
     | 
| 
       110 
     | 
    
         
            -
                             
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
                        private
         
     | 
| 
      
 43 
     | 
    
         
            +
                        def parse_ad(div)
         
     | 
| 
      
 44 
     | 
    
         
            +
                            url = div.search("span[@class='ec_site']").first.text
         
     | 
| 
      
 45 
     | 
    
         
            +
                            url = "http://#{url}"
         
     | 
| 
      
 46 
     | 
    
         
            +
                            {
         
     | 
| 
      
 47 
     | 
    
         
            +
                                :text => div.search('a/text()').text.strip,
         
     | 
| 
      
 48 
     | 
    
         
            +
                                :href => div.search('a').first['href'],
         
     | 
| 
      
 49 
     | 
    
         
            +
                                :host => Addressable::URI.parse(URI.encode(url)).host
         
     | 
| 
      
 50 
     | 
    
         
            +
                            }
         
     | 
| 
      
 51 
     | 
    
         
            +
                        end
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
                        # Builds the result entry for a single organic result node.
                        #
                        # The host is chosen by the kind of card:
                        # * class "card-result wa-ue-card-result": the text of the
                        #   elements with class
                        #   "wa-hotelgeneral-gray wa-hotelgeneral-info-sub-title";
                        # * class "result card-result wma-card-box" with srcid "map":
                        #   the fixed host "map.baidu.com";
                        # * anything else: the text of the first element with class "site".
                        #
                        # div - node responding to #search and #[].
                        #
                        # Returns a Hash with :text, :href and :host. A value is nil when
                        # the element it is read from is missing, so that one malformed
                        # card does not abort parsing of the whole page.
                        def parse_seo(div)
                            a = div.search('a').first
                            css_class = div['class']
                            if css_class == 'card-result wa-ue-card-result'
                                host = div.search("*[@class='wa-hotelgeneral-gray wa-hotelgeneral-info-sub-title']").text
                            elsif css_class == 'result card-result wma-card-box' && div['srcid'] == 'map'
                                host = 'map.baidu.com'
                            else
                                site = div.search("*[@class='site']").first
                                host = site && site.text
                            end
                            {
                                :text => a && a.text,
                                :href => a && a['href'],
                                :host => host
                            }
                        end
         
     | 
| 
       112 
68 
     | 
    
         
             
                    end
         
     | 
| 
       113 
69 
     | 
    
         
             
                end
         
     | 
| 
         @@ -0,0 +1,66 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Query
              module Result
                # Reads organic results, ads, the result counter and paging
                # information out of a Qihu search result page held in @page.
                class Qihu
                  include Query::Result

                  # Organic results, one per <h3> under ul#m-result.
                  #
                  # Returns an Array of Hashes with :rank (1-based position),
                  # :href, :text (stripped anchor text) and :host.
                  def seo_ranks
                    @page.search('//ul[@id="m-result"]/li//h3').map.with_index do |h3, index|
                      a = h3.search('a').first
                      {
                        :rank => index + 1,
                        :href => a['href'],
                        :text => a.text.strip,
                        :host => Addressable::URI.parse(a['href']).host
                      }
                    end
                  end

                  # Ads listed under ul#djbox.
                  #
                  # Returns an Array of Hashes with :rank, :text, :href and :host.
                  def ads_top
                    extract_ads("//ul[@id='djbox']/li")
                  end

                  # This parser does not extract bottom ads; always an empty Array.
                  def ads_bottom
                    []
                  end

                  # Ads listed under ul#rightbox.
                  #
                  # Returns an Array of Hashes with :rank, :text, :href and :host.
                  def ads_right
                    extract_ads("//ul[@id='rightbox']/li")
                  end

                  # This parser does not extract related keywords; always an empty Array.
                  def related_keywords
                    []
                  end

                  # Result counter: every digit of the first span.nums, read as one
                  # Integer. Returns 0 when the page has no such span.
                  def count
                    nums = @page.search('//span[@class="nums"]').first
                    nums ? nums.text.gsub(/\D/, '').to_i : 0
                  end

                  # Next page: href of the a#snext link, or nil when the page has
                  # no such link.
                  def next_url
                    link = @page.xpath('//a[@id="snext"]').first
                    link && link['href']
                  end

                  # Has results: false only when the heading under div#main
                  # contains the "没有找到该URL" notice.
                  def has_result?
                    !@page.search('//div[@id="main"]/h3').text.include?('没有找到该URL')
                  end

                  private

                  # Builds one ad entry per node matched by xpath. Shared by
                  # ads_top and ads_right, which differ only in the list they read.
                  def extract_ads(xpath)
                    @page.search(xpath).map.with_index do |li, index|
                      a = li.search('a').first
                      href = extract_aurl(a['_cs'])
                      # URI.encode was removed in Ruby 3.0; DEFAULT_PARSER.escape
                      # performs the same percent-escaping of unsafe characters.
                      host = href && Addressable::URI.parse(URI::DEFAULT_PARSER.escape(href)).host
                      {
                        :rank => index + 1,
                        :text => a.text,
                        :href => href,
                        :host => host
                      }
                    end
                  end

                  # Returns the decoded "aurl" query parameter of tracking_url, or
                  # nil when the URL, its query string or the parameter is missing.
                  def extract_aurl(tracking_url)
                    return nil if tracking_url.nil?
                    query = URI(tracking_url).query
                    return nil if query.nil?
                    pair = URI.decode_www_form(query).assoc('aurl')
                    pair && pair.last
                  end
                end
              end
            end
         
     |