serper 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/README.md +1 -0
 - data/bin/serper +59 -0
 - data/lib/serper.rb +26 -0
 - data/lib/serper/analyser.rb +112 -0
 - data/lib/serper/baidu/crawler.rb +7 -0
 - data/lib/serper/baidu/parser.rb +185 -0
 - data/lib/serper/baidu/weight.rb +144 -0
 - data/lib/serper/crawler.rb +84 -0
 - data/lib/serper/helper.rb +79 -0
 - data/lib/serper/parser.rb +77 -0
 - data/lib/serper/user_agents.yml +183 -0
 - data/lib/serper/version.rb +3 -0
 - metadata +141 -0
 
    
        checksums.yaml
    ADDED
    
    | 
         @@ -0,0 +1,7 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ---
         
     | 
| 
      
 2 
     | 
    
         
            +
            SHA1:
         
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 149badc447dec8ed55714a9ed6768e3f85a9b94e
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 8204acacee0b068b61421c0bc59507ea30717895
         
     | 
| 
      
 5 
     | 
    
         
            +
            SHA512:
         
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: d329ccb1dbf584a4a100bf945740a5ff9ffc391a79f34aa598776cc5db901fdefdf0e04e26920060784c302fd9eb935dcf0a2a623a0acdda8b76bdc61c4235b2
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 36a41f00ff396bde0e3c18587bd58359300591572ff346135f340520b16145272c622f228709d0fe72d770a2174f5bf48cc6c9e98e75fc3b22eef78e8ea14f41
         
     | 
    
        data/README.md
    ADDED
    
    | 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            解析百度的搜索结果页面, 并返回结构化数据以进行后续分析.
         
     | 
    
        data/bin/serper
    ADDED
    
    | 
         @@ -0,0 +1,59 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'serper'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'optparse'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'json'
         
     | 
| 
      
 6 
     | 
    
         
            +
            require 'pp'
         
     | 
| 
      
 7 
     | 
    
         
            +
            require 'docopt'
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            cmd = File.basename(__FILE__)
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            doc = <<DOCOPT
         
     | 
| 
      
 12 
     | 
    
         
            +
            1. serper -s 'keyword' # search 'keyword' and print parse result
         
     | 
| 
      
 13 
     | 
    
         
            +
            2. serper -s 'keyword' -o output.json # -o means save result to a file
         
     | 
| 
      
 14 
     | 
    
         
            +
            3. serper -f 'file path' # parse html source code from file
         
     | 
| 
      
 15 
     | 
    
         
            +
            4. serper -s 'keyword' -j # search 'keyword' and print parse result in JSON format
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            Usage:
         
     | 
| 
      
 18 
     | 
    
         
            +
              #{cmd} [options]
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
            Options:
         
     | 
| 
      
 21 
     | 
    
         
            +
              -h --help               show this help message and exit
         
     | 
| 
      
 22 
     | 
    
         
            +
              -v --version            show version and exit
         
     | 
| 
      
 23 
     | 
    
         
            +
              -a --analyse Name       analyse as the given name
         
     | 
| 
      
 24 
     | 
    
         
            +
              --keywords File         uses with -a, import give keywords File before search
         
     | 
| 
      
 25 
     | 
    
         
            +
              -s --search Keyword     search Keyword and show result
         
     | 
| 
      
 26 
     | 
    
         
            +
              -f --file File          parse local file or given url
         
     | 
| 
      
 27 
     | 
    
         
            +
              -j --json               print JSON output
         
     | 
| 
      
 28 
     | 
    
         
            +
              -o --output File         output JSON result to File
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            DOCOPT
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            # Parse the command line. Docopt raises Docopt::Exit (carrying the text to
            # show) instead of returning options, so there is nothing to act on after
            # the message has been printed.
            begin
              options = Docopt::docopt(doc, version: Serper::VERSION)
            rescue Docopt::Exit => e
              puts e.message
              exit
            end

            # Exactly one action is performed, chosen in the order -a, -s, -f.
            if options['--analyse']
              analyse = Serper.analyse(options['--analyse'])
              # --keywords is optional: only import when a file was actually given.
              analyse.import_keywords(options['--keywords']) if options['--keywords']
              # Analyser exposes #run as its entry point; it has no #search method.
              analyse.run
              result = 'Analyse finished!'
            elsif options['--search']
              # Serper.search requires the engine name as its first argument;
              # :baidu is the only engine registered in Serper::ENGINES.
              result = Serper.search(:baidu, options['--search'])
            elsif options['--file']
              # NOTE(review): Serper.parse_file is not defined in lib/serper.rb as
              # shown in this package view - confirm it is provided elsewhere.
              result = Serper.parse_file(options['--file'])
            else
              puts "At least given one of -a/-s/-f"
              # No action was requested, so there is no result to print or save.
              exit 1
            end

            if options['--json']
              puts result.to_json
            else
              pp result
            end

            # Block form closes (and therefore flushes) the output file.
            if options['--output']
              File.open(options['--output'], 'w') { |out| out.puts result.to_json }
            end
         
     | 
    
        data/lib/serper.rb
    ADDED
    
    | 
         @@ -0,0 +1,26 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "serper/version"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require "serper/parser"
         
     | 
| 
      
 3 
     | 
    
         
            +
            require "serper/analyser"
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            [:baidu].each do |engine_name|
         
     | 
| 
      
 6 
     | 
    
         
            +
              %w{crawler parser weight}.each do |part|
         
     | 
| 
      
 7 
     | 
    
         
            +
                require File.expand_path("../serper/#{engine_name}/#{part}.rb",__FILE__)
         
     | 
| 
      
 8 
     | 
    
         
            +
              end
         
     | 
| 
      
 9 
     | 
    
         
            +
            end
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            # Public entry points of the gem.
            module Serper
              # Search engines known to the gem, keyed by engine name.
              ENGINES = { baidu: Baidu }

              class << self
                # Builds a Parser for the given engine/keyword/page, runs its search
                # and returns the parser itself.
                #
                # @param engine_name engine key handed to Parser.new
                # @param keyword     search term handed to Parser.new
                # @param page        result page handed to Parser.new (defaults to 1)
                # @return [Parser] the parser after #search has been called on it
                def search(engine_name, keyword, page = 1)
                  Parser.new(engine_name, keyword, page).tap { |serp| serp.search }
                end

                # Creates an Analyser for the given connection.
                #
                # @param connection value handed to Analyser.new
                # @return [Analyser]
                def analyse(connection)
                  Analyser.new(connection)
                end
              end
            end
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
         @@ -0,0 +1,112 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'active_record'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'csv'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'date'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'yaml'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'ruby-progressbar'
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            module Serper
              # Searches every stored keyword on every registered engine and persists
              # the weights reported by the search result through ActiveRecord.
              class Analyser
                # Opens the ActiveRecord connection used by Keyword and Weight.
                #
                # @param connection passed unchanged to
                #   ActiveRecord::Base.establish_connection
                def initialize(connection)
                  ActiveRecord::Base.establish_connection(connection)
                end

                # Imports keywords from a CSV file. Columns are read positionally:
                # 0 => term, 1 => pv, 2 => category, 3 => url_type, 4 => url_id.
                # Rows are looked up by term; the attribute block is the one given to
                # find_or_create_by, so existing keywords are left as they are.
                #
                # @param file [String] path handed to CSV.foreach
                def import_keywords(file)
                  CSV.foreach(file) do |row|
                    Keyword.find_or_create_by(:term => row[0]) do |record|
                      record.pv = row[1]
                      record.category = row[2]
                      record.url_type = row[3]
                      record.url_id = row[4]
                    end
                  end
                end

                # Runs #search_engine for every key of ENGINES.
                #
                # @param date [Date] date the weights are recorded under
                # @param skip [Boolean] forwarded to #search_engine
                def run(date=Date.today,skip=true)
                  puts "Serper Analyser on #{date}"
                  ENGINES.keys.each do |engine_name|
                    puts engine_name
                    search_engine(engine_name,date,skip)
                  end
                end

                # Searches each stored keyword on one engine and stores one Weight row
                # per entry of the search result's #weights.
                #
                # @param engine_name engine key, also stored in Weight#engine
                # @param date        date the weights are recorded under
                # @param skip [Boolean] when true, keywords that already have weights
                #   for this engine and date are left alone; when false their existing
                #   weights are destroyed and recreated
                def search_engine(engine_name,date,skip=true)
                  progress = ProgressBar.create(:title => "Searching #{engine_name} - #{date}",
                                                :total => Keyword.count,
                                                :format => '%t (%c/%C) %a %E |%w')
                  Keyword.all.each do |k|
                    existing = Weight.where(:engine => engine_name, :date => date, :keyword_id => k.id)
                    if existing.any?
                      if skip
                        # A skipped keyword is still a processed keyword: advance the
                        # bar so it can reach the total it was created with.
                        progress.increment
                        next
                      end
                      existing.destroy_all
                    end

                    serp = Serper.search(engine_name,k.term)
                    serp.weights.each do |w|
                      Weight.create(:date => date,
                                    :keyword_id => k.id,
                                    :engine => engine_name,
                                    :side => w[:side],
                                    :part => w[:part],
                                    # the weight hash calls this :type, the column is :source
                                    :source => w[:type],
                                    :name => w[:name],
                                    :site => w[:site],
                                    :subdomain => w[:subdomain],
                                    :path => w[:path],
                                    :part_rank => w[:part_rank],
                                    :side_rank => w[:side_rank],
                                    :side_weight => w[:side_weight],
                                    :weight => w[:weight]
                      )
                    end

                    progress.increment
                  end
                end

                # Creates the serper_keywords and serper_weights tables on the current
                # connection.
                def migrate!
                  ActiveRecord::Schema.define do
                    create_table :serper_keywords do |t|
                      t.string :term
                      t.integer :pv
                      t.string :category
                      t.string :url_type
                      t.integer :url_id

                      t.timestamps

                      t.index :term
                    end

                    create_table :serper_weights do |t|
                      t.date :date
                      t.string :engine
                      t.integer :keyword_id
                      t.string :side # Left Right
                      t.string :part
                      t.string :source # SEO SEM Special
                      t.string :name
                      t.string :site
                      t.string :subdomain
                      t.string :path
                      t.integer :part_rank
                      t.integer :side_rank
                      t.float :side_weight
                      t.float :weight

                      t.timestamps

                      t.index [:date, :engine, :keyword_id, :side, :side_rank], name: 'weights_pk_index'
                    end
                  end
                end

                # Keyword row, stored in serper_keywords.
                class Keyword < ActiveRecord::Base
                  self.table_name = 'serper_keywords'
                end

                # Weight row, stored in serper_weights.
                class Weight < ActiveRecord::Base
                  self.table_name = 'serper_weights'
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,185 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            class Serper::Baidu
         
     | 
| 
      
 2 
     | 
    
         
            +
              # Collects the certification-badge links found under div#ec_im_container.
              #
              # @param file [Hash] must respond to [:doc] with a searchable document
              # @return [Array<Hash>] one {url:, rank:} hash per badge, rank counting
              #   from 1 in document order; url is '' whenever it cannot be extracted
              def _parse_ads_right(file)
                badges = file[:doc].search('div#ec_im_container span a.c-icon.efc-cert')
                badges.each_with_index.map do |badge, position|
                  target =
                    begin
                      payload = Serper::Helper.parse_data_click(badge['data-renzheng'])
                      Addressable::URI.parse(payload['identity']['a']['url']).query_values['wd'].to_s.sub('@v','')
                    rescue StandardError
                      # best effort: any failure while digging out the url yields ''
                      ''
                    end
                  {url: target, rank: position + 1}
                end
              end
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
              # Collects the certification-badge links that appear in div#content_left
              # before the first child whose id is a positive number.
              #
              # @param file [Hash] must respond to [:doc] with a searchable document
              # @return [Array<Hash>] one {url:, rank:} hash per badge, rank counting
              #   from 1 across all scanned children; url is '' whenever it cannot be
              #   extracted. Empty when div#content_left is missing.
              def _parse_ads_top(file)
                result = []
                container = file[:doc].search('div#content_left').first
                # Same guard as the sibling parsers: a page without the left column
                # simply has no top ads.
                return result if container.nil?

                container.children.each do |child|
                  # Stop scanning at the first child carrying a positive numeric id.
                  break if child['id'].to_i > 0

                  child.search('span a.c-icon.efc-cert').each do |badge|
                    url =
                      begin
                        payload = Serper::Helper.parse_data_click(badge['data-renzheng'])
                        Addressable::URI.parse(payload['identity']['a']['url']).query_values['wd'].to_s.sub('@v', '')
                      rescue StandardError
                        # best effort: any failure while digging out the url yields ''
                        ''
                      end
                    result << {url: url, rank: result.size + 1}
                  end
                end
                result
              end
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
              # Lists the 'result-op' children of div#content_right div#con-ar.
              #
              # @param file [Hash] must respond to [:doc] with a searchable document
              # @return [Array<Hash>] one {:tpl, :data_click} hash per matching child;
              #   empty when the container is absent
              def _parse_con_ar(file)
                container = file[:doc].search("div#content_right div#con-ar").first
                return [] if container.nil?

                # Only children whose class attribute mentions 'result-op' are kept.
                matching = container.children.select do |node|
                  node['class'].to_s.include?('result-op')
                end

                matching.map do |node|
                  {
                    :tpl => node['tpl'],
                    :data_click => Serper::Helper.parse_data_click(node['data-click'])
                  }
                end
              end
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
              # def _parse_pinpaizhuanqu(file)
         
     | 
| 
      
 43 
     | 
    
         
            +
              #   part = file[:doc].search("div[@id='content_left']").first
         
     | 
| 
      
 44 
     | 
    
         
            +
              #   return false if part.nil?
         
     | 
| 
      
 45 
     | 
    
         
            +
              #
         
     | 
| 
      
 46 
     | 
    
         
            +
              #   part.children[2].name == 'script'
         
     | 
| 
      
 47 
     | 
    
         
            +
              # end
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
              # Extracts the ranked results listed in div#content_left.
              #
              # Only children whose id attribute is a number between 1 and 2999 are
              # treated as results; that id is used as the rank.
              # NOTE(review): the 3000 upper bound is taken over as-is - confirm what
              # ids at or above it stand for.
              #
              # @param file [Hash] must respond to [:doc] with a searchable document
              # @return [Array<Hash>] one hash per result with the keys :rank,
              #   :result_op, :fk, :srcid, :tpl, :mu, :url, :title, :content and
              #   :baiduopen; empty when div#content_left is missing
              def _parse_ranks(file)
                result = []
                part = file[:doc].search("div[@id='content_left']").first
                return result if part.nil?

                part.children.each do |table|
                  id = table['id'].to_i
                  next unless id > 0 && id < 3000

                  r = {:rank => id}
                  r[:result_op] = table['class'].to_s.include?('result-op')
                  r[:fk] = table['fk']
                  r[:srcid] = table['srcid']
                  r[:tpl] = table['tpl']
                  r[:mu] = table['mu']

                  title_link = table.search('h3/a').first
                  url = title_link && title_link['href']
                  if url && url.include?('//www.baidu.com/link?')
                    # Random pause before each extra request; results that need no
                    # redirect lookup are not delayed.
                    sleep(rand)
                    # Only protocol-relative links need a scheme; prefixing an
                    # absolute link would produce "http:http://...".
                    url = "http:#{url}" if url.start_with?('//')
                    # The real target is whatever the redirect's Location header says.
                    url = Serper::Crawler.get_rank_url(url).headers['location']
                  end
                  r[:url] = url

                  r[:title] = Serper::Helper.get_content_safe(table.search('h3'))
                  r[:content] = Serper::Helper.get_content_safe(table.search('div.c-abstract'))

                  # True when any link inside the result points at open.baidu.com.
                  r[:baiduopen] = table.search('a').any? do |link|
                    link['href'].to_s.include?('open.baidu.com')
                  end

                  result << r
                end
                result
              end
         
     | 
| 
      
 92 
     | 
    
         
            +
             
     | 
| 
      
 93 
     | 
    
         
            +
              # def _parse_related_keywords(file)
         
     | 
| 
      
 94 
     | 
    
         
            +
              #   result = []
         
     | 
| 
      
 95 
     | 
    
         
            +
              #   file[:doc].search('div[@id="rs"]').each do |rs|
         
     | 
| 
      
 96 
     | 
    
         
            +
              #     rs.css('a').each do |link|
         
     | 
| 
      
 97 
     | 
    
         
            +
              #       result << link.content
         
     | 
| 
      
 98 
     | 
    
         
            +
              #     end
         
     | 
| 
      
 99 
     | 
    
         
            +
              #   end
         
     | 
| 
      
 100 
     | 
    
         
            +
              #   result
         
     | 
| 
      
 101 
     | 
    
         
            +
              # end
         
     | 
| 
      
 102 
     | 
    
         
            +
             
     | 
| 
      
 103 
     | 
    
         
            +
              # def _parse_result_num(file)
         
     | 
| 
      
 104 
     | 
    
         
            +
              #   html = file[:html]
         
     | 
| 
      
 105 
     | 
    
         
            +
              #   str = html.scan(/百度为您找到相关结果(.*)个/).join
         
     | 
| 
      
 106 
     | 
    
         
            +
              #   str = str.gsub('约','')
         
     | 
| 
      
 107 
     | 
    
         
            +
              #   if str.include?('万')
         
     | 
| 
      
 108 
     | 
    
         
            +
              #     parts = str.split('万')
         
     | 
| 
      
 109 
     | 
    
         
            +
              #     result = parts[0].to_i * 10000 + parts[1].to_i
         
     | 
| 
      
 110 
     | 
    
         
            +
              #   else
         
     | 
| 
      
 111 
     | 
    
         
            +
              #     result = str.gsub(',', '').to_i
         
     | 
| 
      
 112 
     | 
    
         
            +
              #   end
         
     | 
| 
      
 113 
     | 
    
         
            +
              #
         
     | 
| 
      
 114 
     | 
    
         
            +
              #   result
         
     | 
| 
      
 115 
     | 
    
         
            +
              # end
         
     | 
| 
      
 116 
     | 
    
         
            +
             
     | 
| 
      
 117 
     | 
    
         
            +
              # def _parse_right_hotel(file)
         
     | 
| 
      
 118 
     | 
    
         
            +
              #   rh = file[:doc].search('div[@tpl="right_hotel"]')
         
     | 
| 
      
 119 
     | 
    
         
            +
              #   return nil if rh.nil?
         
     | 
| 
      
 120 
     | 
    
         
            +
              #
         
     | 
| 
      
 121 
     | 
    
         
            +
              #   rh = rh.first
         
     | 
| 
      
 122 
     | 
    
         
            +
              #   return nil if rh.nil?
         
     | 
| 
      
 123 
     | 
    
         
            +
              #   title = Serper::Helper.get_content_safe(rh.search('div.opr-hotel-title'))
         
     | 
| 
      
 124 
     | 
    
         
            +
              #
         
     | 
| 
      
 125 
     | 
    
         
            +
              #   {:title => title}
         
     | 
| 
      
 126 
     | 
    
         
            +
              # end
         
     | 
| 
      
 127 
     | 
    
         
            +
             
     | 
| 
      
 128 
     | 
    
         
            +
              # def _parse_right_personinfo(file)
         
     | 
| 
      
 129 
     | 
    
         
            +
              #   rp = file[:doc].search('div[@tpl="right_personinfo"]')
         
     | 
| 
      
 130 
     | 
    
         
            +
              #   return nil if rp.nil?
         
     | 
| 
      
 131 
     | 
    
         
            +
              #
         
     | 
| 
      
 132 
     | 
    
         
            +
              #   title = Serper::Helper.get_content_safe rp.search('span.opr-personinfo-subtitle-large')
         
     | 
| 
      
 133 
     | 
    
         
            +
              #   info_summary = Serper::Helper.get_content_safe rp.search('div.opr-personinfo-summary')
         
     | 
| 
      
 134 
     | 
    
         
            +
              #   info = Serper::Helper.get_content_safe rp.search('div.opr-personinfo-info')
         
     | 
| 
      
 135 
     | 
    
         
            +
              #   source = Serper::Helper.get_content_safe rp.search('div.opr-personinfo-source a')
         
     | 
| 
      
 136 
     | 
    
         
            +
              #
         
     | 
| 
      
 137 
     | 
    
         
            +
              #   return nil if title.nil? && info.nil? && source.nil?
         
     | 
| 
      
 138 
     | 
    
         
            +
              #   {:title => title, :info_summary => info_summary, :info => info, :source => source}
         
     | 
| 
      
 139 
     | 
    
         
            +
              # end
         
     | 
| 
      
 140 
     | 
    
         
            +
             
     | 
| 
      
 141 
     | 
    
         
            +
              # def _parse_right_relaperson(file)
         
     | 
| 
      
 142 
     | 
    
         
            +
              #   relapersons = file[:doc].search('div[@tpl="right_relaperson"]')
         
     | 
| 
      
 143 
     | 
    
         
            +
              #   return nil if relapersons.nil?
         
     | 
| 
      
 144 
     | 
    
         
            +
              #
         
     | 
| 
      
 145 
     | 
    
         
            +
              #   result = []
         
     | 
| 
      
 146 
     | 
    
         
            +
              #   relapersons.each do |rr|
         
     | 
| 
      
 147 
     | 
    
         
            +
              #     title = rr.search('div.cr-title/span').first
         
     | 
| 
      
 148 
     | 
    
         
            +
              #     title = title.content unless title.nil?
         
     | 
| 
      
 149 
     | 
    
         
            +
              #     r = []
         
     | 
| 
      
 150 
     | 
    
         
            +
              #     rr.search('p.opr-relaperson-name/a').each do |p|
         
     | 
| 
      
 151 
     | 
    
         
            +
              #       r << p['title']
         
     | 
| 
      
 152 
     | 
    
         
            +
              #     end
         
     | 
| 
      
 153 
     | 
    
         
            +
              #     result << {:title => title, :names => r}
         
     | 
| 
      
 154 
     | 
    
         
            +
              #   end
         
     | 
| 
      
 155 
     | 
    
         
            +
              #   result
         
     | 
| 
      
 156 
     | 
    
         
            +
              # end
         
     | 
| 
      
 157 
     | 
    
         
            +
             
     | 
| 
      
 158 
     | 
    
         
            +
              # def _parse_right_weather(file)
         
     | 
| 
      
 159 
     | 
    
         
            +
              #   rw = file[:doc].search('div[@tpl="right_weather"]')
         
     | 
| 
      
 160 
     | 
    
         
            +
              #   return nil if rw.nil?
         
     | 
| 
      
 161 
     | 
    
         
            +
              #
         
     | 
| 
      
 162 
     | 
    
         
            +
              #   rw = rw.first
         
     | 
| 
      
 163 
     | 
    
         
            +
              #   return nil if rw.nil?
         
     | 
| 
      
 164 
     | 
    
         
            +
              #
         
     | 
| 
      
 165 
     | 
    
         
            +
              #   title = Serper::Helper.get_content_safe(rw.search('div.opr-weather-title'))
         
     | 
| 
      
 166 
     | 
    
         
            +
              #   week = rw.search('a.opr-weather-week').first['href']
         
     | 
| 
      
 167 
     | 
    
         
            +
              #
         
     | 
| 
      
 168 
     | 
    
         
            +
              #   {:title => title, :week => week}
         
     | 
| 
      
 169 
     | 
    
         
            +
              # end
         
     | 
| 
      
 170 
     | 
    
         
            +
             
     | 
| 
      
 171 
     | 
    
         
            +
              # Collects the "zhixin" result blocks from the left column of a parsed page.
              #
              # file - Hash whose :doc entry responds to #search (the parsed HTML document).
              #
              # Returns an Array with one Hash per element matching
              # "div#content_left .result-zxl". Each Hash carries the element's id,
              # srcid, fk, tpl and mu attributes plus :data_click, which is the
              # element's data-click attribute passed through
              # Serper::Helper.parse_data_click. Returns [] when nothing matches.
              def _parse_zhixin(file)
                file[:doc].search("div#content_left .result-zxl").map do |zxl|
                  {
                    :id => zxl['id'],
                    :srcid => zxl['srcid'],
                    :fk => zxl['fk'],
                    :tpl => zxl['tpl'],
                    :mu => zxl['mu'],
                    :data_click => Serper::Helper.parse_data_click(zxl['data-click'])
                  }
                end
              end
         
     | 
| 
      
 184 
     | 
    
         
            +
             
     | 
| 
      
 185 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,144 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Weight calculation for the parts of a parsed Baidu result page.
            #
            # Every public _weight_of_* method takes the parsed SERP hash and the
            # running position counter of the page side being scored, and returns a
            # two-element Array: [entries, side_rank]. side_rank is the counter after
            # this part's items have been counted. Each entry is a Hash with the keys
            # :type, :name, :site, :subdomain, :path, :side_rank, :part_rank and
            # :side_weight (entries from _weight_of_ranks also carry :mu).
            class Serper::Baidu
              # Scoring configuration: the part names grouped per page side and the
              # numeric multipliers read by the _weight_of_* methods.
              #
              # A new Hash is built on every call.
              def weight_config
                {
                  :left_parts => [:ads_top, :zhixin, :ranks],
                  :right_parts => [:con_ar, :ads_right],
                  :left_part_weight => 8,
                  :right_part_weight => 2,
                  :zhixin_weight => 3.5,
                  :baiduopen_weight => 3,
                  :rank_special_weight => 2,
                  :con_ar_weight => 2
                }
              end

              # Scores the organic results in serp_result[:ranks].
              #
              # An item is typed 'Special' when it is flagged :baiduopen or has a
              # non-empty :mu, otherwise 'SEO'. The base weight is 1 / side_rank;
              # baiduopen items are multiplied by :baiduopen_weight, other Special
              # items by :rank_special_weight.
              def _weight_of_ranks(serp_result, side_rank)
                config = weight_config
                baiduopen_weight = config[:baiduopen_weight].to_f
                special_weight = config[:rank_special_weight].to_f

                result = serp_result[:ranks].map do |rank|
                  side_rank += 1

                  mu = rank[:mu].to_s
                  has_mu = !mu.empty?
                  # A non-empty mu replaces the result URL as the address to analyse.
                  url = has_mu ? mu : rank[:url].to_s
                  type = (rank[:baiduopen] || has_mu) ? 'Special' : 'SEO'

                  weight = 1.0 / side_rank.to_f
                  if rank[:baiduopen]
                    weight *= baiduopen_weight
                  elsif has_mu
                    weight *= special_weight
                  end

                  {type: type,
                   name: rank[:tpl].to_s,
                   site: Serper::Helper.parse_site(url),
                   subdomain: Serper::Helper.parse_subdomain(url),
                   path: Serper::Helper.parse_path(url),
                   mu: mu,
                   side_rank: side_rank,
                   part_rank: rank[:rank],
                   side_weight: weight}
                end
                [result, side_rank]
              end

              # Scores the ads in serp_result[:ads_top] as 'SEM' entries.
              def _weight_of_ads_top(serp_result, side_rank)
                _sem_entries(serp_result[:ads_top], side_rank)
              end

              # Scores the ads in serp_result[:ads_right] as 'SEM' entries.
              def _weight_of_ads_right(serp_result, side_rank)
                _sem_entries(serp_result[:ads_right], side_rank)
              end

              # Scores the items in serp_result[:con_ar] as 'Special' entries with the
              # fixed :con_ar_weight; the URL is read from the item's data_click 'mu'.
              def _weight_of_con_ar(serp_result, side_rank)
                weight = 1.0 * weight_config[:con_ar_weight]
                _special_entries(serp_result[:con_ar], side_rank, weight) do |con|
                  con[:data_click]['mu']
                end
              end

              # Scores the items in serp_result[:zhixin] as 'Special' entries with the
              # fixed :zhixin_weight; the URL is the item's :mu.
              def _weight_of_zhixin(serp_result, side_rank)
                weight = 1.0 * weight_config[:zhixin_weight]
                _special_entries(serp_result[:zhixin], side_rank, weight) do |zhixin|
                  zhixin[:mu]
                end
              end

              private

              # Builds 'SEM' entries for a list of ads; each ad weighs 1 / side_rank.
              def _sem_entries(ads, side_rank)
                entries = ads.map do |ad|
                  side_rank += 1
                  url = ad[:url].to_s

                  {type: 'SEM',
                   name: '',
                   site: Serper::Helper.parse_site(url),
                   subdomain: Serper::Helper.parse_subdomain(url),
                   path: Serper::Helper.parse_path(url),
                   side_rank: side_rank,
                   part_rank: ad[:rank],
                   side_weight: 1.0 / side_rank.to_f}
                end
                [entries, side_rank]
              end

              # Builds 'Special' entries that all share the given fixed weight.
              # The block receives each item and returns its URL (nil is treated as '').
              # part_rank is the item's 1-based position within the list.
              def _special_entries(items, side_rank, weight)
                entries = items.each_with_index.map do |item, index|
                  side_rank += 1
                  url = yield(item).to_s

                  {type: 'Special',
                   name: item[:tpl],
                   site: Serper::Helper.parse_site(url),
                   subdomain: Serper::Helper.parse_subdomain(url),
                   path: Serper::Helper.parse_path(url),
                   side_rank: side_rank,
                   part_rank: index + 1,
                   side_weight: weight}
                end
                [entries, side_rank]
              end
            end
         
     | 
| 
         @@ -0,0 +1,84 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'httparty'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Serper
         
     | 
| 
      
 4 
     | 
    
         
            +
              class Crawler
         
     | 
| 
      
 5 
     | 
    
         
            +
                # User-Agent strings loaded once from user_agents.yml one directory
                # level above this file's path. YAML.load_file opens and closes the
                # file itself, so no file handle is left open.
                AllUserAgents = YAML.load_file(File.expand_path('../user_agents.yml', __FILE__))
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
                # Returns one randomly chosen entry of AllUserAgents
                # (nil when the list is empty).
                def self.rand_ua
                  AllUserAgents.sample
                end
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
                include HTTParty
         
     | 
| 
      
 12 
     | 
    
         
            +
                base_uri 'www.baidu.com'
         
     | 
| 
      
 13 
     | 
    
         
            +
                follow_redirects false
         
     | 
| 
      
 14 
     | 
    
         
            +
                headers "User-Agent" => self.rand_ua, "Referer" => 'http://www.baidu.com/'
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
                # Class-level shortcut: builds a throwaway instance and delegates
                # to its #get_serp with the same url and retries.
                def self.get_serp(url, retries = 3)
                  crawler = new
                  crawler.get_serp(url, retries)
                end
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
                # Class-level shortcut: builds a throwaway instance and delegates
                # to its #get_rank_url with the same url.
                def self.get_rank_url(url)
                  crawler = new
                  crawler.get_rank_url(url)
                end
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
                # Fetches url with the class-level HTTP `get` and returns the response.
                #
                # url     - address to request.
                # retries - how many times a failed request is retried (default 3).
                #
                # On any StandardError the error class and message are printed, then
                # the request is retried after a 10 second pause. Once `retries`
                # retries have failed as well, the last error is re-raised instead of
                # looping forever (e.g. on a malformed URL that can never succeed).
                def get_rank_url(url, retries = 3)
                  attempts = 0
                  begin
                    self.class.get(url)
                  rescue StandardError => e
                    puts e.class
                    puts e.message
                    attempts += 1
                    raise if attempts > retries
                    sleep(10)
                    retry
                  end
                end
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
                # Fetches +url+ and returns the HTTP response, backing off and retrying
                # until a 200 response is obtained.
                #
                # Behaviour, in order:
                # * When +retries+ is not positive, returns nil immediately.
                # * Transport-level errors (any StandardError raised by the request) are
                #   logged and retried after 10 seconds, but only a bounded number of
                #   times per call. The original retried forever, so a permanently
                #   failing request (for example a malformed URL) never reached the long
                #   back-off below.
                # * A missing or non-200 response triggers a 20-21 minute sleep and a
                #   recursive attempt with one fewer retry.
                # * When the recursive attempts are exhausted (nil), the method sleeps
                #   one hour and starts over with a fresh retry budget, so a call with
                #   a positive +retries+ only returns once a 200 response is obtained.
                #
                # url     - the SERP URL to request.
                # retries - number of back-off rounds left before the one-hour pause.
                #
                # Returns the response object produced by +self.class.get+, or nil when
                # called with +retries+ <= 0.
                #
                # NOTE: Content-Length validation used to live here; it was disabled
                # because Baidu stopped sending Content-Length in its response headers.
                def get_serp(url, retries = 3)
                  return nil unless retries > 0

                  # Upper bound for immediate (10s) retries on transport errors.
                  max_transport_attempts = 3
                  attempts = 0

                  begin
                    attempts += 1
                    response = self.class.get(url)
                  rescue StandardError => e
                    puts e.class
                    puts e.message
                    if attempts < max_transport_attempts
                      sleep(10)
                      retry
                    end
                    # Give up on quick retries; fall through to the long back-off.
                    response = nil
                  end

                  if response.nil? || response.code != 200
                    puts response unless response.nil?
                    puts "Retry on URL: #{url}"
                    sleep(rand(60)+1200)
                    response = self.class.get_serp(url,retries - 1)
                  end

                  if response.nil?
                    puts "Still error after 3 tries, sleep 3600s now."
                    sleep(3600)
                    response = self.class.get_serp(url)
                  end

                  response
                end
         
     | 
| 
      
 82 
     | 
    
         
            +
             
     | 
| 
      
 83 
     | 
    
         
            +
              end
         
     | 
| 
      
 84 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,79 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'domainatrix'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Serper
              # Stateless utility helpers, all exposed as module-level methods
              # (Serper::Helper.method_name).
              #
              # NOTE: +parse_data_click+ relies on JSON and the URL helpers rely on
              # Domainatrix being loaded before they are called.
              module Helper
                class << self
                  # Safely extracts text from a Nokogiri search result.
                  #
                  # noko - a node collection (anything responding to +empty?+ and
                  #        +first+), or nil.
                  #
                  # Returns the stripped content of the first node, or nil when +noko+
                  # is nil or empty.
                  def get_content_safe(noko)
                    return nil if noko.nil? || noko.empty?

                    noko.first.content.strip
                  end

                  # Parses the "data-click" value of a Baidu div, which is written in a
                  # JSON-like format (single quotes, unquoted keys).
                  #
                  # str - the raw attribute value, e.g. "{'fm':'as',rsv_bdr:'0'}".
                  #
                  # Returns the parsed Hash with String keys.
                  # Raises JSON::ParserError when the text still is not valid JSON after
                  # the quoting fix-ups.
                  def parse_data_click(str)
                    json = str.gsub("'", '"')                                  # single -> double quotes
                              .gsub(/({|,)([a-zA-Z0-9_]+):/, '\1"\2":')       # quote bare keys
                    JSON.parse(json)
                  end

                  # Normalizes the weights of +data+ in place so they sum to 1.
                  #
                  # data           - an Array of Hashes, e.g. [{a: 1, b: 2}, {a: 2, b: 3}].
                  # weight_col     - key holding the raw weight (converted with +to_f+).
                  # normalized_col - key that receives the normalized weight.
                  #
                  # When the total weight is zero every normalized weight is set to 0.0
                  # (dividing by the zero total would otherwise yield NaN).
                  #
                  # Returns +data+ (the same, mutated Array).
                  def normalize(data,weight_col=:weight,normalized_col=:normalized_weight)
                    total_weight = data.reduce(0.0) { |sum, d| sum + d[weight_col].to_f }
                    data.each do |d|
                      d[normalized_col] =
                        total_weight.zero? ? 0.0 : d[weight_col].to_f / total_weight
                    end
                    data
                  end

                  # Returns the registrable site of +url+ ("domain.public_suffix"),
                  # or '' when the URL cannot be parsed.
                  def parse_site(url)
                    parse_url_component('parse_site', url) do |parsed|
                      parsed.domain + '.' + parsed.public_suffix
                    end
                  end

                  # Returns the subdomain of +url+, or '' when the URL cannot be parsed.
                  def parse_subdomain(url)
                    parse_url_component('parse_subdomain', url) { |parsed| parsed.subdomain }
                  end

                  # Returns the path of +url+, or '' when the URL cannot be parsed.
                  def parse_path(url)
                    parse_url_component('parse_path', url) { |parsed| parsed.path }
                  end

                  private

                  # Parses +url+ with Domainatrix and yields the parsed object to the
                  # block, returning the block's value.
                  #
                  # Any StandardError is logged to stdout (tagged with +label+ and the
                  # original +url+) and '' is returned instead. Only StandardError is
                  # rescued so that interrupts and exit requests are not swallowed.
                  def parse_url_component(label, url)
                    yield Domainatrix.parse(url.to_s)
                  rescue StandardError => e
                    puts "#{label} from url error:"
                    puts url
                    puts e.class
                    puts e.message
                    ''
                  end
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,77 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- coding: utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'nokogiri'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'uri'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'json'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'serper/crawler'
         
     | 
| 
      
 6 
     | 
    
         
            +
            require 'serper/helper'
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            module Serper
              # Fetches and parses one search engine result page (SERP).
              #
              # The engine object (looked up in ENGINES by name) supplies the SERP URL,
              # the +_parse_*+ extraction methods, and the weighting configuration.
              class Parser
                # html / doc hold the source text and parsed document of the most
                # recent #parse call; result holds its extracted data.
                attr_reader :engine_name, :keyword, :page, :html, :doc, :result

                # engine_name - key into ENGINES selecting the engine class.
                # keyword     - the search keyword.
                # page        - result page number (defaults to 1).
                def initialize(engine_name,keyword,page=1)
                  @engine_name = engine_name
                  @engine = ENGINES[@engine_name].new
                  @keyword = keyword
                  @page = page
                end

                # Returns the SERP URL the engine builds for this keyword and page.
                def serp_url
                  @engine.serp_url(@keyword,@page)
                end

                # Downloads the SERP through Crawler and parses it.
                #
                # Returns the result Hash (see #parse).
                def search
                  parse Crawler.get_serp(serp_url).body
                end

                # Parses +html+ by running every engine method named +_parse_*+.
                #
                # Each such method receives a Hash with :html (the text) and :doc (the
                # Nokogiri document); its return value is stored in the result under
                # the method name without the "_parse_" prefix.
                #
                # html - the page source. It is re-encoded into a copy, so the
                #        caller's string is not modified and may be frozen.
                #
                # Returns the result Hash, also available through #result.
                # Raises RuntimeError ("Serper Parser Get An Error!") when an engine
                # method raises a StandardError; the page is dumped to a file under
                # /tmp first. Non-StandardError exceptions (interrupts, exit requests)
                # propagate untouched.
                def parse(html)
                  html = html.encode('UTF-8', 'UTF-8', :invalid => :replace)
                  @result = {}

                  # Populate the readers declared above; they were previously never set.
                  @html = html
                  @doc = Nokogiri::HTML(html)
                  @file = { :html => @html, :doc => @doc }

                  @engine.methods.each do |m|
                    name = m.to_s
                    next unless name.start_with?('_parse_')

                    begin
                      @result[name.sub('_parse_', '').to_sym] = @engine.send(m, @file)
                    rescue StandardError => e
                      report_parse_failure(html, e)
                      raise "Serper Parser Get An Error!"
                    end
                  end

                  @result
                end

                # Collects the weighted entries of the current result for both sides
                # of the page and normalizes them.
                #
                # For each side (:left, :right) the engine's weight_config lists the
                # parts to score ("<side>_parts"); each part is scored by the engine
                # method "_weight_of_<part>", which returns the entries plus the
                # updated running rank for that side. Every entry is tagged with
                # :side and :part and its :weight is scaled by the side's
                # "<side>_part_weight".
                #
                # NOTE(review): the final normalization reads :side_weight and writes
                # :weight, overwriting the scaled :weight computed here. This method
                # never sets :side_weight, so it presumably comes from the engine's
                # _weight_of_* methods - confirm this is intended.
                #
                # Returns the Array of entry Hashes.
                def weights
                  entries = []
                  [:left,:right].each do |side|
                    side_rank = 0
                    part_weight = @engine.weight_config["#{side}_part_weight".to_sym].to_f

                    @engine.weight_config["#{side}_parts".to_sym].each do |part|
                      rs,side_rank = @engine.send("_weight_of_#{part}",@result,side_rank)

                      rs.each do |r|
                        r[:side] = side.to_s
                        r[:part] = part
                        r[:weight] = r[:weight].to_f * part_weight
                        entries << r
                      end
                    end
                  end
                  Serper::Helper.normalize(entries,:side_weight,:weight)
                end

                private

                # Dumps the offending page to a timestamped file under /tmp and prints
                # bug-report instructions together with the error details.
                def report_parse_failure(html, error)
                  issue_file = "/tmp/serper_issue_#{Time.now.strftime("%Y%m%d%H%M%S")}.html"
                  # Block form closes (and flushes) the file even if writing fails.
                  File.open(issue_file, 'w') { |f| f.puts(html) }
                  puts "Notice:"
                  puts "Serper gem have a bug, please email to zmingqian@qq.com to report it."
                  puts "Please attach file #{issue_file} in the email and the error information below, thanks!"
                  puts error.message
                  puts error.inspect
                  puts error.backtrace
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,183 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ---
         
     | 
| 
      
 2 
     | 
    
         
            +
            - 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/527  (KHTML, like Gecko,
         
     | 
| 
      
 3 
     | 
    
         
            +
              Safari/419.3) Arora/0.6 (Change: )'
         
     | 
| 
      
 4 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; ; en-NZ) AppleWebKit/527  (KHTML, like Gecko, Safari/419.3)
         
     | 
| 
      
 5 
     | 
    
         
            +
              Arora/0.8.0
         
     | 
| 
      
 6 
     | 
    
         
            +
            - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser; Avant Browser;
         
     | 
| 
      
 7 
     | 
    
         
            +
              .NET CLR 1.0.3705; .NET CLR 1.1.4322; Media Center PC 4.0; .NET CLR 2.0.50727; .NET
         
     | 
| 
      
 8 
     | 
    
         
            +
              CLR 3.0.04506.30)
         
     | 
| 
      
 9 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.8 (KHTML, like Gecko) Beamrise/17.2.0.9
         
     | 
| 
      
 10 
     | 
    
         
            +
              Chrome/17.0.939.0 Safari/535.8
         
     | 
| 
      
 11 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/18.6.872.0
         
     | 
| 
      
 12 
     | 
    
         
            +
              Safari/535.2 UNTRUSTED/1.0 3gpp-gba UNTRUSTED/1.0
         
     | 
| 
      
 13 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1
         
     | 
| 
      
 14 
     | 
    
         
            +
              Safari/536.3
         
     | 
| 
      
 15 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0
         
     | 
| 
      
 16 
     | 
    
         
            +
              Safari/536.6
         
     | 
| 
      
 17 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0
         
     | 
| 
      
 18 
     | 
    
         
            +
              Safari/536.6
         
     | 
| 
      
 19 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1
         
     | 
| 
      
 20 
     | 
    
         
            +
              Safari/537.1
         
     | 
| 
      
 21 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/28.0.1469.0
         
     | 
| 
      
 22 
     | 
    
         
            +
              Safari/537.36
         
     | 
| 
      
 23 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/28.0.1469.0
         
     | 
| 
      
 24 
     | 
    
         
            +
              Safari/537.36
         
     | 
| 
      
 25 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; rv:12.0) Gecko/20120403211507 Firefox/12.0
         
     | 
| 
      
 26 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.0; rv:14.0) Gecko/20100101 Firefox/14.0.1
         
     | 
| 
      
 27 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:15.0) Gecko/20120427 Firefox/15.0a1
         
     | 
| 
      
 28 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/16.0 Firefox/16.0
         
     | 
| 
      
 29 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.2; rv:19.0) Gecko/20121129 Firefox/19.0
         
     | 
| 
      
 30 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.2; rv:20.0) Gecko/20121202 Firefox/20.0
         
     | 
| 
      
 31 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0
         
     | 
| 
      
 32 
     | 
    
         
            +
            - Mozilla/5.0 (compatible; Konqueror/4.5; Windows) KHTML/4.5.4 (like Gecko)
         
     | 
| 
      
 33 
     | 
    
         
            +
            - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR
         
     | 
| 
      
 34 
     | 
    
         
            +
              2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Maxthon
         
     | 
| 
      
 35 
     | 
    
         
            +
              2.0)
         
     | 
| 
      
 36 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.1 (KHTML, like Gecko)
         
     | 
| 
      
 37 
     | 
    
         
            +
              Maxthon/3.0.8.2 Safari/533.1
         
     | 
| 
      
 38 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML like Gecko) Maxthon/4.0.0.2000
         
     | 
| 
      
 39 
     | 
    
         
            +
              Chrome/22.0.1229.79 Safari/537.1
         
     | 
| 
      
 40 
     | 
    
         
            +
            - Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)
         
     | 
| 
      
 41 
     | 
    
         
            +
            - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)
         
     | 
| 
      
 42 
     | 
    
         
            +
            - Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727;
         
     | 
| 
      
 43 
     | 
    
         
            +
              .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)
         
     | 
| 
      
 44 
     | 
    
         
            +
            - Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)
         
     | 
| 
      
 45 
     | 
    
         
            +
            - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/4.0)
         
     | 
| 
      
 46 
     | 
    
         
            +
            - Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)
         
     | 
| 
      
 47 
     | 
    
         
            +
            - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)
         
     | 
| 
      
 48 
     | 
    
         
            +
            - Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)
         
     | 
| 
      
 49 
     | 
    
         
            +
            - Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.2; Trident/5.0)
         
     | 
| 
      
 50 
     | 
    
         
            +
            - Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.2; WOW64; Trident/5.0)
         
     | 
| 
      
 51 
     | 
    
         
            +
            - Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media
         
     | 
| 
      
 52 
     | 
    
         
            +
              Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7)
         
     | 
| 
      
 53 
     | 
    
         
            +
            - Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)
         
     | 
| 
      
 54 
     | 
    
         
            +
            - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/6.0)
         
     | 
| 
      
 55 
     | 
    
         
            +
            - Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1;
         
     | 
| 
      
 56 
     | 
    
         
            +
              .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0
         
     | 
| 
      
 57 
     | 
    
         
            +
            - Opera/9.25 (Windows NT 6.0; U; en)
         
     | 
| 
      
 58 
     | 
    
         
            +
            - Opera/9.80 (Windows NT 5.2; U; en) Presto/2.2.15 Version/10.10
         
     | 
| 
      
 59 
     | 
    
         
            +
            - Opera/9.80 (Windows NT 5.1; U; ru) Presto/2.7.39 Version/11.00
         
     | 
| 
      
 60 
     | 
    
         
            +
            - Opera/9.80 (Windows NT 6.1; U; en) Presto/2.7.62 Version/11.01
         
     | 
| 
      
 61 
     | 
    
         
            +
            - Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10
         
     | 
| 
      
 62 
     | 
    
         
            +
            - Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00
         
     | 
| 
      
 63 
     | 
    
         
            +
            - Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14
         
     | 
| 
      
 64 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.21.8 (KHTML, like
         
     | 
| 
      
 65 
     | 
    
         
            +
              Gecko) Version/4.0.4 Safari/531.21.10
         
     | 
| 
      
 66 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/533.17.8 (KHTML, like
         
     | 
| 
      
 67 
     | 
    
         
            +
              Gecko) Version/5.0.1 Safari/533.17.8
         
     | 
| 
      
 68 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.19.4 (KHTML, like
         
     | 
| 
      
 69 
     | 
    
         
            +
              Gecko) Version/5.0.2 Safari/533.18.5
         
     | 
| 
      
 70 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 6.2; es-US ) AppleWebKit/540.0 (KHTML like Gecko)
         
     | 
| 
      
 71 
     | 
    
         
            +
              Version/6.0 Safari/8900.00
         
     | 
| 
      
 72 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.1.17) Gecko/20110123 (like
         
     | 
| 
      
 73 
     | 
    
         
            +
              Firefox/3.x) SeaMonkey/2.0.12
         
     | 
| 
      
 74 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 5.2; rv:10.0.1) Gecko/20100101 Firefox/10.0.1 SeaMonkey/2.7.1
         
     | 
| 
      
 75 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20120422 Firefox/12.0 SeaMonkey/2.9
         
     | 
| 
      
 76 
     | 
    
         
            +
            - Avant Browser/1.2.789rel1 (http://www.avantbrowser.com)
         
     | 
| 
      
 77 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko)
         
     | 
| 
      
 78 
     | 
    
         
            +
              Chrome/4.0.249.0 Safari/532.5
         
     | 
| 
      
 79 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.9 (KHTML, like Gecko)
         
     | 
| 
      
 80 
     | 
    
         
            +
              Chrome/5.0.310.0 Safari/532.9
         
     | 
| 
      
 81 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko)
         
     | 
| 
      
 82 
     | 
    
         
            +
              Chrome/7.0.514.0 Safari/534.7
         
     | 
| 
      
 83 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.14 (KHTML, like
         
     | 
| 
      
 84 
     | 
    
         
            +
              Gecko) Chrome/9.0.601.0 Safari/534.14
         
     | 
| 
      
 85 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.14 (KHTML, like
         
     | 
| 
      
 86 
     | 
    
         
            +
              Gecko) Chrome/10.0.601.0 Safari/534.14
         
     | 
| 
      
 87 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like
         
     | 
| 
      
 88 
     | 
    
         
            +
              Gecko) Chrome/11.0.672.2 Safari/534.20
         
     | 
| 
      
 89 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.27 (KHTML, like Gecko) Chrome/12.0.712.0
         
     | 
| 
      
 90 
     | 
    
         
            +
              Safari/534.27
         
     | 
| 
      
 91 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24
         
     | 
| 
      
 92 
     | 
    
         
            +
              Safari/535.1
         
     | 
| 
      
 93 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120
         
     | 
| 
      
 94 
     | 
    
         
            +
              Safari/535.2
         
     | 
| 
      
 95 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36
         
     | 
| 
      
 96 
     | 
    
         
            +
              Safari/535.7
         
     | 
| 
      
 97 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421
         
     | 
| 
      
 98 
     | 
    
         
            +
              Minefield/3.0.2pre
         
     | 
| 
      
 99 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10
         
     | 
| 
      
 100 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11
         
     | 
| 
      
 101 
     | 
    
         
            +
              (.NET CLR 3.5.30729)
         
     | 
| 
      
 102 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6
         
     | 
| 
      
 103 
     | 
    
         
            +
              GTB5
         
     | 
| 
      
 104 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 5.1; tr; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8
         
     | 
| 
      
 105 
     | 
    
         
            +
              ( .NET CLR 3.5.30729; .NET4.0E)
         
     | 
| 
      
 106 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1
         
     | 
| 
      
 107 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1
         
     | 
| 
      
 108 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0
         
     | 
| 
      
 109 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0a2) Gecko/20110622 Firefox/6.0a2
         
     | 
| 
      
 110 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1
         
     | 
| 
      
 111 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
         
     | 
| 
      
 112 
     | 
    
         
            +
            - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0b4pre) Gecko/20100815 Minefield/4.0b4pre
         
     | 
| 
      
 113 
     | 
    
         
            +
            - Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0 )
         
     | 
| 
      
 114 
     | 
    
         
            +
            - Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; Win 9x 4.90)
         
     | 
| 
      
 115 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows XP) Gecko MultiZilla/1.6.1.0a
         
     | 
| 
      
 116 
     | 
    
         
            +
            - Mozilla/2.02E (Win95; U)
         
     | 
| 
      
 117 
     | 
    
         
            +
            - Mozilla/3.01Gold (Win95; I)
         
     | 
| 
      
 118 
     | 
    
         
            +
            - Mozilla/4.8 [en] (Windows NT 5.1; U)
         
     | 
| 
      
 119 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.4) Gecko Netscape/7.1 (ax)
         
     | 
| 
      
 120 
     | 
    
         
            +
            - Opera/7.50 (Windows XP; U)
         
     | 
| 
      
 121 
     | 
    
         
            +
            - Opera/7.50 (Windows ME; U) [en]
         
     | 
| 
      
 122 
     | 
    
         
            +
            - Opera/7.51 (Windows NT 5.1; U) [en]
         
     | 
| 
      
 123 
     | 
    
         
            +
            - Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; en) Opera 8.0
         
     | 
| 
      
 124 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; WinNT4.0; en-US; rv:1.2b) Gecko/20021001 Phoenix/0.2
         
     | 
| 
      
 125 
     | 
    
         
            +
            - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.23) Gecko/20090825 SeaMonkey/1.1.18
         
     | 
| 
      
 126 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1
         
     | 
| 
      
 127 
     | 
    
         
            +
              Camino/2.2.1
         
     | 
| 
      
 128 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0b6pre) Gecko/20100907 Firefox/4.0b6pre
         
     | 
| 
      
 129 
     | 
    
         
            +
              Camino/2.2a1pre
         
     | 
| 
      
 130 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko)
         
     | 
| 
      
 131 
     | 
    
         
            +
              Chrome/19.0.1063.0 Safari/536.3
         
     | 
| 
      
 132 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.4 (KHTML like Gecko)
         
     | 
| 
      
 133 
     | 
    
         
            +
              Chrome/22.0.1229.79 Safari/537.4
         
     | 
| 
      
 134 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.31 (KHTML like Gecko)
         
     | 
| 
      
 135 
     | 
    
         
            +
              Chrome/26.0.1410.63 Safari/537.31
         
     | 
| 
      
 136 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 1083) AppleWebKit/537.36 (KHTML like Gecko)
         
     | 
| 
      
 137 
     | 
    
         
            +
              Chrome/28.0.1469.0 Safari/537.36
         
     | 
| 
      
 138 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
         
     | 
| 
      
 139 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:16.0) Gecko/20120813 Firefox/16.0
         
     | 
| 
      
 140 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:20.0) Gecko/20100101 Firefox/20.0
         
     | 
| 
      
 141 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0
         
     | 
| 
      
 142 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US) AppleWebKit/528.16 (KHTML, like
         
     | 
| 
      
 143 
     | 
    
         
            +
              Gecko, Safari/528.16) OmniWeb/v622.8.0.112941
         
     | 
| 
      
 144 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/528.16 (KHTML,
         
     | 
| 
      
 145 
     | 
    
         
            +
              like Gecko, Safari/528.16) OmniWeb/v622.8.0
         
     | 
| 
      
 146 
     | 
    
         
            +
            - Opera/9.20 (Macintosh; Intel Mac OS X; U; en)
         
     | 
| 
      
 147 
     | 
    
         
            +
            - Opera/9.80 (Macintosh; Intel Mac OS X; U; en) Presto/2.6.30 Version/10.61
         
     | 
| 
      
 148 
     | 
    
         
            +
            - Opera/9.80 (Macintosh; Intel Mac OS X 10.4.11; U; en) Presto/2.7.62 Version/11.00
         
     | 
| 
      
 149 
     | 
    
         
            +
            - Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52
         
     | 
| 
      
 150 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; en-us) AppleWebKit/531.21.8 (KHTML,
         
     | 
| 
      
 151 
     | 
    
         
            +
              like Gecko) Version/4.0.4 Safari/531.21.10
         
     | 
| 
      
 152 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; de-de) AppleWebKit/534.15  (KHTML,
         
     | 
| 
      
 153 
     | 
    
         
            +
              like Gecko) Version/5.0.3 Safari/533.19.4
         
     | 
| 
      
 154 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-us) AppleWebKit/533.20.25 (KHTML,
         
     | 
| 
      
 155 
     | 
    
         
            +
              like Gecko) Version/5.0.4 Safari/533.20.27
         
     | 
| 
      
 156 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_7; en-us) AppleWebKit/534.20.8 (KHTML,
         
     | 
| 
      
 157 
     | 
    
         
            +
              like Gecko) Version/5.1 Safari/534.20.8
         
     | 
| 
      
 158 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like
         
     | 
| 
      
 159 
     | 
    
         
            +
              Gecko) Version/5.1.3 Safari/534.53.10
         
     | 
| 
      
 160 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/536.26.17 (KHTML like
         
     | 
| 
      
 161 
     | 
    
         
            +
              Gecko) Version/6.0.2 Safari/536.26.17
         
     | 
| 
      
 162 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.5; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
         
     | 
| 
      
 163 
     | 
    
         
            +
              SeaMonkey/2.7.1
         
     | 
| 
      
 164 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.8 (KHTML,
         
     | 
| 
      
 165 
     | 
    
         
            +
              like Gecko) Chrome/4.0.302.2 Safari/532.8
         
     | 
| 
      
 166 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.3 (KHTML,
         
     | 
| 
      
 167 
     | 
    
         
            +
              like Gecko) Chrome/6.0.464.0 Safari/534.3
         
     | 
| 
      
 168 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.13 (KHTML,
         
     | 
| 
      
 169 
     | 
    
         
            +
              like Gecko) Chrome/9.0.597.15 Safari/534.13
         
     | 
| 
      
 170 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.1 (KHTML, like Gecko)
         
     | 
| 
      
 171 
     | 
    
         
            +
              Chrome/14.0.835.186 Safari/535.1
         
     | 
| 
      
 172 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko)
         
     | 
| 
      
 173 
     | 
    
         
            +
              Chrome/15.0.874.54 Safari/535.2
         
     | 
| 
      
 174 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko)
         
     | 
| 
      
 175 
     | 
    
         
            +
              Chrome/16.0.912.36 Safari/535.7
         
     | 
| 
      
 176 
     | 
    
         
            +
            - 'Mozilla/5.0 (Macintosh; U; Mac OS X Mach-O; en-US; rv:2.0a) Gecko/20040614 Firefox/3.0.0 '
         
     | 
| 
      
 177 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1) Gecko/20090624
         
     | 
| 
      
 178 
     | 
    
         
            +
              Firefox/3.5
         
     | 
| 
      
 179 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.14) Gecko/20110218
         
     | 
| 
      
 180 
     | 
    
         
            +
              AlexaToolbar/alxf-2.0 Firefox/3.6.14
         
     | 
| 
      
 181 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1
         
     | 
| 
      
 182 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:5.0) Gecko/20100101 Firefox/5.0
         
     | 
| 
      
 183 
     | 
    
         
            +
            - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0) Gecko/20100101 Firefox/9.0
         
     | 
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,141 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: serper
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version
         
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.1.0
         
     | 
| 
      
 5 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 6 
     | 
    
         
            +
            authors:
         
     | 
| 
      
 7 
     | 
    
         
            +
            - MingQian Zhang
         
     | 
| 
      
 8 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 9 
     | 
    
         
            +
            bindir: bin
         
     | 
| 
      
 10 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2014-07-29 00:00:00.000000000 Z
         
     | 
| 
      
 12 
     | 
    
         
            +
            dependencies:
         
     | 
| 
      
 13 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 14 
     | 
    
         
            +
              name: nokogiri
         
     | 
| 
      
 15 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 16 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 17 
     | 
    
         
            +
                - - '>='
         
     | 
| 
      
 18 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 19 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 20 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 21 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 22 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 23 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 24 
     | 
    
         
            +
                - - '>='
         
     | 
| 
      
 25 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 26 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 27 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 28 
     | 
    
         
            +
              name: httparty
         
     | 
| 
      
 29 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 30 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 31 
     | 
    
         
            +
                - - '>='
         
     | 
| 
      
 32 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 33 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 34 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 35 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 36 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 37 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 38 
     | 
    
         
            +
                - - '>='
         
     | 
| 
      
 39 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 40 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 41 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 42 
     | 
    
         
            +
              name: domainatrix
         
     | 
| 
      
 43 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 44 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 45 
     | 
    
         
            +
                - - '>='
         
     | 
| 
      
 46 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 47 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 48 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 49 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 50 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 51 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 52 
     | 
    
         
            +
                - - '>='
         
     | 
| 
      
 53 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 54 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 55 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 56 
     | 
    
         
            +
              name: activerecord
         
     | 
| 
      
 57 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 58 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 59 
     | 
    
         
            +
                - - '>='
         
     | 
| 
      
 60 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 61 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 62 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 63 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 64 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 65 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 66 
     | 
    
         
            +
                - - '>='
         
     | 
| 
      
 67 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 68 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 69 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 70 
     | 
    
         
            +
              name: docopt
         
     | 
| 
      
 71 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 72 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 73 
     | 
    
         
            +
                - - '>='
         
     | 
| 
      
 74 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 75 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 76 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 77 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 78 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 79 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 80 
     | 
    
         
            +
                - - '>='
         
     | 
| 
      
 81 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 82 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 83 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 84 
     | 
    
         
            +
              name: ruby-progressbar
         
     | 
| 
      
 85 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 86 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 87 
     | 
    
         
            +
                - - '>='
         
     | 
| 
      
 88 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 89 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 90 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 91 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 92 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 93 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 94 
     | 
    
         
            +
                - - '>='
         
     | 
| 
      
 95 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 96 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 97 
     | 
    
         
            +
            description: Parse SERP result page.
         
     | 
| 
      
 98 
     | 
    
         
            +
            email:
         
     | 
| 
      
 99 
     | 
    
         
            +
            - zmingqian@qq.com
         
     | 
| 
      
 100 
     | 
    
         
            +
            executables:
         
     | 
| 
      
 101 
     | 
    
         
            +
            - serper
         
     | 
| 
      
 102 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 103 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 104 
     | 
    
         
            +
            files:
         
     | 
| 
      
 105 
     | 
    
         
            +
            - lib/serper/analyser.rb
         
     | 
| 
      
 106 
     | 
    
         
            +
            - lib/serper/baidu/crawler.rb
         
     | 
| 
      
 107 
     | 
    
         
            +
            - lib/serper/baidu/parser.rb
         
     | 
| 
      
 108 
     | 
    
         
            +
            - lib/serper/baidu/weight.rb
         
     | 
| 
      
 109 
     | 
    
         
            +
            - lib/serper/crawler.rb
         
     | 
| 
      
 110 
     | 
    
         
            +
            - lib/serper/helper.rb
         
     | 
| 
      
 111 
     | 
    
         
            +
            - lib/serper/parser.rb
         
     | 
| 
      
 112 
     | 
    
         
            +
            - lib/serper/version.rb
         
     | 
| 
      
 113 
     | 
    
         
            +
            - lib/serper.rb
         
     | 
| 
      
 114 
     | 
    
         
            +
            - bin/serper
         
     | 
| 
      
 115 
     | 
    
         
            +
            - README.md
         
     | 
| 
      
 116 
     | 
    
         
            +
            - lib/serper/user_agents.yml
         
     | 
| 
      
 117 
     | 
    
         
            +
            homepage: https://github.com/semseo/serper
         
     | 
| 
      
 118 
     | 
    
         
            +
            licenses:
         
     | 
| 
      
 119 
     | 
    
         
            +
            - MIT
         
     | 
| 
      
 120 
     | 
    
         
            +
            metadata: {}
         
     | 
| 
      
 121 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 122 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 123 
     | 
    
         
            +
            require_paths:
         
     | 
| 
      
 124 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 125 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 126 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 127 
     | 
    
         
            +
              - - '>='
         
     | 
| 
      
 128 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 129 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 130 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 131 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 132 
     | 
    
         
            +
              - - '>='
         
     | 
| 
      
 133 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 134 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 135 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 136 
     | 
    
         
            +
            rubyforge_project: 
         
     | 
| 
      
 137 
     | 
    
         
            +
            rubygems_version: 2.0.0
         
     | 
| 
      
 138 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 139 
     | 
    
         
            +
            specification_version: 4
         
     | 
| 
      
 140 
     | 
    
         
            +
            summary: SERP
         
     | 
| 
      
 141 
     | 
    
         
            +
            test_files: []
         
     |