web_reptile 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/.gitignore +10 -0
 - data/.rspec +2 -0
 - data/.travis.yml +10 -0
 - data/Gemfile +6 -0
 - data/README.md +48 -0
 - data/Rakefile +6 -0
 - data/bin/console +14 -0
 - data/bin/setup +8 -0
 - data/lib/web_reptile.rb +30 -0
 - data/lib/web_reptile/version.rb +3 -0
 - data/web_reptile.gemspec +23 -0
 - metadata +96 -0
 
    
        checksums.yaml
    ADDED
    
    | 
         @@ -0,0 +1,7 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ---
         
     | 
| 
      
 2 
     | 
    
         
            +
            SHA1:
         
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 889d004908345db250a70bb98af7d316c5a78acb
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 8ca6a65a23367a6e08b3b4311a690bdfecacbb09
         
     | 
| 
      
 5 
     | 
    
         
            +
            SHA512:
         
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 1d02c6e675541da068296120e4a0525de4bcbff6e93880d2aeefb615fd791f7df9e6ba7d3e59b3a4d5f7f1e8c315831b00a55a375c3220aa8b07bdbb5d49d7b6
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 47bac14d750fae6712366afb419313214513ec368674836cadd255718b1c5837f8f2552ab0353a6d0d72fcc54a3bb3b181d3676ea943982c872e91c5f0a33c24
         
     | 
    
        data/.gitignore
    ADDED
    
    
    
        data/.rspec
    ADDED
    
    
    
        data/.travis.yml
    ADDED
    
    
    
        data/Gemfile
    ADDED
    
    
    
        data/README.md
    ADDED
    
    | 
         @@ -0,0 +1,48 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # WebReptile
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            [](https://travis-ci.org/DotHide/reptile) 
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            WebReptile is a web spider framework using Ruby.
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            ## Installation
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            Add this line to your application's Gemfile:
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 12 
     | 
    
         
            +
            gem 'web_reptile'
         
     | 
| 
      
 13 
     | 
    
         
            +
            ```
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            And then execute:
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
                $ bundle
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
            Or install it yourself as:
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
                $ gem install web_reptile
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
            ## Usage
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 26 
     | 
    
         
            +
            require('web_reptile')
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
            domain = WebReptile.url("http://www.jd.com/allSort.aspx")
         
     | 
| 
      
 29 
     | 
    
         
            +
            items = domain.grab(".category-items .category-item .items dt a")
         
     | 
| 
      
 30 
     | 
    
         
            +
            items.map do |item|
         
     | 
| 
      
 31 
     | 
    
         
            +
                href = item.attr('href')
         
     | 
| 
      
 32 
     | 
    
         
            +
                text = item.text
         
     | 
| 
      
 33 
     | 
    
         
            +
                "[#{text}](#{href})"
         
     | 
| 
      
 34 
     | 
    
         
            +
            end
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
            # ["[电子书刊](//e.jd.com/ebook.html)", "[音像](//mvd.jd.com/)", "[英文原版](//channel.jd.com/1713-4855.html)", "[文艺](//channel.jd.com/p_wenxuezongheguan.html)", "[少儿](//book.jd.com/children.html)", "[人文社科](//book.jd.com/library/socialscience.html)", "[经管励志](//channel.jd.com/p_Comprehensive.html)", ...]
         
     | 
| 
      
 37 
     | 
    
         
            +
            ```
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
            ## Development
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
      
 41 
     | 
    
         
            +
            After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
            To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
            ## Contributing
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
            Bug reports and pull requests are welcome on GitHub at https://github.com/DotHide/web_reptile.
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
    
        data/Rakefile
    ADDED
    
    
    
        data/bin/console
    ADDED
    
    | 
         @@ -0,0 +1,14 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "bundler/setup"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require "reptile"
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            # You can add fixtures and/or initialization code here to make experimenting
         
     | 
| 
      
 7 
     | 
    
         
            +
            # with your gem easier. You can also use a different console, if you like.
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            # (If you use this, don't forget to add pry to your Gemfile!)
         
     | 
| 
      
 10 
     | 
    
         
            +
            # require "pry"
         
     | 
| 
      
 11 
     | 
    
         
            +
            # Pry.start
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            require "irb"
         
     | 
| 
      
 14 
     | 
    
         
            +
            IRB.start
         
     | 
    
        data/bin/setup
    ADDED
    
    
    
        data/lib/web_reptile.rb
    ADDED
    
    | 
         @@ -0,0 +1,30 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "web_reptile/version"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require "mechanize"
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            module WebReptile
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
              def WebReptile.url(url, options = {}, &block)
         
     | 
| 
      
 7 
     | 
    
         
            +
                Core.url(url, options, &block)
         
     | 
| 
      
 8 
     | 
    
         
            +
              end
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
              class Core
         
     | 
| 
      
 11 
     | 
    
         
            +
                def initialize(url, opts = {})
         
     | 
| 
      
 12 
     | 
    
         
            +
                  @url = url.is_a?(URI) ? url : URI(url)
         
     | 
| 
      
 13 
     | 
    
         
            +
                  @opts = opts
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
                  agent = Mechanize.new
         
     | 
| 
      
 16 
     | 
    
         
            +
                  @page = agent.get(@url)
         
     | 
| 
      
 17 
     | 
    
         
            +
                end
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
                def self.url(url, opts = {})
         
     | 
| 
      
 20 
     | 
    
         
            +
                  self.new(url, opts) do |web|
         
     | 
| 
      
 21 
     | 
    
         
            +
                    yield web if block_given?
         
     | 
| 
      
 22 
     | 
    
         
            +
                    self
         
     | 
| 
      
 23 
     | 
    
         
            +
                  end
         
     | 
| 
      
 24 
     | 
    
         
            +
                end
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
                def grab(selector)
         
     | 
| 
      
 27 
     | 
    
         
            +
                   items = @page.search(selector)
         
     | 
| 
      
 28 
     | 
    
         
            +
                end
         
     | 
| 
      
 29 
     | 
    
         
            +
              end
         
     | 
| 
      
 30 
     | 
    
         
            +
            end
         
     | 
    
        data/web_reptile.gemspec
    ADDED
    
    | 
         @@ -0,0 +1,23 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # coding: utf-8
         
     | 
| 
      
 2 
     | 
    
         
            +
            lib = File.expand_path('../lib', __FILE__)
         
     | 
| 
      
 3 
     | 
    
         
            +
            $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'web_reptile/version'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            Gem::Specification.new do |spec|
         
     | 
| 
      
 7 
     | 
    
         
            +
              spec.name          = "web_reptile"
         
     | 
| 
      
 8 
     | 
    
         
            +
              spec.version       = WebReptile::VERSION
         
     | 
| 
      
 9 
     | 
    
         
            +
              spec.authors       = ["Martin_nett"]
         
     | 
| 
      
 10 
     | 
    
         
            +
              spec.email         = ["dothide@gmail.com"]
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
              spec.summary       = %q{WebReptile is a web spider framework using Ruby.}
         
     | 
| 
      
 13 
     | 
    
         
            +
              spec.homepage      = "https://github.com/DotHide/reptile"
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
              spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
         
     | 
| 
      
 16 
     | 
    
         
            +
              spec.bindir        = "exe"
         
     | 
| 
      
 17 
     | 
    
         
            +
              spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
         
     | 
| 
      
 18 
     | 
    
         
            +
              spec.require_paths = ["lib"]
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
              spec.add_development_dependency "bundler", "~> 1.11"
         
     | 
| 
      
 21 
     | 
    
         
            +
              spec.add_development_dependency "rake", "~> 10.0"
         
     | 
| 
      
 22 
     | 
    
         
            +
              spec.add_development_dependency "rspec", "~> 3.0"
         
     | 
| 
      
 23 
     | 
    
         
            +
            end
         
     | 
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,96 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: web_reptile
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version
         
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.1.0
         
     | 
| 
      
 5 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 6 
     | 
    
         
            +
            authors:
         
     | 
| 
      
 7 
     | 
    
         
            +
            - Martin_nett
         
     | 
| 
      
 8 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 9 
     | 
    
         
            +
            bindir: exe
         
     | 
| 
      
 10 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2016-06-21 00:00:00.000000000 Z
         
     | 
| 
      
 12 
     | 
    
         
            +
            dependencies:
         
     | 
| 
      
 13 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 14 
     | 
    
         
            +
              name: bundler
         
     | 
| 
      
 15 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 16 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 17 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 18 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 19 
     | 
    
         
            +
                    version: '1.11'
         
     | 
| 
      
 20 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 21 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 22 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 23 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 24 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 25 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 26 
     | 
    
         
            +
                    version: '1.11'
         
     | 
| 
      
 27 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 28 
     | 
    
         
            +
              name: rake
         
     | 
| 
      
 29 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 30 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 31 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 32 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 33 
     | 
    
         
            +
                    version: '10.0'
         
     | 
| 
      
 34 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 35 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 36 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 37 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 38 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 39 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 40 
     | 
    
         
            +
                    version: '10.0'
         
     | 
| 
      
 41 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 42 
     | 
    
         
            +
              name: rspec
         
     | 
| 
      
 43 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 44 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 45 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 46 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 47 
     | 
    
         
            +
                    version: '3.0'
         
     | 
| 
      
 48 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 49 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 50 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 51 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 52 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 53 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 54 
     | 
    
         
            +
                    version: '3.0'
         
     | 
| 
      
 55 
     | 
    
         
            +
            description: 
         
     | 
| 
      
 56 
     | 
    
         
            +
            email:
         
     | 
| 
      
 57 
     | 
    
         
            +
            - dothide@gmail.com
         
     | 
| 
      
 58 
     | 
    
         
            +
            executables: []
         
     | 
| 
      
 59 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 60 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 61 
     | 
    
         
            +
            files:
         
     | 
| 
      
 62 
     | 
    
         
            +
            - ".gitignore"
         
     | 
| 
      
 63 
     | 
    
         
            +
            - ".rspec"
         
     | 
| 
      
 64 
     | 
    
         
            +
            - ".travis.yml"
         
     | 
| 
      
 65 
     | 
    
         
            +
            - Gemfile
         
     | 
| 
      
 66 
     | 
    
         
            +
            - README.md
         
     | 
| 
      
 67 
     | 
    
         
            +
            - Rakefile
         
     | 
| 
      
 68 
     | 
    
         
            +
            - bin/console
         
     | 
| 
      
 69 
     | 
    
         
            +
            - bin/setup
         
     | 
| 
      
 70 
     | 
    
         
            +
            - lib/web_reptile.rb
         
     | 
| 
      
 71 
     | 
    
         
            +
            - lib/web_reptile/version.rb
         
     | 
| 
      
 72 
     | 
    
         
            +
            - web_reptile.gemspec
         
     | 
| 
      
 73 
     | 
    
         
            +
            homepage: https://github.com/DotHide/reptile
         
     | 
| 
      
 74 
     | 
    
         
            +
            licenses: []
         
     | 
| 
      
 75 
     | 
    
         
            +
            metadata: {}
         
     | 
| 
      
 76 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 77 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 78 
     | 
    
         
            +
            require_paths:
         
     | 
| 
      
 79 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 80 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 81 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 82 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 83 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 84 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 85 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 86 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 87 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 88 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 89 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 90 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 91 
     | 
    
         
            +
            rubyforge_project: 
         
     | 
| 
      
 92 
     | 
    
         
            +
            rubygems_version: 2.6.4
         
     | 
| 
      
 93 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 94 
     | 
    
         
            +
            specification_version: 4
         
     | 
| 
      
 95 
     | 
    
         
            +
            summary: WebReptile is a web spider framework using Ruby.
         
     | 
| 
      
 96 
     | 
    
         
            +
            test_files: []
         
     |