arb-bs 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/.gitignore +9 -0
 - data/Gemfile +4 -0
 - data/README.md +36 -0
 - data/Rakefile +2 -0
 - data/arb-bs.gemspec +27 -0
 - data/bin/console +14 -0
 - data/bin/setup +8 -0
 - data/exe/bs_pic +33 -0
 - data/lib/arb/bs/version.rb +5 -0
 - data/lib/arb/bs.rb +1 -0
 - metadata +96 -0
 
    
        checksums.yaml
    ADDED
    
    | 
         @@ -0,0 +1,7 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ---
         
     | 
| 
      
 2 
     | 
    
         
            +
            SHA1:
         
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 2a8da1d3d085af9aefaf5bb70c7815c8595e65dc
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 086af94724d0d41d008d0d72237822ee74c9afd0
         
     | 
| 
      
 5 
     | 
    
         
            +
            SHA512:
         
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 2c9ceaa9dd4f344fdc0f0c798bbe0c9f911b21beec3ebef63d91b852cc7007fe62dec050c840cf9d6d2b31b4142cee3af8fb93c50913444f7d3fb57db1368b07
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 6eff3825d92b1d6d681fd0bcdb2cba4851777563fab715946afc802b2a20fd5960c6df08e790f98e6da6611e1829abbc82776c282457289d33c85541a19e3ec5
         
     | 
    
        data/.gitignore
    ADDED
    
    
    
        data/Gemfile
    ADDED
    
    
    
        data/README.md
    ADDED
    
    | 
         @@ -0,0 +1,36 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Arb::Bs
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/arb/bs.rb`. To experiment with that code, run `bin/console` for an interactive prompt.
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            TODO: Delete this and the text above, and describe your gem
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            ## Installation
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            Add this line to your application's Gemfile:
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 12 
     | 
    
         
            +
            gem 'arb-bs'
         
     | 
| 
      
 13 
     | 
    
         
            +
            ```
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            And then execute:
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
                $ bundle
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
            Or install it yourself as:
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
                $ gem install arb-bs
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
            ## Usage
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
            TODO: Write usage instructions here
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            ## Development
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
            After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
            To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
            ## Contributing
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
            Bug reports and pull requests are welcome on GitHub at https://github.com/arybin-cn/arb-bs.
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
    
        data/Rakefile
    ADDED
    
    
    
        data/arb-bs.gemspec
    ADDED
    
    | 
         @@ -0,0 +1,27 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # coding: utf-8
         
     | 
| 
      
 2 
     | 
    
         
            +
            lib = File.expand_path('../lib', __FILE__)
         
     | 
| 
      
 3 
     | 
    
         
            +
            $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'arb/bs/version'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            Gem::Specification.new do |spec|
         
     | 
| 
      
 7 
     | 
    
         
            +
              spec.name          = "arb-bs"
         
     | 
| 
      
 8 
     | 
    
         
            +
              spec.version       = Arb::Bs::VERSION
         
     | 
| 
      
 9 
     | 
    
         
            +
              spec.authors       = ["arybin"]
         
     | 
| 
      
 10 
     | 
    
         
            +
              spec.email         = ["arybin@163.com"]
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
              spec.summary       = %q{A demo of Web Crawler using arb-crawler}
         
     | 
| 
      
 13 
     | 
    
         
            +
              spec.description   = %q{A demo of Web Crawler using arb-crawler}
         
     | 
| 
      
 14 
     | 
    
         
            +
              spec.homepage      = "https://github.com/arybin-cn/arb-bs"
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
              spec.files         = `git ls-files -z`.split("\x0").reject do |f|
         
     | 
| 
      
 17 
     | 
    
         
            +
                f.match(%r{^(test|spec|features)/})
         
     | 
| 
      
 18 
     | 
    
         
            +
              end
         
     | 
| 
      
 19 
     | 
    
         
            +
              spec.bindir        = "exe"
         
     | 
| 
      
 20 
     | 
    
         
            +
              spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
         
     | 
| 
      
 21 
     | 
    
         
            +
              spec.require_paths = ["lib"]
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
              spec.add_development_dependency "bundler", "~> 1.14"
         
     | 
| 
      
 24 
     | 
    
         
            +
              spec.add_development_dependency "rake", "~> 10.0"
         
     | 
| 
      
 25 
     | 
    
         
            +
              spec.add_dependency "arb-crawler"
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            end
         
     | 
    
        data/bin/console
    ADDED
    
    | 
         @@ -0,0 +1,14 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "bundler/setup"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require "arb/bs"
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            # You can add fixtures and/or initialization code here to make experimenting
         
     | 
| 
      
 7 
     | 
    
         
            +
            # with your gem easier. You can also use a different console, if you like.
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            # (If you use this, don't forget to add pry to your Gemfile!)
         
     | 
| 
      
 10 
     | 
    
         
            +
            # require "pry"
         
     | 
| 
      
 11 
     | 
    
         
            +
            # Pry.start
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            require "irb"
         
     | 
| 
      
 14 
     | 
    
         
            +
            IRB.start(__FILE__)
         
     | 
    
        data/bin/setup
    ADDED
    
    
    
        data/exe/bs_pic
    ADDED
    
    | 
         @@ -0,0 +1,33 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'arb/crawler'
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            include Arb
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            domain='www.budejie.com'
         
     | 
| 
      
 8 
     | 
    
         
            +
            map_file='map.txt'
         
     | 
| 
      
 9 
     | 
    
         
            +
            max_page=ARGV.first || 50
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            # Create an empty map file on first run so the later File.readlines call
            # does not fail. File.exist? replaces the deprecated File.exists? (removed
            # in Ruby 3.2), and File.write closes the handle instead of leaking it.
            File.write(map_file, '') unless File.exist?(map_file)
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            loop do
         
     | 
| 
      
 14 
     | 
    
         
            +
              "http://#{domain}/pic/?".enum('?',1..max_page.to_i).each_with_index do |url,index|
         
     | 
| 
      
 15 
     | 
    
         
            +
                Crawler.get_by_css(url,"div.j-r-list-c-img a img").each do |hash|
         
     | 
| 
      
 16 
     | 
    
         
            +
                  url_file=Crawler.filename_of_url(hash[:"data-original"])
         
     | 
| 
      
 17 
     | 
    
         
            +
                  unless File.readlines(map_file).find{|line| line.to_s.include? url_file}
         
     | 
| 
      
 18 
     | 
    
         
            +
                    if Crawler.download(hash[:"data-original"],url_file)
         
     | 
| 
      
 19 
     | 
    
         
            +
                      puts "#{hash[:'data-original']}\n#{hash[:title]}",''
         
     | 
| 
      
 20 
     | 
    
         
            +
                      File.open map_file,'a' do |file|
         
     | 
| 
      
 21 
     | 
    
         
            +
                        file.puts "#{url_file}:#{hash[:title]}"
         
     | 
| 
      
 22 
     | 
    
         
            +
                      end
         
     | 
| 
      
 23 
     | 
    
         
            +
                    end
         
     | 
| 
      
 24 
     | 
    
         
            +
                  end
         
     | 
| 
      
 25 
     | 
    
         
            +
                end
         
     | 
| 
      
 26 
     | 
    
         
            +
                tmp=1+rand(5)
         
     | 
| 
      
 27 
     | 
    
         
            +
                puts "Page round finished for page #{index+1}, next action in #{tmp} seconds later."
         
     | 
| 
      
 28 
     | 
    
         
            +
                sleep tmp
         
     | 
| 
      
 29 
     | 
    
         
            +
              end
         
     | 
| 
      
 30 
     | 
    
         
            +
              tmp=30+rand(30)
         
     | 
| 
      
 31 
     | 
    
         
            +
              puts "Complete round finished, next action in #{tmp} seconds later."
         
     | 
| 
      
 32 
     | 
    
         
            +
              sleep tmp
         
     | 
| 
      
 33 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/arb/bs.rb
    ADDED
    
    | 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "arb/bs/version"
         
     | 
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,96 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: arb-bs
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version
         
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.1.2
         
     | 
| 
      
 5 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 6 
     | 
    
         
            +
            authors:
         
     | 
| 
      
 7 
     | 
    
         
            +
            - arybin
         
     | 
| 
      
 8 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 9 
     | 
    
         
            +
            bindir: exe
         
     | 
| 
      
 10 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2017-02-13 00:00:00.000000000 Z
         
     | 
| 
      
 12 
     | 
    
         
            +
            dependencies:
         
     | 
| 
      
 13 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 14 
     | 
    
         
            +
              name: bundler
         
     | 
| 
      
 15 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 16 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 17 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 18 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 19 
     | 
    
         
            +
                    version: '1.14'
         
     | 
| 
      
 20 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 21 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 22 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 23 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 24 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 25 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 26 
     | 
    
         
            +
                    version: '1.14'
         
     | 
| 
      
 27 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 28 
     | 
    
         
            +
              name: rake
         
     | 
| 
      
 29 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 30 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 31 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 32 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 33 
     | 
    
         
            +
                    version: '10.0'
         
     | 
| 
      
 34 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 35 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 36 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 37 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 38 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 39 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 40 
     | 
    
         
            +
                    version: '10.0'
         
     | 
| 
      
 41 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 42 
     | 
    
         
            +
              name: arb-crawler
         
     | 
| 
      
 43 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 44 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 45 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 46 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 47 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 48 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 49 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 50 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 51 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 52 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 53 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 54 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 55 
     | 
    
         
            +
            description: A demo of Web Crawler using arb-crawler
         
     | 
| 
      
 56 
     | 
    
         
            +
            email:
         
     | 
| 
      
 57 
     | 
    
         
            +
            - arybin@163.com
         
     | 
| 
      
 58 
     | 
    
         
            +
            executables:
         
     | 
| 
      
 59 
     | 
    
         
            +
            - bs_pic
         
     | 
| 
      
 60 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 61 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 62 
     | 
    
         
            +
            files:
         
     | 
| 
      
 63 
     | 
    
         
            +
            - ".gitignore"
         
     | 
| 
      
 64 
     | 
    
         
            +
            - Gemfile
         
     | 
| 
      
 65 
     | 
    
         
            +
            - README.md
         
     | 
| 
      
 66 
     | 
    
         
            +
            - Rakefile
         
     | 
| 
      
 67 
     | 
    
         
            +
            - arb-bs.gemspec
         
     | 
| 
      
 68 
     | 
    
         
            +
            - bin/console
         
     | 
| 
      
 69 
     | 
    
         
            +
            - bin/setup
         
     | 
| 
      
 70 
     | 
    
         
            +
            - exe/bs_pic
         
     | 
| 
      
 71 
     | 
    
         
            +
            - lib/arb/bs.rb
         
     | 
| 
      
 72 
     | 
    
         
            +
            - lib/arb/bs/version.rb
         
     | 
| 
      
 73 
     | 
    
         
            +
            homepage: https://github.com/arybin-cn/arb-bs
         
     | 
| 
      
 74 
     | 
    
         
            +
            licenses: []
         
     | 
| 
      
 75 
     | 
    
         
            +
            metadata: {}
         
     | 
| 
      
 76 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 77 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 78 
     | 
    
         
            +
            require_paths:
         
     | 
| 
      
 79 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 80 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 81 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 82 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 83 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 84 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 85 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 86 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 87 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 88 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 89 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 90 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 91 
     | 
    
         
            +
            rubyforge_project: 
         
     | 
| 
      
 92 
     | 
    
         
            +
            rubygems_version: 2.4.8
         
     | 
| 
      
 93 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 94 
     | 
    
         
            +
            specification_version: 4
         
     | 
| 
      
 95 
     | 
    
         
            +
            summary: A demo of Web Crawler using arb-crawler
         
     | 
| 
      
 96 
     | 
    
         
            +
            test_files: []
         
     |