arb-bs 0.1.6 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/arb-bs.gemspec +1 -0
- data/exe/bs_pic +24 -17
- data/lib/arb/bs/version.rb +1 -1
- metadata +17 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: d3fee1fcde122cff7050d84adeee0089b80a7b59
         | 
| 4 | 
            +
              data.tar.gz: f54a5e8778a5718c7c88fb9b4f336bbf0ec1f423
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: a915ebb6fe64cf5579683cf1b32082cae589758534b423b9cfce73ef9c70c18f715d442f62283655497dfa9b2c404c88bf57269fdac44c9dd8c8ad0957ad434e
         | 
| 7 | 
            +
              data.tar.gz: 36f3da886a3eb1cff62adbd87aee7def07da9e37a8b62c8f3005ba6023a4444dc0d92ed3b049e4bd442108ed4f149e09051356e9fce551f11473c011cb24bbac
         | 
    
        data/arb-bs.gemspec
    CHANGED
    
    
    
        data/exe/bs_pic
    CHANGED
    
    | @@ -1,5 +1,6 @@ | |
| 1 1 | 
             
            #!/usr/bin/env ruby
         | 
| 2 2 |  | 
| 3 | 
            +
            require 'arb/thread'
         | 
| 3 4 | 
             
            require 'arb/crawler'
         | 
| 4 5 |  | 
| 5 6 | 
             
            include Arb
         | 
| @@ -9,31 +10,37 @@ map_file='map.txt' | |
| 9 10 | 
             
            max_page=(ARGV[0] || 50).to_i
         | 
| 10 11 | 
             
            #Minimum idle time (in seconds) between two complete rounds.
         | 
| 11 12 | 
             
            min_idle_time=(ARGV[1] || 600).to_i
         | 
| 13 | 
            +
            thread_count=(ARGV[2] || 3).to_i
         | 
| 12 14 |  | 
| 13 15 |  | 
| 14 16 | 
             
            File.open(map_file,'w+') unless File.exists? map_file
         | 
| 15 17 |  | 
| 16 | 
            -
             | 
| 17 | 
            -
             | 
| 18 | 
            -
             | 
| 19 | 
            -
                 | 
| 20 | 
            -
                   | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
                     | 
| 27 | 
            -
                       | 
| 28 | 
            -
                      File. | 
| 29 | 
            -
                         | 
| 18 | 
            +
            Thread.parallel(thread_count) do |dispatcher|
         | 
| 19 | 
            +
             | 
| 20 | 
            +
              loop do
         | 
| 21 | 
            +
                "http://#{domain}/pic/?".enum('?',1..max_page).each_with_index do |url,index|
         | 
| 22 | 
            +
                  dispatcher.new_task do
         | 
| 23 | 
            +
                    res=Crawler.get_by_css(url,"div.j-r-list-c-img a img")
         | 
| 24 | 
            +
                    unless res
         | 
| 25 | 
            +
                      puts "Some errors occur when parsing page #{index+1}."
         | 
| 26 | 
            +
                      next
         | 
| 27 | 
            +
                    end
         | 
| 28 | 
            +
                    res.each do |hash|
         | 
| 29 | 
            +
                      url_file=Crawler.filename_of_url(hash[:"data-original"])
         | 
| 30 | 
            +
                      unless File.readlines(map_file).find{|line| line.to_s.include? url_file}
         | 
| 31 | 
            +
                        if Crawler.download(hash[:"data-original"],url_file)
         | 
| 32 | 
            +
                          puts "#{hash[:'data-original']}\n#{hash[:title]}",''
         | 
| 33 | 
            +
                          File.open map_file,'a' do |file|
         | 
| 34 | 
            +
                            file.puts "#{url_file}:#{Crawler.filter_str(hash[:title])}"
         | 
| 35 | 
            +
                          end
         | 
| 36 | 
            +
                        end
         | 
| 30 37 | 
             
                      end
         | 
| 31 38 | 
             
                    end
         | 
| 39 | 
            +
                    tmp=1+rand(5)
         | 
| 40 | 
            +
                    puts "Page round finished for page #{index+1}, next action in #{tmp} seconds later."
         | 
| 41 | 
            +
                    sleep tmp
         | 
| 32 42 | 
             
                  end
         | 
| 33 43 | 
             
                end
         | 
| 34 | 
            -
                tmp=1+rand(5)
         | 
| 35 | 
            -
                puts "Page round finished for page #{index+1}, next action in #{tmp} seconds later."
         | 
| 36 | 
            -
                sleep tmp
         | 
| 37 44 | 
             
              end
         | 
| 38 45 | 
             
              tmp=min_idle_time+rand(5)
         | 
| 39 46 | 
             
              puts "Complete round finished, next action in #{tmp} seconds later."
         | 
    
        data/lib/arb/bs/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: arb-bs
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 1.0.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - arybin
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2018-04-05 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: bundler
         | 
| @@ -52,6 +52,20 @@ dependencies: | |
| 52 52 | 
             
                - - ">="
         | 
| 53 53 | 
             
                  - !ruby/object:Gem::Version
         | 
| 54 54 | 
             
                    version: '0'
         | 
| 55 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 56 | 
            +
              name: arb-thread
         | 
| 57 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 58 | 
            +
                requirements:
         | 
| 59 | 
            +
                - - ">="
         | 
| 60 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 61 | 
            +
                    version: '0'
         | 
| 62 | 
            +
              type: :runtime
         | 
| 63 | 
            +
              prerelease: false
         | 
| 64 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 65 | 
            +
                requirements:
         | 
| 66 | 
            +
                - - ">="
         | 
| 67 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 68 | 
            +
                    version: '0'
         | 
| 55 69 | 
             
            description: A demo of Web Crawler using arb-crawler
         | 
| 56 70 | 
             
            email:
         | 
| 57 71 | 
             
            - arybin@163.com
         | 
| @@ -89,7 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 89 103 | 
             
                  version: '0'
         | 
| 90 104 | 
             
            requirements: []
         | 
| 91 105 | 
             
            rubyforge_project: 
         | 
| 92 | 
            -
            rubygems_version: 2. | 
| 106 | 
            +
            rubygems_version: 2.6.14
         | 
| 93 107 | 
             
            signing_key: 
         | 
| 94 108 | 
             
            specification_version: 4
         | 
| 95 109 | 
             
            summary: A demo of Web Crawler using arb-crawler
         |