kraps 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +28 -30
- data/README.md +6 -5
- data/lib/kraps/hash_partitioner.rb +7 -0
- data/lib/kraps/job.rb +3 -3
- data/lib/kraps/version.rb +1 -1
- data/lib/kraps/worker.rb +15 -7
- data/lib/kraps.rb +1 -0
- metadata +6 -4
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: f5bb601e7ee415b95b4b258a0241c25e6fe19eb3e772c06d4149afbfcfbe6c3d
         | 
| 4 | 
            +
              data.tar.gz: cb948c05947e48d2d8e970eebbc6e2c4a5b0a88cb162ad87bf0743196f6bcaef
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 1d1c5a16205c5584626fed5bca9b6c7dd6fae3b4f3c725b158e7740f6fa05a17abdcb483b43cbdad813576e2fc2c7621b89b94d61b32776d85ae774f5a4332d1
         | 
| 7 | 
            +
              data.tar.gz: 2670dbc002633e801d8cf98fc8454c8881295f72b505bd4baf6cf0c8685a8c97a8a2dbf26e8a617c74b452ef627e820807e7af6e05b20a627fb99ce2eb216a1a
         | 
    
        data/.rubocop.yml
    CHANGED
    
    
    
        data/CHANGELOG.md
    ADDED
    
    
    
        data/Gemfile.lock
    CHANGED
    
    | @@ -1,22 +1,21 @@ | |
| 1 1 | 
             
            PATH
         | 
| 2 2 | 
             
              remote: .
         | 
| 3 3 | 
             
              specs:
         | 
| 4 | 
            -
                kraps (0. | 
| 4 | 
            +
                kraps (0.2.0)
         | 
| 5 5 | 
             
                  attachie
         | 
| 6 6 | 
             
                  distributed_job
         | 
| 7 | 
            -
                  map-reduce-ruby (>=  | 
| 7 | 
            +
                  map-reduce-ruby (>= 3.0.0)
         | 
| 8 8 | 
             
                  redis
         | 
| 9 9 | 
             
                  ruby-progressbar
         | 
| 10 10 |  | 
| 11 11 | 
             
            GEM
         | 
| 12 12 | 
             
              remote: https://rubygems.org/
         | 
| 13 13 | 
             
              specs:
         | 
| 14 | 
            -
                activesupport ( | 
| 14 | 
            +
                activesupport (7.0.4)
         | 
| 15 15 | 
             
                  concurrent-ruby (~> 1.0, >= 1.0.2)
         | 
| 16 16 | 
             
                  i18n (>= 1.6, < 2)
         | 
| 17 17 | 
             
                  minitest (>= 5.1)
         | 
| 18 18 | 
             
                  tzinfo (~> 2.0)
         | 
| 19 | 
            -
                  zeitwerk (~> 2.3)
         | 
| 20 19 | 
             
                ast (2.4.2)
         | 
| 21 20 | 
             
                attachie (1.2.0)
         | 
| 22 21 | 
             
                  activesupport
         | 
| @@ -24,17 +23,17 @@ GEM | |
| 24 23 | 
             
                  connection_pool
         | 
| 25 24 | 
             
                  mime-types
         | 
| 26 25 | 
             
                aws-eventstream (1.2.0)
         | 
| 27 | 
            -
                aws-partitions (1. | 
| 28 | 
            -
                aws-sdk-core (3. | 
| 26 | 
            +
                aws-partitions (1.654.0)
         | 
| 27 | 
            +
                aws-sdk-core (3.166.0)
         | 
| 29 28 | 
             
                  aws-eventstream (~> 1, >= 1.0.2)
         | 
| 30 | 
            -
                  aws-partitions (~> 1, >= 1. | 
| 31 | 
            -
                  aws-sigv4 (~> 1. | 
| 29 | 
            +
                  aws-partitions (~> 1, >= 1.651.0)
         | 
| 30 | 
            +
                  aws-sigv4 (~> 1.5)
         | 
| 32 31 | 
             
                  jmespath (~> 1, >= 1.6.1)
         | 
| 33 | 
            -
                aws-sdk-kms (1. | 
| 34 | 
            -
                  aws-sdk-core (~> 3, >= 3. | 
| 32 | 
            +
                aws-sdk-kms (1.59.0)
         | 
| 33 | 
            +
                  aws-sdk-core (~> 3, >= 3.165.0)
         | 
| 35 34 | 
             
                  aws-sigv4 (~> 1.1)
         | 
| 36 | 
            -
                aws-sdk-s3 (1. | 
| 37 | 
            -
                  aws-sdk-core (~> 3, >= 3. | 
| 35 | 
            +
                aws-sdk-s3 (1.117.1)
         | 
| 36 | 
            +
                  aws-sdk-core (~> 3, >= 3.165.0)
         | 
| 38 37 | 
             
                  aws-sdk-kms (~> 1)
         | 
| 39 38 | 
             
                  aws-sigv4 (~> 1.4)
         | 
| 40 39 | 
             
                aws-sigv4 (1.5.2)
         | 
| @@ -49,7 +48,7 @@ GEM | |
| 49 48 | 
             
                jmespath (1.6.1)
         | 
| 50 49 | 
             
                json (2.6.2)
         | 
| 51 50 | 
             
                lazy_priority_queue (0.1.1)
         | 
| 52 | 
            -
                map-reduce-ruby ( | 
| 51 | 
            +
                map-reduce-ruby (3.0.0)
         | 
| 53 52 | 
             
                  json
         | 
| 54 53 | 
             
                  lazy_priority_queue
         | 
| 55 54 | 
             
                mime-types (3.4.1)
         | 
| @@ -63,40 +62,39 @@ GEM | |
| 63 62 | 
             
                rake (13.0.6)
         | 
| 64 63 | 
             
                redis (5.0.5)
         | 
| 65 64 | 
             
                  redis-client (>= 0.9.0)
         | 
| 66 | 
            -
                redis-client (0. | 
| 65 | 
            +
                redis-client (0.11.0)
         | 
| 67 66 | 
             
                  connection_pool
         | 
| 68 | 
            -
                regexp_parser (2. | 
| 67 | 
            +
                regexp_parser (2.6.0)
         | 
| 69 68 | 
             
                rexml (3.2.5)
         | 
| 70 | 
            -
                rspec (3. | 
| 71 | 
            -
                  rspec-core (~> 3. | 
| 72 | 
            -
                  rspec-expectations (~> 3. | 
| 73 | 
            -
                  rspec-mocks (~> 3. | 
| 74 | 
            -
                rspec-core (3. | 
| 75 | 
            -
                  rspec-support (~> 3. | 
| 76 | 
            -
                rspec-expectations (3. | 
| 69 | 
            +
                rspec (3.12.0)
         | 
| 70 | 
            +
                  rspec-core (~> 3.12.0)
         | 
| 71 | 
            +
                  rspec-expectations (~> 3.12.0)
         | 
| 72 | 
            +
                  rspec-mocks (~> 3.12.0)
         | 
| 73 | 
            +
                rspec-core (3.12.0)
         | 
| 74 | 
            +
                  rspec-support (~> 3.12.0)
         | 
| 75 | 
            +
                rspec-expectations (3.12.0)
         | 
| 77 76 | 
             
                  diff-lcs (>= 1.2.0, < 2.0)
         | 
| 78 | 
            -
                  rspec-support (~> 3. | 
| 79 | 
            -
                rspec-mocks (3. | 
| 77 | 
            +
                  rspec-support (~> 3.12.0)
         | 
| 78 | 
            +
                rspec-mocks (3.12.0)
         | 
| 80 79 | 
             
                  diff-lcs (>= 1.2.0, < 2.0)
         | 
| 81 | 
            -
                  rspec-support (~> 3. | 
| 82 | 
            -
                rspec-support (3. | 
| 83 | 
            -
                rubocop (1. | 
| 80 | 
            +
                  rspec-support (~> 3.12.0)
         | 
| 81 | 
            +
                rspec-support (3.12.0)
         | 
| 82 | 
            +
                rubocop (1.38.0)
         | 
| 84 83 | 
             
                  json (~> 2.3)
         | 
| 85 84 | 
             
                  parallel (~> 1.10)
         | 
| 86 85 | 
             
                  parser (>= 3.1.2.1)
         | 
| 87 86 | 
             
                  rainbow (>= 2.2.2, < 4.0)
         | 
| 88 87 | 
             
                  regexp_parser (>= 1.8, < 3.0)
         | 
| 89 88 | 
             
                  rexml (>= 3.2.5, < 4.0)
         | 
| 90 | 
            -
                  rubocop-ast (>= 1. | 
| 89 | 
            +
                  rubocop-ast (>= 1.23.0, < 2.0)
         | 
| 91 90 | 
             
                  ruby-progressbar (~> 1.7)
         | 
| 92 91 | 
             
                  unicode-display_width (>= 1.4.0, < 3.0)
         | 
| 93 | 
            -
                rubocop-ast (1. | 
| 92 | 
            +
                rubocop-ast (1.23.0)
         | 
| 94 93 | 
             
                  parser (>= 3.1.1.0)
         | 
| 95 94 | 
             
                ruby-progressbar (1.11.0)
         | 
| 96 95 | 
             
                tzinfo (2.0.5)
         | 
| 97 96 | 
             
                  concurrent-ruby (~> 1.0)
         | 
| 98 97 | 
             
                unicode-display_width (2.3.0)
         | 
| 99 | 
            -
                zeitwerk (2.6.1)
         | 
| 100 98 |  | 
| 101 99 | 
             
            PLATFORMS
         | 
| 102 100 | 
             
              ruby
         | 
    
        data/README.md
    CHANGED
    
    | @@ -143,17 +143,18 @@ split. Kraps assigns every `key` to a partition, either using a custom | |
| 143 143 | 
             
            `partitioner` or the default built in hash partitioner. The hash partitioner
         | 
| 144 144 | 
             
            simply calculates a hash of your key modulo the number of partitions and the
         | 
| 145 145 | 
             
            resulting partition number is the partition where the respective key is
         | 
| 146 | 
            -
            assigned to. A partitioner is a callable which gets the key  | 
| 147 | 
            -
            returns a partition number. The built in hash | 
| 148 | 
            -
            one:
         | 
| 146 | 
            +
            assigned to. A partitioner is a callable which gets the key and the number of
         | 
| 147 | 
            +
            partitions as argument and returns a partition number. The built in hash
         | 
| 148 | 
            +
            partitioner looks similar to this one:
         | 
| 149 149 |  | 
| 150 150 | 
             
            ```ruby
         | 
| 151 | 
            -
            partitioner = proc { |key| Digest::SHA1.hexdigest(key.inspect)[0..4].to_i(16) %  | 
| 151 | 
            +
            partitioner = proc { |key, num_partitions| Digest::SHA1.hexdigest(key.inspect)[0..4].to_i(16) % num_partitions }
         | 
| 152 152 | 
             
            ```
         | 
| 153 153 |  | 
| 154 154 | 
             
            Please note, it's important that the partitioner and the specified number of
         | 
| 155 155 | 
             
            partitions stays in sync. When you use a custom partitioner, please make sure
         | 
| 156 | 
            -
            that the partitioner  | 
| 156 | 
            +
            that the partitioner correctly returns a partition number in the range of
         | 
| 157 | 
            +
            `0...num_partitions`.
         | 
| 157 158 |  | 
| 158 159 | 
             
            ## Datatypes
         | 
| 159 160 |  | 
    
        data/lib/kraps/job.rb
    CHANGED
    
    | @@ -6,10 +6,10 @@ module Kraps | |
| 6 6 | 
             
                  @worker = worker
         | 
| 7 7 | 
             
                  @steps = []
         | 
| 8 8 | 
             
                  @partitions = 0
         | 
| 9 | 
            -
                  @partitioner =  | 
| 9 | 
            +
                  @partitioner = HashPartitioner.new
         | 
| 10 10 | 
             
                end
         | 
| 11 11 |  | 
| 12 | 
            -
                def parallelize(partitions:, partitioner:  | 
| 12 | 
            +
                def parallelize(partitions:, partitioner: HashPartitioner.new, worker: @worker, &block)
         | 
| 13 13 | 
             
                  fresh.tap do |job|
         | 
| 14 14 | 
             
                    job.instance_eval do
         | 
| 15 15 | 
             
                      @partitions = partitions
         | 
| @@ -24,7 +24,7 @@ module Kraps | |
| 24 24 | 
             
                  fresh.tap do |job|
         | 
| 25 25 | 
             
                    job.instance_eval do
         | 
| 26 26 | 
             
                      @partitions = partitions if partitions
         | 
| 27 | 
            -
                      @partitioner = partitioner  | 
| 27 | 
            +
                      @partitioner = partitioner if partitioner
         | 
| 28 28 |  | 
| 29 29 | 
             
                      @steps << Step.new(action: Actions::MAP, args: { partitions: @partitions, partitioner: @partitioner, worker: worker }, block: block)
         | 
| 30 30 | 
             
                    end
         | 
    
        data/lib/kraps/version.rb
    CHANGED
    
    
    
        data/lib/kraps/worker.rb
    CHANGED
    
    | @@ -31,8 +31,12 @@ module Kraps | |
| 31 31 | 
             
                  mapper = MapReduce::Mapper.new(implementation.new, partitioner: partitioner, memory_limit: @memory_limit)
         | 
| 32 32 | 
             
                  mapper.map(@args["item"])
         | 
| 33 33 |  | 
| 34 | 
            -
                  mapper.shuffle do | | 
| 35 | 
            -
                     | 
| 34 | 
            +
                  mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
         | 
| 35 | 
            +
                    Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
         | 
| 36 | 
            +
                      File.open(path) do |stream|
         | 
| 37 | 
            +
                        Kraps.driver.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{@args["part"]}.json"), stream, Kraps.driver.bucket)
         | 
| 38 | 
            +
                      end
         | 
| 39 | 
            +
                    end
         | 
| 36 40 | 
             
                  end
         | 
| 37 41 | 
             
                end
         | 
| 38 42 |  | 
| @@ -68,10 +72,14 @@ module Kraps | |
| 68 72 | 
             
                    end
         | 
| 69 73 | 
             
                  end
         | 
| 70 74 |  | 
| 71 | 
            -
                  mapper.shuffle do | | 
| 72 | 
            -
                     | 
| 73 | 
            -
                       | 
| 74 | 
            -
             | 
| 75 | 
            +
                  mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
         | 
| 76 | 
            +
                    Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
         | 
| 77 | 
            +
                      File.open(path) do |stream|
         | 
| 78 | 
            +
                        Kraps.driver.driver.store(
         | 
| 79 | 
            +
                          Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{@args["part"]}.json"), stream, Kraps.driver.bucket
         | 
| 80 | 
            +
                        )
         | 
| 81 | 
            +
                      end
         | 
| 82 | 
            +
                    end
         | 
| 75 83 | 
             
                  end
         | 
| 76 84 | 
             
                ensure
         | 
| 77 85 | 
             
                  temp_paths&.unlink
         | 
| @@ -173,7 +181,7 @@ module Kraps | |
| 173 181 | 
             
                end
         | 
| 174 182 |  | 
| 175 183 | 
             
                def partitioner
         | 
| 176 | 
            -
                  @partitioner ||= step.args[:partitioner]
         | 
| 184 | 
            +
                  @partitioner ||= proc { |key| step.args[:partitioner].call(key, step.args[:partitions]) }
         | 
| 177 185 | 
             
                end
         | 
| 178 186 |  | 
| 179 187 | 
             
                def distributed_job
         | 
    
        data/lib/kraps.rb
    CHANGED
    
    | @@ -2,6 +2,7 @@ require_relative "kraps/version" | |
| 2 2 | 
             
            require_relative "kraps/drivers"
         | 
| 3 3 | 
             
            require_relative "kraps/actions"
         | 
| 4 4 | 
             
            require_relative "kraps/parallelizer"
         | 
| 5 | 
            +
            require_relative "kraps/hash_partitioner"
         | 
| 5 6 | 
             
            require_relative "kraps/temp_path"
         | 
| 6 7 | 
             
            require_relative "kraps/temp_paths"
         | 
| 7 8 | 
             
            require_relative "kraps/timeout_queue"
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: kraps
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1.0 | 
| 4 | 
            +
              version: 0.3.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Benjamin Vetter
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2022- | 
| 11 | 
            +
            date: 2022-11-07 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: attachie
         | 
| @@ -44,14 +44,14 @@ dependencies: | |
| 44 44 | 
             
                requirements:
         | 
| 45 45 | 
             
                - - ">="
         | 
| 46 46 | 
             
                  - !ruby/object:Gem::Version
         | 
| 47 | 
            -
                    version:  | 
| 47 | 
            +
                    version: 3.0.0
         | 
| 48 48 | 
             
              type: :runtime
         | 
| 49 49 | 
             
              prerelease: false
         | 
| 50 50 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 51 | 
             
                requirements:
         | 
| 52 52 | 
             
                - - ">="
         | 
| 53 53 | 
             
                  - !ruby/object:Gem::Version
         | 
| 54 | 
            -
                    version:  | 
| 54 | 
            +
                    version: 3.0.0
         | 
| 55 55 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 56 56 | 
             
              name: redis
         | 
| 57 57 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -132,6 +132,7 @@ extra_rdoc_files: [] | |
| 132 132 | 
             
            files:
         | 
| 133 133 | 
             
            - ".rspec"
         | 
| 134 134 | 
             
            - ".rubocop.yml"
         | 
| 135 | 
            +
            - CHANGELOG.md
         | 
| 135 136 | 
             
            - CODE_OF_CONDUCT.md
         | 
| 136 137 | 
             
            - Gemfile
         | 
| 137 138 | 
             
            - Gemfile.lock
         | 
| @@ -143,6 +144,7 @@ files: | |
| 143 144 | 
             
            - lib/kraps/actions.rb
         | 
| 144 145 | 
             
            - lib/kraps/drivers.rb
         | 
| 145 146 | 
             
            - lib/kraps/frame.rb
         | 
| 147 | 
            +
            - lib/kraps/hash_partitioner.rb
         | 
| 146 148 | 
             
            - lib/kraps/interval.rb
         | 
| 147 149 | 
             
            - lib/kraps/job.rb
         | 
| 148 150 | 
             
            - lib/kraps/parallelizer.rb
         |