filey-diff 1.2.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/changelog.md +4 -0
- data/filey-diff.gemspec +1 -1
- data/lib/filey-diff/data-sources/aws_sdk_s3.rb +8 -2
- data/spec/data_sources_spec.rb +34 -7
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 1fe9b2c5268647baab2e70424019f87a308c45d4
         | 
| 4 | 
            +
              data.tar.gz: 76c1429b9a18f19dae15856372fefa39edb7b45a
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: ba377502534f9b43ba5d1abf50b1bb15f43dc975863856414e1dc3ea8642f01616869b0cd0578168165fba51fe93df54ca1f662aafe818418482505105709886
         | 
| 7 | 
            +
              data.tar.gz: 88c98411ed8cafa1a598b2aa869831eb93d3c6bc20ed707ad3ec9a4d0056f83a5064f17ec3e8e962d871ca88a683cebe6333161c69ed254244d04beff839ab94
         | 
    
        data/README.md
    CHANGED
    
    | @@ -61,6 +61,18 @@ Filey::Comparison.list_outdated(fs_data_source, s3_data_source).each { |filey| | |
| 61 61 | 
             
            }
         | 
| 62 62 | 
             
            ```
         | 
| 63 63 |  | 
| 64 | 
            +
            ## AWS SDK data source
         | 
| 65 | 
            +
             | 
| 66 | 
            +
            ### Specifying custom concurrency level
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            The concurrency level determines the number of parallel operations that the
         | 
| 69 | 
            +
            `AwsSdkS3` data source performs against the S3 API.
         | 
| 70 | 
            +
             | 
| 71 | 
            +
            ```ruby
         | 
| 72 | 
            +
            config = { :concurrency_level => 1000 }
         | 
| 73 | 
            +
            s3_data_source = Filey::DataSources::AwsSdkS3.new(s3_bucket, config)
         | 
| 74 | 
            +
            ```
         | 
| 75 | 
            +
             | 
| 64 76 | 
             
            ## Example use cases
         | 
| 65 77 |  | 
| 66 78 | 
             
            Arnie has a blog on AWS S3. He has just finished a new post and wants to upload
         | 
    
        data/changelog.md
    CHANGED
    
    
    
        data/filey-diff.gemspec
    CHANGED
    
    
| @@ -1,8 +1,9 @@ | |
| 1 1 | 
             
            module Filey
         | 
| 2 2 | 
             
              module DataSources
         | 
| 3 3 | 
             
                class AwsSdkS3 < DataSource
         | 
| 4 | 
            -
                  def initialize(s3_bucket)
         | 
| 4 | 
            +
                  def initialize(s3_bucket, config = { :concurrency_level => DEFAULT_CONCURRENCY_LEVEL })
         | 
| 5 5 | 
             
                    @s3_bucket = s3_bucket
         | 
| 6 | 
            +
                    @config = config
         | 
| 6 7 | 
             
                  end
         | 
| 7 8 |  | 
| 8 9 | 
             
                  private
         | 
| @@ -27,7 +28,7 @@ module Filey | |
| 27 28 | 
             
                    if ENV['disable_parallel_processing']
         | 
| 28 29 | 
             
                      jobs.each(&:call)
         | 
| 29 30 | 
             
                    else
         | 
| 30 | 
            -
                      jobs.each_slice( | 
| 31 | 
            +
                      jobs.each_slice(slice_size) { |jobs|
         | 
| 31 32 | 
             
                        threads = jobs.map { |job|
         | 
| 32 33 | 
             
                          Thread.new {
         | 
| 33 34 | 
             
                            job.call
         | 
| @@ -38,6 +39,11 @@ module Filey | |
| 38 39 | 
             
                    end
         | 
| 39 40 | 
             
                  end
         | 
| 40 41 |  | 
| 42 | 
            +
                  def slice_size
         | 
| 43 | 
            +
                    slice_size_from_cfg = @config[:concurrency_level] || @config['concurrency_level']
         | 
| 44 | 
            +
                    slice_size_from_cfg || DEFAULT_CONCURRENCY_LEVEL
         | 
| 45 | 
            +
                  end
         | 
| 46 | 
            +
             | 
| 41 47 | 
             
                  def map_s3_object_to_filey(s3_object)
         | 
| 42 48 | 
             
                    if (s3_object.key.include?'/')
         | 
| 43 49 | 
             
                      path = s3_object.key.scan(/(.*\/).*/).first.first
         | 
    
        data/spec/data_sources_spec.rb
    CHANGED
    
    | @@ -77,28 +77,55 @@ describe Filey::DataSources::AwsSdkS3 do | |
| 77 77 | 
             
              end
         | 
| 78 78 |  | 
| 79 79 | 
             
              context '#in_parallel_or_sequentially' do
         | 
| 80 | 
            -
                describe Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL do
         | 
| 81 | 
            -
                  concurrency_level = Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL
         | 
| 82 80 |  | 
| 83 | 
            -
             | 
| 81 | 
            +
                shared_examples 'concurrent processing' do |concurrency_level, config|
         | 
| 82 | 
            +
                  let(:operation_on_s3_object) {
         | 
| 84 83 | 
             
                    @ints = []
         | 
| 85 84 | 
             
                    operation_on_s3_object = lambda do |s3_object|
         | 
| 86 85 | 
             
                      @ints << s3_object
         | 
| 87 86 | 
             
                    end
         | 
| 88 | 
            -
             | 
| 89 | 
            -
             | 
| 87 | 
            +
                  }
         | 
| 88 | 
            +
             | 
| 89 | 
            +
                  before(:each) {
         | 
| 90 | 
            +
                    s3_bucket = double(
         | 
| 91 | 
            +
                      's3_bucket',
         | 
| 92 | 
            +
                      :objects => (0..200).map { |int| int }
         | 
| 93 | 
            +
                  )
         | 
| 94 | 
            +
                    data_source = Filey::DataSources::AwsSdkS3.new(s3_bucket, config)
         | 
| 90 95 | 
             
                    data_source.send(:in_parallel_or_sequentially, operation_on_s3_object)
         | 
| 91 96 | 
             
                  }
         | 
| 92 97 |  | 
| 93 | 
            -
                  it " | 
| 98 | 
            +
                  it "honors the concurrency level by processing the first #{concurrency_level} items first" do
         | 
| 94 99 | 
             
                    @ints.take(concurrency_level).all? { |int|
         | 
| 95 100 | 
             
                      int <= concurrency_level
         | 
| 96 101 | 
             
                    }.should be true
         | 
| 102 | 
            +
                  end
         | 
| 103 | 
            +
             | 
| 104 | 
            +
                  it "honors the concurrency level by processing the second patch of #{concurrency_level} items second" do
         | 
| 97 105 | 
             
                    @ints.drop(concurrency_level).take(concurrency_level).all? { |int|
         | 
| 98 | 
            -
                      int  | 
| 106 | 
            +
                      int >= concurrency_level && int < (2 * concurrency_level)
         | 
| 99 107 | 
             
                    }.should be true
         | 
| 100 108 | 
             
                  end
         | 
| 101 109 | 
             
                end
         | 
| 110 | 
            +
             | 
| 111 | 
            +
                describe 'default concurrency level' do
         | 
| 112 | 
            +
                  concurrency_level = Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL
         | 
| 113 | 
            +
             | 
| 114 | 
            +
                  include_examples(
         | 
| 115 | 
            +
                    'concurrent processing',
         | 
| 116 | 
            +
                    concurrency_level,
         | 
| 117 | 
            +
                    nil
         | 
| 118 | 
            +
                  )
         | 
| 119 | 
            +
                end
         | 
| 120 | 
            +
             | 
| 121 | 
            +
                describe 'specifying custom concurrency level' do
         | 
| 122 | 
            +
                  concurrency_level = 20
         | 
| 123 | 
            +
                  include_examples(
         | 
| 124 | 
            +
                    'concurrent processing',
         | 
| 125 | 
            +
                    concurrency_level,
         | 
| 126 | 
            +
                    { :concurrency_level => concurrency_level }
         | 
| 127 | 
            +
                  )
         | 
| 128 | 
            +
                end
         | 
| 102 129 | 
             
              end
         | 
| 103 130 |  | 
| 104 131 | 
             
              context 'gzip' do
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: filey-diff
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 1. | 
| 4 | 
            +
              version: 1.3.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Lauri Lehmijoki
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2013-07- | 
| 11 | 
            +
            date: 2013-07-20 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: rake
         |