filey-diff 1.2.4 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ed51d42ff41aad4b13ab034de0da688e6c76ae59
4
- data.tar.gz: 990b5d2fefb0fc6d019e2de83b694ce2ff27ae24
3
+ metadata.gz: 1fe9b2c5268647baab2e70424019f87a308c45d4
4
+ data.tar.gz: 76c1429b9a18f19dae15856372fefa39edb7b45a
5
5
  SHA512:
6
- metadata.gz: 9f214eea6234bc96138f0508b17cc9966bb6ef605b9b23c2f846017ec9fac11b9bfd28c512f38da93bc7ca25257f692c11f0f38a94e7b235b0828323b429995e
7
- data.tar.gz: e41c9a33c420db5c103a3711416fa01831389c1562e85123427ec588f0fb0e43b3a0859f283809bbd2c55f10dbcd35b9ad4400ed6bada2d3f9dbd2cfc2ef7931
6
+ metadata.gz: ba377502534f9b43ba5d1abf50b1bb15f43dc975863856414e1dc3ea8642f01616869b0cd0578168165fba51fe93df54ca1f662aafe818418482505105709886
7
+ data.tar.gz: 88c98411ed8cafa1a598b2aa869831eb93d3c6bc20ed707ad3ec9a4d0056f83a5064f17ec3e8e962d871ca88a683cebe6333161c69ed254244d04beff839ab94
data/README.md CHANGED
@@ -61,6 +61,18 @@ Filey::Comparison.list_outdated(fs_data_source, s3_data_source).each { |filey|
61
61
  }
62
62
  ```
63
63
 
64
+ ## AWS SDK data source
65
+
66
+ ### Specifying custom concurrency level
67
+
68
+ The concurrency level determines the number of parallel operations that the
69
+ `AwsSdkS3` data source performs against the S3 API.
70
+
71
+ ```ruby
72
+ config = { :concurrency_level => 1000 }
73
+ s3_data_source = Filey::DataSources::AwsSdkS3.new(s3_bucket, config)
74
+ ```
75
+
64
76
  ## Example use cases
65
77
 
66
78
  Arnie has a blog on AWS S3. He has just finished a new post and wants to upload
data/changelog.md CHANGED
@@ -2,6 +2,10 @@
2
2
 
3
3
  This project is [Semantically Versioned](http://semver.org).
4
4
 
5
+ ## 1.3.0
6
+
7
+ * Support custom concurrency level in AwsSdkS3
8
+
5
9
  ## 1.2.4
6
10
 
7
11
  * Prepare for future development by refactoring the concurrency implementation.
data/filey-diff.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'filey-diff'
3
- s.version = '1.2.4'
3
+ s.version = '1.3.0'
4
4
 
5
5
  s.summary = "Compare two data sources that contain file-like objects"
6
6
  s.description =
@@ -1,8 +1,9 @@
1
1
  module Filey
2
2
  module DataSources
3
3
  class AwsSdkS3 < DataSource
4
- def initialize(s3_bucket)
4
+ def initialize(s3_bucket, config = { :concurrency_level => DEFAULT_CONCURRENCY_LEVEL })
5
5
  @s3_bucket = s3_bucket
6
+ @config = config
6
7
  end
7
8
 
8
9
  private
@@ -27,7 +28,7 @@ module Filey
27
28
  if ENV['disable_parallel_processing']
28
29
  jobs.each(&:call)
29
30
  else
30
- jobs.each_slice(DEFAULT_CONCURRENCY_LEVEL) { |jobs|
31
+ jobs.each_slice(slice_size) { |jobs|
31
32
  threads = jobs.map { |job|
32
33
  Thread.new {
33
34
  job.call
@@ -38,6 +39,11 @@ module Filey
38
39
  end
39
40
  end
40
41
 
42
+ def slice_size
43
+ slice_size_from_cfg = @config[:concurrency_level] || @config['concurrency_level']
44
+ slice_size_from_cfg || DEFAULT_CONCURRENCY_LEVEL
45
+ end
46
+
41
47
  def map_s3_object_to_filey(s3_object)
42
48
  if (s3_object.key.include?'/')
43
49
  path = s3_object.key.scan(/(.*\/).*/).first.first
@@ -77,28 +77,55 @@ describe Filey::DataSources::AwsSdkS3 do
77
77
  end
78
78
 
79
79
  context '#in_parallel_or_sequentially' do
80
- describe Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL do
81
- concurrency_level = Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL
82
80
 
83
- before(:each) {
81
+ shared_examples 'concurrent processing' do |concurrency_level, config|
82
+ let(:operation_on_s3_object) {
84
83
  @ints = []
85
84
  operation_on_s3_object = lambda do |s3_object|
86
85
  @ints << s3_object
87
86
  end
88
- s3_bucket = double('s3_bucket', :objects => (1..(concurrency_level * 2)).map do |int| int end)
89
- data_source = Filey::DataSources::AwsSdkS3.new(s3_bucket)
87
+ }
88
+
89
+ before(:each) {
90
+ s3_bucket = double(
91
+ 's3_bucket',
92
+ :objects => (0..200).map { |int| int }
93
+ )
94
+ data_source = Filey::DataSources::AwsSdkS3.new(s3_bucket, config)
90
95
  data_source.send(:in_parallel_or_sequentially, operation_on_s3_object)
91
96
  }
92
97
 
93
- it "does at most #{concurrency_level} ops in parallel" do
98
+ it "honors the concurrency level by processing the first #{concurrency_level} items first" do
94
99
  @ints.take(concurrency_level).all? { |int|
95
100
  int <= concurrency_level
96
101
  }.should be true
102
+ end
103
+
104
+ it "honors the concurrency level by processing the second patch of #{concurrency_level} items second" do
97
105
  @ints.drop(concurrency_level).take(concurrency_level).all? { |int|
98
- int > concurrency_level
106
+ int >= concurrency_level && int < (2 * concurrency_level)
99
107
  }.should be true
100
108
  end
101
109
  end
110
+
111
+ describe 'default concurrency level' do
112
+ concurrency_level = Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL
113
+
114
+ include_examples(
115
+ 'concurrent processing',
116
+ concurrency_level,
117
+ nil
118
+ )
119
+ end
120
+
121
+ describe 'specifying custom concurrency level' do
122
+ concurrency_level = 20
123
+ include_examples(
124
+ 'concurrent processing',
125
+ concurrency_level,
126
+ { :concurrency_level => concurrency_level }
127
+ )
128
+ end
102
129
  end
103
130
 
104
131
  context 'gzip' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filey-diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.4
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lauri Lehmijoki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-12 00:00:00.000000000 Z
11
+ date: 2013-07-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake