filey-diff 1.2.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ed51d42ff41aad4b13ab034de0da688e6c76ae59
4
- data.tar.gz: 990b5d2fefb0fc6d019e2de83b694ce2ff27ae24
3
+ metadata.gz: 1fe9b2c5268647baab2e70424019f87a308c45d4
4
+ data.tar.gz: 76c1429b9a18f19dae15856372fefa39edb7b45a
5
5
  SHA512:
6
- metadata.gz: 9f214eea6234bc96138f0508b17cc9966bb6ef605b9b23c2f846017ec9fac11b9bfd28c512f38da93bc7ca25257f692c11f0f38a94e7b235b0828323b429995e
7
- data.tar.gz: e41c9a33c420db5c103a3711416fa01831389c1562e85123427ec588f0fb0e43b3a0859f283809bbd2c55f10dbcd35b9ad4400ed6bada2d3f9dbd2cfc2ef7931
6
+ metadata.gz: ba377502534f9b43ba5d1abf50b1bb15f43dc975863856414e1dc3ea8642f01616869b0cd0578168165fba51fe93df54ca1f662aafe818418482505105709886
7
+ data.tar.gz: 88c98411ed8cafa1a598b2aa869831eb93d3c6bc20ed707ad3ec9a4d0056f83a5064f17ec3e8e962d871ca88a683cebe6333161c69ed254244d04beff839ab94
data/README.md CHANGED
@@ -61,6 +61,18 @@ Filey::Comparison.list_outdated(fs_data_source, s3_data_source).each { |filey|
61
61
  }
62
62
  ```
63
63
 
64
+ ## AWS SDK data source
65
+
66
+ ### Specifying custom concurrency level
67
+
68
+ The concurrency level determines the number of parallel operations that the
69
+ `AwsSdkS3` data source performs against the S3 API.
70
+
71
+ ```ruby
72
+ config = { :concurrency_level => 1000 }
73
+ s3_data_source = Filey::DataSources::AwsSdkS3.new(s3_bucket, config)
74
+ ```
75
+
64
76
  ## Example use cases
65
77
 
66
78
  Arnie has a blog on AWS S3. He has just finished a new post and wants to upload
data/changelog.md CHANGED
@@ -2,6 +2,10 @@
2
2
 
3
3
  This project is [Semantically Versioned](http://semver.org).
4
4
 
5
+ ## 1.3.0
6
+
7
+ * Support custom concurrency level in AwsSdkS3
8
+
5
9
  ## 1.2.4
6
10
 
7
11
  * Prepare for future development by refactoring the concurrency implementation.
data/filey-diff.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'filey-diff'
3
- s.version = '1.2.4'
3
+ s.version = '1.3.0'
4
4
 
5
5
  s.summary = "Compare two data sources that contain file-like objects"
6
6
  s.description =
@@ -1,8 +1,9 @@
1
1
  module Filey
2
2
  module DataSources
3
3
  class AwsSdkS3 < DataSource
4
- def initialize(s3_bucket)
4
+ def initialize(s3_bucket, config = { :concurrency_level => DEFAULT_CONCURRENCY_LEVEL })
5
5
  @s3_bucket = s3_bucket
6
+ @config = config
6
7
  end
7
8
 
8
9
  private
@@ -27,7 +28,7 @@ module Filey
27
28
  if ENV['disable_parallel_processing']
28
29
  jobs.each(&:call)
29
30
  else
30
- jobs.each_slice(DEFAULT_CONCURRENCY_LEVEL) { |jobs|
31
+ jobs.each_slice(slice_size) { |jobs|
31
32
  threads = jobs.map { |job|
32
33
  Thread.new {
33
34
  job.call
@@ -38,6 +39,11 @@ module Filey
38
39
  end
39
40
  end
40
41
 
42
+ def slice_size
43
+ slice_size_from_cfg = @config[:concurrency_level] || @config['concurrency_level']
44
+ slice_size_from_cfg || DEFAULT_CONCURRENCY_LEVEL
45
+ end
46
+
41
47
  def map_s3_object_to_filey(s3_object)
42
48
  if (s3_object.key.include?'/')
43
49
  path = s3_object.key.scan(/(.*\/).*/).first.first
@@ -77,28 +77,55 @@ describe Filey::DataSources::AwsSdkS3 do
77
77
  end
78
78
 
79
79
  context '#in_parallel_or_sequentially' do
80
- describe Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL do
81
- concurrency_level = Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL
82
80
 
83
- before(:each) {
81
+ shared_examples 'concurrent processing' do |concurrency_level, config|
82
+ let(:operation_on_s3_object) {
84
83
  @ints = []
85
84
  operation_on_s3_object = lambda do |s3_object|
86
85
  @ints << s3_object
87
86
  end
88
- s3_bucket = double('s3_bucket', :objects => (1..(concurrency_level * 2)).map do |int| int end)
89
- data_source = Filey::DataSources::AwsSdkS3.new(s3_bucket)
87
+ }
88
+
89
+ before(:each) {
90
+ s3_bucket = double(
91
+ 's3_bucket',
92
+ :objects => (0..200).map { |int| int }
93
+ )
94
+ data_source = Filey::DataSources::AwsSdkS3.new(s3_bucket, config)
90
95
  data_source.send(:in_parallel_or_sequentially, operation_on_s3_object)
91
96
  }
92
97
 
93
- it "does at most #{concurrency_level} ops in parallel" do
98
+ it "honors the concurrency level by processing the first #{concurrency_level} items first" do
94
99
  @ints.take(concurrency_level).all? { |int|
95
100
  int <= concurrency_level
96
101
  }.should be true
102
+ end
103
+
104
+ it "honors the concurrency level by processing the second patch of #{concurrency_level} items second" do
97
105
  @ints.drop(concurrency_level).take(concurrency_level).all? { |int|
98
- int > concurrency_level
106
+ int >= concurrency_level && int < (2 * concurrency_level)
99
107
  }.should be true
100
108
  end
101
109
  end
110
+
111
+ describe 'default concurrency level' do
112
+ concurrency_level = Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL
113
+
114
+ include_examples(
115
+ 'concurrent processing',
116
+ concurrency_level,
117
+ nil
118
+ )
119
+ end
120
+
121
+ describe 'specifying custom concurrency level' do
122
+ concurrency_level = 20
123
+ include_examples(
124
+ 'concurrent processing',
125
+ concurrency_level,
126
+ { :concurrency_level => concurrency_level }
127
+ )
128
+ end
102
129
  end
103
130
 
104
131
  context 'gzip' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filey-diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.4
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lauri Lehmijoki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-12 00:00:00.000000000 Z
11
+ date: 2013-07-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake