filey-diff 1.2.4 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/changelog.md +4 -0
- data/filey-diff.gemspec +1 -1
- data/lib/filey-diff/data-sources/aws_sdk_s3.rb +8 -2
- data/spec/data_sources_spec.rb +34 -7
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1fe9b2c5268647baab2e70424019f87a308c45d4
|
4
|
+
data.tar.gz: 76c1429b9a18f19dae15856372fefa39edb7b45a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ba377502534f9b43ba5d1abf50b1bb15f43dc975863856414e1dc3ea8642f01616869b0cd0578168165fba51fe93df54ca1f662aafe818418482505105709886
|
7
|
+
data.tar.gz: 88c98411ed8cafa1a598b2aa869831eb93d3c6bc20ed707ad3ec9a4d0056f83a5064f17ec3e8e962d871ca88a683cebe6333161c69ed254244d04beff839ab94
|
data/README.md
CHANGED
@@ -61,6 +61,18 @@ Filey::Comparison.list_outdated(fs_data_source, s3_data_source).each { |filey|
|
|
61
61
|
}
|
62
62
|
```
|
63
63
|
|
64
|
+
## AWS SDK data source
|
65
|
+
|
66
|
+
### Specifying custom concurrency level
|
67
|
+
|
68
|
+
The concurrency level determines the number of parallel operations that the
|
69
|
+
`AwsSdkS3` data source performs against the S3 API.
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
config = { :concurrency_level => 1000 }
|
73
|
+
s3_data_source = Filey::DataSources::AwsSdkS3.new(s3_bucket, config)
|
74
|
+
```
|
75
|
+
|
64
76
|
## Example use cases
|
65
77
|
|
66
78
|
Arnie has a blog on AWS S3. He has just finished a new post and wants to upload
|
data/changelog.md
CHANGED
data/filey-diff.gemspec
CHANGED
data/lib/filey-diff/data-sources/aws_sdk_s3.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
module Filey
|
2
2
|
module DataSources
|
3
3
|
class AwsSdkS3 < DataSource
|
4
|
-
def initialize(s3_bucket)
|
4
|
+
def initialize(s3_bucket, config = { :concurrency_level => DEFAULT_CONCURRENCY_LEVEL })
|
5
5
|
@s3_bucket = s3_bucket
|
6
|
+
@config = config
|
6
7
|
end
|
7
8
|
|
8
9
|
private
|
@@ -27,7 +28,7 @@ module Filey
|
|
27
28
|
if ENV['disable_parallel_processing']
|
28
29
|
jobs.each(&:call)
|
29
30
|
else
|
30
|
-
jobs.each_slice(
|
31
|
+
jobs.each_slice(slice_size) { |jobs|
|
31
32
|
threads = jobs.map { |job|
|
32
33
|
Thread.new {
|
33
34
|
job.call
|
@@ -38,6 +39,11 @@ module Filey
|
|
38
39
|
end
|
39
40
|
end
|
40
41
|
|
42
|
+
def slice_size
|
43
|
+
slice_size_from_cfg = @config[:concurrency_level] || @config['concurrency_level']
|
44
|
+
slice_size_from_cfg || DEFAULT_CONCURRENCY_LEVEL
|
45
|
+
end
|
46
|
+
|
41
47
|
def map_s3_object_to_filey(s3_object)
|
42
48
|
if (s3_object.key.include?'/')
|
43
49
|
path = s3_object.key.scan(/(.*\/).*/).first.first
|
data/spec/data_sources_spec.rb
CHANGED
@@ -77,28 +77,55 @@ describe Filey::DataSources::AwsSdkS3 do
|
|
77
77
|
end
|
78
78
|
|
79
79
|
context '#in_parallel_or_sequentially' do
|
80
|
-
describe Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL do
|
81
|
-
concurrency_level = Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL
|
82
80
|
|
83
|
-
|
81
|
+
shared_examples 'concurrent processing' do |concurrency_level, config|
|
82
|
+
let(:operation_on_s3_object) {
|
84
83
|
@ints = []
|
85
84
|
operation_on_s3_object = lambda do |s3_object|
|
86
85
|
@ints << s3_object
|
87
86
|
end
|
88
|
-
|
89
|
-
|
87
|
+
}
|
88
|
+
|
89
|
+
before(:each) {
|
90
|
+
s3_bucket = double(
|
91
|
+
's3_bucket',
|
92
|
+
:objects => (0..200).map { |int| int }
|
93
|
+
)
|
94
|
+
data_source = Filey::DataSources::AwsSdkS3.new(s3_bucket, config)
|
90
95
|
data_source.send(:in_parallel_or_sequentially, operation_on_s3_object)
|
91
96
|
}
|
92
97
|
|
93
|
-
it "
|
98
|
+
it "honors the concurrency level by processing the first #{concurrency_level} items first" do
|
94
99
|
@ints.take(concurrency_level).all? { |int|
|
95
100
|
int <= concurrency_level
|
96
101
|
}.should be true
|
102
|
+
end
|
103
|
+
|
104
|
+
it "honors the concurrency level by processing the second patch of #{concurrency_level} items second" do
|
97
105
|
@ints.drop(concurrency_level).take(concurrency_level).all? { |int|
|
98
|
-
int
|
106
|
+
int >= concurrency_level && int < (2 * concurrency_level)
|
99
107
|
}.should be true
|
100
108
|
end
|
101
109
|
end
|
110
|
+
|
111
|
+
describe 'default concurrency level' do
|
112
|
+
concurrency_level = Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL
|
113
|
+
|
114
|
+
include_examples(
|
115
|
+
'concurrent processing',
|
116
|
+
concurrency_level,
|
117
|
+
nil
|
118
|
+
)
|
119
|
+
end
|
120
|
+
|
121
|
+
describe 'specifying custom concurrency level' do
|
122
|
+
concurrency_level = 20
|
123
|
+
include_examples(
|
124
|
+
'concurrent processing',
|
125
|
+
concurrency_level,
|
126
|
+
{ :concurrency_level => concurrency_level }
|
127
|
+
)
|
128
|
+
end
|
102
129
|
end
|
103
130
|
|
104
131
|
context 'gzip' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filey-diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.4
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lauri Lehmijoki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|