filey-diff 1.2.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/changelog.md +4 -0
- data/filey-diff.gemspec +1 -1
- data/lib/filey-diff/data-sources/aws_sdk_s3.rb +8 -2
- data/spec/data_sources_spec.rb +34 -7
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1fe9b2c5268647baab2e70424019f87a308c45d4
|
4
|
+
data.tar.gz: 76c1429b9a18f19dae15856372fefa39edb7b45a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ba377502534f9b43ba5d1abf50b1bb15f43dc975863856414e1dc3ea8642f01616869b0cd0578168165fba51fe93df54ca1f662aafe818418482505105709886
|
7
|
+
data.tar.gz: 88c98411ed8cafa1a598b2aa869831eb93d3c6bc20ed707ad3ec9a4d0056f83a5064f17ec3e8e962d871ca88a683cebe6333161c69ed254244d04beff839ab94
|
data/README.md
CHANGED
@@ -61,6 +61,18 @@ Filey::Comparison.list_outdated(fs_data_source, s3_data_source).each { |filey|
|
|
61
61
|
}
|
62
62
|
```
|
63
63
|
|
64
|
+
## AWS SDK data source
|
65
|
+
|
66
|
+
### Specifying custom concurrency level
|
67
|
+
|
68
|
+
The concurrency level determines the number of parallel operations that the
|
69
|
+
`AwsSdkS3` data source performs against the S3 API.
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
config = { :concurrency_level => 1000 }
|
73
|
+
s3_data_source = Filey::DataSources::AwsSdkS3.new(s3_bucket, config)
|
74
|
+
```
|
75
|
+
|
64
76
|
## Example use cases
|
65
77
|
|
66
78
|
Arnie has a blog on AWS S3. He has just finished a new post and wants to upload
|
data/changelog.md
CHANGED
data/filey-diff.gemspec
CHANGED
data/lib/filey-diff/data-sources/aws_sdk_s3.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
module Filey
|
2
2
|
module DataSources
|
3
3
|
class AwsSdkS3 < DataSource
|
4
|
-
def initialize(s3_bucket)
|
4
|
+
def initialize(s3_bucket, config = { :concurrency_level => DEFAULT_CONCURRENCY_LEVEL })
|
5
5
|
@s3_bucket = s3_bucket
|
6
|
+
@config = config
|
6
7
|
end
|
7
8
|
|
8
9
|
private
|
@@ -27,7 +28,7 @@ module Filey
|
|
27
28
|
if ENV['disable_parallel_processing']
|
28
29
|
jobs.each(&:call)
|
29
30
|
else
|
30
|
-
jobs.each_slice(
|
31
|
+
jobs.each_slice(slice_size) { |jobs|
|
31
32
|
threads = jobs.map { |job|
|
32
33
|
Thread.new {
|
33
34
|
job.call
|
@@ -38,6 +39,11 @@ module Filey
|
|
38
39
|
end
|
39
40
|
end
|
40
41
|
|
42
|
+
def slice_size
|
43
|
+
slice_size_from_cfg = @config[:concurrency_level] || @config['concurrency_level']
|
44
|
+
slice_size_from_cfg || DEFAULT_CONCURRENCY_LEVEL
|
45
|
+
end
|
46
|
+
|
41
47
|
def map_s3_object_to_filey(s3_object)
|
42
48
|
if (s3_object.key.include?'/')
|
43
49
|
path = s3_object.key.scan(/(.*\/).*/).first.first
|
data/spec/data_sources_spec.rb
CHANGED
@@ -77,28 +77,55 @@ describe Filey::DataSources::AwsSdkS3 do
|
|
77
77
|
end
|
78
78
|
|
79
79
|
context '#in_parallel_or_sequentially' do
|
80
|
-
describe Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL do
|
81
|
-
concurrency_level = Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL
|
82
80
|
|
83
|
-
|
81
|
+
shared_examples 'concurrent processing' do |concurrency_level, config|
|
82
|
+
let(:operation_on_s3_object) {
|
84
83
|
@ints = []
|
85
84
|
operation_on_s3_object = lambda do |s3_object|
|
86
85
|
@ints << s3_object
|
87
86
|
end
|
88
|
-
|
89
|
-
|
87
|
+
}
|
88
|
+
|
89
|
+
before(:each) {
|
90
|
+
s3_bucket = double(
|
91
|
+
's3_bucket',
|
92
|
+
:objects => (0..200).map { |int| int }
|
93
|
+
)
|
94
|
+
data_source = Filey::DataSources::AwsSdkS3.new(s3_bucket, config)
|
90
95
|
data_source.send(:in_parallel_or_sequentially, operation_on_s3_object)
|
91
96
|
}
|
92
97
|
|
93
|
-
it "
|
98
|
+
it "honors the concurrency level by processing the first #{concurrency_level} items first" do
|
94
99
|
@ints.take(concurrency_level).all? { |int|
|
95
100
|
int <= concurrency_level
|
96
101
|
}.should be true
|
102
|
+
end
|
103
|
+
|
104
|
+
it "honors the concurrency level by processing the second patch of #{concurrency_level} items second" do
|
97
105
|
@ints.drop(concurrency_level).take(concurrency_level).all? { |int|
|
98
|
-
int
|
106
|
+
int >= concurrency_level && int < (2 * concurrency_level)
|
99
107
|
}.should be true
|
100
108
|
end
|
101
109
|
end
|
110
|
+
|
111
|
+
describe 'default concurrency level' do
|
112
|
+
concurrency_level = Filey::DataSources::AwsSdkS3::DEFAULT_CONCURRENCY_LEVEL
|
113
|
+
|
114
|
+
include_examples(
|
115
|
+
'concurrent processing',
|
116
|
+
concurrency_level,
|
117
|
+
nil
|
118
|
+
)
|
119
|
+
end
|
120
|
+
|
121
|
+
describe 'specifying custom concurrency level' do
|
122
|
+
concurrency_level = 20
|
123
|
+
include_examples(
|
124
|
+
'concurrent processing',
|
125
|
+
concurrency_level,
|
126
|
+
{ :concurrency_level => concurrency_level }
|
127
|
+
)
|
128
|
+
end
|
102
129
|
end
|
103
130
|
|
104
131
|
context 'gzip' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filey-diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.4
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lauri Lehmijoki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|