sluice 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/CHANGELOG +4 -0
- data/README.md +1 -1
- data/lib/sluice/storage/s3/s3.rb +21 -42
- data/lib/sluice/storage/storage.rb +5 -10
- data/lib/sluice/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YWQ2MjFjYmUxNjM1ZTZjNzQ4Njc5MDNhZTFhZmM2ZmNjOWVlZWY5MQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
M2VhZGE0ZDU5ZDJkMWNmOWZlNTQxOWViMzY4N2I0NGQ5N2FjZjQ1Zg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MWI2ZjU2NmI5ZDJiNmQ3MzExYjA4ZmFmMzRlNjZlYjg0MzAwZWFjNjIyM2M4
|
10
|
+
MjVlZmQ0ODgwOTA1N2M4ODUzNTNhMjYwODMzNmRiNzNkMTczNTA5MGQ1MDU4
|
11
|
+
ZmRjMGIwY2Y1Y2I3NDYxNjgzMGVlMDEwMzU2Yjc2MjBiMTNiOTE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NGY3MDRiNjgxOWNiOGUwZThhOGM0MWExYzczM2UxOThlM2FkMDBkYTJmMDYx
|
14
|
+
NzkzYTlkYmE3YzQ3ZjNkN2IyY2ExMGU2ZmFhZDBlOGE4NjdlZTBmMDU2NmY0
|
15
|
+
NTA1NGM5NGIwZjA0Yzg2MDk5NjZjZGZjZDE4Nzg1ZDk1ZjFmOWU=
|
data/CHANGELOG
CHANGED
data/README.md
CHANGED
data/lib/sluice/storage/s3/s3.rb
CHANGED
@@ -43,7 +43,7 @@ module Sluice
|
|
43
43
|
# +access_key_id+:: AWS access key ID
|
44
44
|
# +secret_access_key+:: AWS secret access key
|
45
45
|
Contract String, String, String => FogStorage
|
46
|
-
def new_fog_s3_from(region, access_key_id, secret_access_key)
|
46
|
+
def self.new_fog_s3_from(region, access_key_id, secret_access_key)
|
47
47
|
fog = Fog::Storage.new({
|
48
48
|
:provider => 'AWS',
|
49
49
|
:region => region,
|
@@ -53,7 +53,6 @@ module Sluice
|
|
53
53
|
fog.sync_clock
|
54
54
|
fog
|
55
55
|
end
|
56
|
-
module_function :new_fog_s3_from
|
57
56
|
|
58
57
|
# Return an array of all Fog::Storage::AWS::File's
|
59
58
|
#
|
@@ -63,7 +62,7 @@ module Sluice
|
|
63
62
|
#
|
64
63
|
# Returns array of Fog::Storage::AWS::File's
|
65
64
|
Contract FogStorage, Location => ArrayOf[FogFile]
|
66
|
-
def list_files(s3, location)
|
65
|
+
def self.list_files(s3, location)
|
67
66
|
files_and_dirs = s3.directories.get(location.bucket, prefix: location.dir_as_path).files
|
68
67
|
|
69
68
|
files = [] # Can't use a .select because of Ruby deep copy issues (array of non-POROs)
|
@@ -74,7 +73,6 @@ module Sluice
|
|
74
73
|
}
|
75
74
|
files
|
76
75
|
end
|
77
|
-
module_function :list_files
|
78
76
|
|
79
77
|
# Whether the given path is a directory or not
|
80
78
|
#
|
@@ -83,11 +81,10 @@ module Sluice
|
|
83
81
|
#
|
84
82
|
# Returns boolean
|
85
83
|
Contract String => Bool
|
86
|
-
def is_folder?(path)
|
84
|
+
def self.is_folder?(path)
|
87
85
|
(path.end_with?('_$folder$') || # EMR-created
|
88
86
|
path.end_with?('/'))
|
89
87
|
end
|
90
|
-
module_function :is_folder?
|
91
88
|
|
92
89
|
# Whether the given path is a file or not
|
93
90
|
#
|
@@ -96,10 +93,9 @@ module Sluice
|
|
96
93
|
#
|
97
94
|
# Returns boolean
|
98
95
|
Contract String => Bool
|
99
|
-
def is_file?(path)
|
96
|
+
def self.is_file?(path)
|
100
97
|
!is_folder?(path)
|
101
98
|
end
|
102
|
-
module_function :is_file?
|
103
99
|
|
104
100
|
# Returns the basename for the given path
|
105
101
|
#
|
@@ -109,7 +105,7 @@ module Sluice
|
|
109
105
|
# Returns the basename, or nil if the
|
110
106
|
# path is to a folder
|
111
107
|
Contract nil => String
|
112
|
-
def get_basename(path)
|
108
|
+
def self.get_basename(path)
|
113
109
|
if is_folder?(path)
|
114
110
|
nil
|
115
111
|
else
|
@@ -121,7 +117,6 @@ module Sluice
|
|
121
117
|
end
|
122
118
|
end
|
123
119
|
end
|
124
|
-
module_function :get_basename
|
125
120
|
|
126
121
|
# Determine if a bucket is empty
|
127
122
|
#
|
@@ -129,10 +124,9 @@ module Sluice
|
|
129
124
|
# +s3+:: A Fog::Storage s3 connection
|
130
125
|
# +location+:: The location to check
|
131
126
|
Contract FogStorage, Location => Bool
|
132
|
-
def is_empty?(s3, location)
|
127
|
+
def self.is_empty?(s3, location)
|
133
128
|
list_files(s3, location).length == 0
|
134
129
|
end
|
135
|
-
module_function :is_empty?
|
136
130
|
|
137
131
|
# Download files from an S3 location to
|
138
132
|
# local storage, concurrently
|
@@ -142,12 +136,11 @@ module Sluice
|
|
142
136
|
# +from_files_or_loc+:: Array of filepaths or Fog::Storage::AWS::File objects, or S3Location to download files from
|
143
137
|
# +to_directory+:: Local directory to copy files to
|
144
138
|
# +match_regex+:: a regex string to match the files to download
|
145
|
-
def download_files(s3, from_files_or_loc, to_directory, match_regex='.+')
|
139
|
+
def self.download_files(s3, from_files_or_loc, to_directory, match_regex='.+')
|
146
140
|
|
147
141
|
puts " downloading #{describe_from(from_files_or_loc)} to #{to_directory}"
|
148
142
|
process_files(:download, s3, from_files_or_loc, [], match_regex, to_directory)
|
149
143
|
end
|
150
|
-
module_function :download_files
|
151
144
|
|
152
145
|
# Delete files from S3 locations concurrently
|
153
146
|
#
|
@@ -155,12 +148,11 @@ module Sluice
|
|
155
148
|
# +s3+:: A Fog::Storage s3 connection
|
156
149
|
# +from_files_or_loc+:: Array of filepaths or Fog::Storage::AWS::File objects, or S3Location to delete files from
|
157
150
|
# +match_regex+:: a regex string to match the files to delete
|
158
|
-
def delete_files(s3, from_files_or_loc, match_regex='.+')
|
151
|
+
def self.delete_files(s3, from_files_or_loc, match_regex='.+')
|
159
152
|
|
160
153
|
puts " deleting #{describe_from(from_files_or_loc)}"
|
161
154
|
process_files(:delete, s3, from_files_or_loc, [], match_regex)
|
162
155
|
end
|
163
|
-
module_function :delete_files
|
164
156
|
|
165
157
|
# Copies files between S3 locations in two different accounts
|
166
158
|
#
|
@@ -181,7 +173,7 @@ module Sluice
|
|
181
173
|
# +match_regex+:: a regex string to match the files to move
|
182
174
|
# +alter_filename_lambda+:: lambda to alter the written filename
|
183
175
|
# +flatten+:: strips off any sub-folders below the from_location
|
184
|
-
def copy_files_inter(from_s3, to_s3, from_location, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
|
176
|
+
def self.copy_files_inter(from_s3, to_s3, from_location, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
|
185
177
|
|
186
178
|
puts " copying inter-account #{describe_from(from_location)} to #{to_location}"
|
187
179
|
processed = []
|
@@ -193,7 +185,6 @@ module Sluice
|
|
193
185
|
|
194
186
|
processed
|
195
187
|
end
|
196
|
-
module_function :copy_files_inter
|
197
188
|
|
198
189
|
# Copies files between S3 locations concurrently
|
199
190
|
#
|
@@ -204,12 +195,11 @@ module Sluice
|
|
204
195
|
# +match_regex+:: a regex string to match the files to copy
|
205
196
|
# +alter_filename_lambda+:: lambda to alter the written filename
|
206
197
|
# +flatten+:: strips off any sub-folders below the from_location
|
207
|
-
def copy_files(s3, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
|
198
|
+
def self.copy_files(s3, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
|
208
199
|
|
209
200
|
puts " copying #{describe_from(from_files_or_loc)} to #{to_location}"
|
210
201
|
process_files(:copy, s3, from_files_or_loc, [], match_regex, to_location, alter_filename_lambda, flatten)
|
211
202
|
end
|
212
|
-
module_function :copy_files
|
213
203
|
|
214
204
|
# Copies files between S3 locations maintaining a manifest to
|
215
205
|
# avoid copying a file which was copied previously.
|
@@ -227,7 +217,7 @@ module Sluice
|
|
227
217
|
# +match_regex+:: a regex string to match the files to copy
|
228
218
|
# +alter_filename_lambda+:: lambda to alter the written filename
|
229
219
|
# +flatten+:: strips off any sub-folders below the from_location
|
230
|
-
def copy_files_manifest(s3, manifest, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
|
220
|
+
def self.copy_files_manifest(s3, manifest, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
|
231
221
|
|
232
222
|
puts " copying with manifest #{describe_from(from_files_or_loc)} to #{to_location}"
|
233
223
|
ignore = manifest.get_entries(s3) # Files to leave untouched
|
@@ -236,7 +226,6 @@ module Sluice
|
|
236
226
|
|
237
227
|
processed
|
238
228
|
end
|
239
|
-
module_function :copy_files_manifest
|
240
229
|
|
241
230
|
# Moves files between S3 locations in two different accounts
|
242
231
|
#
|
@@ -255,7 +244,7 @@ module Sluice
|
|
255
244
|
# +match_regex+:: a regex string to match the files to move
|
256
245
|
# +alter_filename_lambda+:: lambda to alter the written filename
|
257
246
|
# +flatten+:: strips off any sub-folders below the from_location
|
258
|
-
def move_files_inter(from_s3, to_s3, from_location, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
|
247
|
+
def self.move_files_inter(from_s3, to_s3, from_location, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
|
259
248
|
|
260
249
|
puts " moving inter-account #{describe_from(from_location)} to #{to_location}"
|
261
250
|
processed = []
|
@@ -268,7 +257,6 @@ module Sluice
|
|
268
257
|
|
269
258
|
processed
|
270
259
|
end
|
271
|
-
module_function :move_files_inter
|
272
260
|
|
273
261
|
# Moves files between S3 locations concurrently
|
274
262
|
#
|
@@ -279,12 +267,11 @@ module Sluice
|
|
279
267
|
# +match_regex+:: a regex string to match the files to move
|
280
268
|
# +alter_filename_lambda+:: lambda to alter the written filename
|
281
269
|
# +flatten+:: strips off any sub-folders below the from_location
|
282
|
-
def move_files(s3, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
|
270
|
+
def self.move_files(s3, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
|
283
271
|
|
284
272
|
puts " moving #{describe_from(from_files_or_loc)} to #{to_location}"
|
285
273
|
process_files(:move, s3, from_files_or_loc, [], match_regex, to_location, alter_filename_lambda, flatten)
|
286
274
|
end
|
287
|
-
module_function :move_files
|
288
275
|
|
289
276
|
# Uploads files to S3 locations concurrently
|
290
277
|
#
|
@@ -293,12 +280,11 @@ module Sluice
|
|
293
280
|
# +from_files_or_dir+:: Local array of files or local directory to upload files from
|
294
281
|
# +to_location+:: S3Location to upload files to
|
295
282
|
# +match_glob+:: a filesystem glob to match the files to upload
|
296
|
-
def upload_files(s3, from_files_or_dir, to_location, match_glob='*')
|
283
|
+
def self.upload_files(s3, from_files_or_dir, to_location, match_glob='*')
|
297
284
|
|
298
285
|
puts " uploading #{describe_from(from_files_or_dir)} to #{to_location}"
|
299
286
|
process_files(:upload, s3, from_files_or_dir, [], match_glob, to_location)
|
300
287
|
end
|
301
|
-
module_function :upload_files
|
302
288
|
|
303
289
|
# Upload a single file to the exact location specified
|
304
290
|
# Has no intelligence around filenaming.
|
@@ -308,7 +294,7 @@ module Sluice
|
|
308
294
|
# +from_file+:: A local file path
|
309
295
|
# +to_bucket+:: The Fog::Directory to upload to
|
310
296
|
# +to_file+:: The file path to upload to
|
311
|
-
def upload_file(s3, from_file, to_bucket, to_file)
|
297
|
+
def self.upload_file(s3, from_file, to_bucket, to_file)
|
312
298
|
|
313
299
|
local_file = File.open(from_file)
|
314
300
|
|
@@ -320,7 +306,6 @@ module Sluice
|
|
320
306
|
|
321
307
|
local_file.close
|
322
308
|
end
|
323
|
-
module_function :upload_file
|
324
309
|
|
325
310
|
# Download a single file to the exact path specified
|
326
311
|
# Has no intelligence around filenaming.
|
@@ -330,7 +315,7 @@ module Sluice
|
|
330
315
|
# +s3+:: A Fog::Storage s3 connection
|
331
316
|
# +from_file+:: A Fog::Storage::AWS::File to download
|
332
317
|
# +to_file+:: A local file path
|
333
|
-
def download_file(s3, from_file, to_file)
|
318
|
+
def self.download_file(s3, from_file, to_file)
|
334
319
|
|
335
320
|
FileUtils.mkdir_p(File.dirname(to_file))
|
336
321
|
|
@@ -340,7 +325,6 @@ module Sluice
|
|
340
325
|
local_file.write(from_file.body)
|
341
326
|
local_file.close
|
342
327
|
end
|
343
|
-
module_function :download_file
|
344
328
|
|
345
329
|
private
|
346
330
|
|
@@ -351,14 +335,13 @@ module Sluice
|
|
351
335
|
# +from_files_or_dir_or_loc+:: Array of filepaths or Fog::Storage::AWS::File objects, local directory or S3Location to process files from
|
352
336
|
#
|
353
337
|
# Returns a log-friendly string
|
354
|
-
def describe_from(from_files_or_dir_or_loc)
|
338
|
+
def self.describe_from(from_files_or_dir_or_loc)
|
355
339
|
if from_files_or_dir_or_loc.is_a?(Array)
|
356
340
|
"#{from_files_or_dir_or_loc.length} file(s)"
|
357
341
|
else
|
358
342
|
"files from #{from_files_or_dir_or_loc}"
|
359
343
|
end
|
360
344
|
end
|
361
|
-
module_function :describe_from
|
362
345
|
|
363
346
|
# Concurrent file operations between S3 locations. Supports:
|
364
347
|
# - Download
|
@@ -376,7 +359,7 @@ module Sluice
|
|
376
359
|
# +to_loc_or_dir+:: S3Location or local directory to process files to
|
377
360
|
# +alter_filename_lambda+:: lambda to alter the written filename
|
378
361
|
# +flatten+:: strips off any sub-folders below the from_loc_or_dir
|
379
|
-
def process_files(operation, s3, from_files_or_dir_or_loc, ignore=[], match_regex_or_glob='.+', to_loc_or_dir=nil, alter_filename_lambda=false, flatten=false)
|
362
|
+
def self.process_files(operation, s3, from_files_or_dir_or_loc, ignore=[], match_regex_or_glob='.+', to_loc_or_dir=nil, alter_filename_lambda=false, flatten=false)
|
380
363
|
|
381
364
|
# Validate that the file operation makes sense
|
382
365
|
case operation
|
@@ -589,7 +572,6 @@ module Sluice
|
|
589
572
|
|
590
573
|
processed_files # Return the processed files
|
591
574
|
end
|
592
|
-
module_function :process_files
|
593
575
|
|
594
576
|
# A helper function to rename a file
|
595
577
|
# TODO: fixup lambda to be Maybe[Proc]
|
@@ -620,12 +602,11 @@ module Sluice
|
|
620
602
|
# +match_regex+:: a regex string to match the files to copy
|
621
603
|
#
|
622
604
|
# Returns array of files (no sub-directories)
|
623
|
-
def glob_files(dir, glob)
|
605
|
+
def self.glob_files(dir, glob)
|
624
606
|
Dir.glob(File.join(dir, glob)).select { |f|
|
625
607
|
File.file?(f) # Drop sub-directories
|
626
608
|
}
|
627
609
|
end
|
628
|
-
module_function :glob_files
|
629
610
|
|
630
611
|
# A helper function to attempt to run a
|
631
612
|
# function retries times
|
@@ -636,7 +617,7 @@ module Sluice
|
|
636
617
|
# +retries+:: Number of retries to attempt
|
637
618
|
# +attempt_msg+:: Message to puts on each attempt
|
638
619
|
# +failure_msg+:: Message to puts on each failure
|
639
|
-
def retry_x(object, send_args, retries, attempt_msg, failure_msg)
|
620
|
+
def self.retry_x(object, send_args, retries, attempt_msg, failure_msg)
|
640
621
|
i = 0
|
641
622
|
begin
|
642
623
|
Timeout::timeout(TIMEOUT_WAIT) do # In case our operation times out
|
@@ -651,7 +632,6 @@ module Sluice
|
|
651
632
|
retry
|
652
633
|
end
|
653
634
|
end
|
654
|
-
module_function :retry_x
|
655
635
|
|
656
636
|
# A helper function to prepare destination
|
657
637
|
# filenames and paths. This is a bit weird
|
@@ -667,7 +647,7 @@ module Sluice
|
|
667
647
|
# +flatten+:: strips off any sub-folders below the from_location
|
668
648
|
#
|
669
649
|
# TODO: this badly needs unit tests
|
670
|
-
def name_file(filepath, new_filename, remove_path=nil, add_path=nil, flatten=false)
|
650
|
+
def self.name_file(filepath, new_filename, remove_path=nil, add_path=nil, flatten=false)
|
671
651
|
|
672
652
|
# First, replace the filename in filepath with new one
|
673
653
|
dirname = File.dirname(filepath)
|
@@ -698,7 +678,6 @@ module Sluice
|
|
698
678
|
# Add the new filepath on to the start and return
|
699
679
|
return add_path + shortened_filepath
|
700
680
|
end
|
701
|
-
module_function :name_file
|
702
681
|
|
703
682
|
end
|
704
683
|
end
|
@@ -27,7 +27,7 @@ module Sluice
|
|
27
27
|
# +end_date+:: end date
|
28
28
|
# +date_format+:: format of date in filenames
|
29
29
|
# +file_ext+:: extension on files (if any)
|
30
|
-
def files_between(start_date, end_date, date_format, file_ext=nil)
|
30
|
+
def self.files_between(start_date, end_date, date_format, file_ext=nil)
|
31
31
|
|
32
32
|
dates = []
|
33
33
|
Date.parse(start_date).upto(Date.parse(end_date)) do |day|
|
@@ -36,19 +36,17 @@ module Sluice
|
|
36
36
|
|
37
37
|
'(' + dates.join('|') + ')[^/]+%s$' % regexify(file_ext)
|
38
38
|
end
|
39
|
-
module_function :files_between
|
40
39
|
|
41
40
|
# Add a trailing slash to a path if missing.
|
42
41
|
# Tolerates a nil path.
|
43
42
|
#
|
44
43
|
# Parameters:
|
45
44
|
# +path+:: path to add a trailing slash to
|
46
|
-
def trail_slash(path)
|
45
|
+
def self.trail_slash(path)
|
47
46
|
unless path.nil?
|
48
47
|
path[-1].chr != '/' ? path << '/' : path
|
49
48
|
end
|
50
49
|
end
|
51
|
-
module_function :trail_slash
|
52
50
|
|
53
51
|
# Find files up to (and including) the given date.
|
54
52
|
#
|
@@ -59,7 +57,7 @@ module Sluice
|
|
59
57
|
# +end_date+:: end date
|
60
58
|
# +date_format:: format of date in filenames
|
61
59
|
# +file_ext:: extension on files (if any)
|
62
|
-
def files_up_to(end_date, date_format, file_ext=nil)
|
60
|
+
def self.files_up_to(end_date, date_format, file_ext=nil)
|
63
61
|
|
64
62
|
# Let's create a black list from the day
|
65
63
|
# after the end_date up to today
|
@@ -73,7 +71,6 @@ module Sluice
|
|
73
71
|
|
74
72
|
NegativeRegex.new('(' + dates.join('|') + ')[^/]+%s$' % regexify(file_ext))
|
75
73
|
end
|
76
|
-
module_function :files_up_to
|
77
74
|
|
78
75
|
# Find files starting from the given date.
|
79
76
|
#
|
@@ -81,7 +78,7 @@ module Sluice
|
|
81
78
|
# +start_date+:: start date
|
82
79
|
# +date_format+:: format of date in filenames
|
83
80
|
# +file_ext+:: extension on files (if any); include period
|
84
|
-
def files_from(start_date, date_format, file_ext=nil)
|
81
|
+
def self.files_from(start_date, date_format, file_ext=nil)
|
85
82
|
|
86
83
|
# Let's create a white list from the start_date to today
|
87
84
|
today = Date.today
|
@@ -93,7 +90,6 @@ module Sluice
|
|
93
90
|
|
94
91
|
'(' + dates.join('|') + ')[^/]+%s$' % regexify(file_ext)
|
95
92
|
end
|
96
|
-
module_function :files_from
|
97
93
|
|
98
94
|
private
|
99
95
|
|
@@ -102,10 +98,9 @@ module Sluice
|
|
102
98
|
#
|
103
99
|
# Parameters:
|
104
100
|
# +file_ext+:: the file extension to make regexp friendly
|
105
|
-
def regexify(file_ext)
|
101
|
+
def self.regexify(file_ext)
|
106
102
|
file_ext.nil? ? nil : file_ext[0].chr != '.' ? '\\.' << file_ext : '\\' << file_ext
|
107
103
|
end
|
108
|
-
module_function :regexify
|
109
104
|
|
110
105
|
end
|
111
106
|
end
|
data/lib/sluice/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sluice
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alex Dean
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-07-
|
12
|
+
date: 2016-07-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: contracts
|