sluice 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- M2E3ZDFmMDYxZjAzNDQzMTg0Nzk5MjdlN2QwMzY4YmZkOGE3MzU2MQ==
4
+ YWQ2MjFjYmUxNjM1ZTZjNzQ4Njc5MDNhZTFhZmM2ZmNjOWVlZWY5MQ==
5
5
  data.tar.gz: !binary |-
6
- OWJhZTg4MmFhMDBkZjZhZjY3YjU2ZjU4NmQ5ZTZlNmE1MWYyZmUzNQ==
6
+ M2VhZGE0ZDU5ZDJkMWNmOWZlNTQxOWViMzY4N2I0NGQ5N2FjZjQ1Zg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- ZTQ3M2MwNjdlYTU1ODc3MmE4YzZiNzBkZjY5MTRiNDNmNTZhY2QyZjFmNGYx
10
- MWNlMTI5Y2UzMmJmZTJhZGJiZjhiYTYzNTIzNGIwYWQ4MmRkYThlYjlhYjVl
11
- Y2Q0MjExZjNlMzZlODFlMDAxODIxY2ZhZTEwMjdlNDc3M2EwY2I=
9
+ MWI2ZjU2NmI5ZDJiNmQ3MzExYjA4ZmFmMzRlNjZlYjg0MzAwZWFjNjIyM2M4
10
+ MjVlZmQ0ODgwOTA1N2M4ODUzNTNhMjYwODMzNmRiNzNkMTczNTA5MGQ1MDU4
11
+ ZmRjMGIwY2Y1Y2I3NDYxNjgzMGVlMDEwMzU2Yjc2MjBiMTNiOTE=
12
12
  data.tar.gz: !binary |-
13
- MTY1YmRhZTAwZjc5NTUxMjUxNDM1NzQyNzY0NmQ2YTI5NDI2NGRiMjA5YTA4
14
- YzBiNjlmNzhjZjI2MDViYTVmOGY1NmQyMmE5ODM1ZmZiMWZjMGQ4YzI5NGQ1
15
- ZTdiZjEzOWQ1YjMyYWQ3ZGI0MWY2NWMyYTk4MDhlMGNmY2JhOWQ=
13
+ NGY3MDRiNjgxOWNiOGUwZThhOGM0MWExYzczM2UxOThlM2FkMDBkYTJmMDYx
14
+ NzkzYTlkYmE3YzQ3ZjNkN2IyY2ExMGU2ZmFhZDBlOGE4NjdlZTBmMDU2NmY0
15
+ NTA1NGM5NGIwZjA0Yzg2MDk5NjZjZGZjZDE4Nzg1ZDk1ZjFmOWU=
data/CHANGELOG CHANGED
@@ -1,3 +1,7 @@
1
+ Version 0.3.1 (2016-07-06)
2
+ --------------------------
3
+ Replace module_function everywhere with self (#46)
4
+
1
5
  Version 0.3.0 (2016-07-05)
2
6
  --------------------------
3
7
  Update Bundler version in Travis (#45)
data/README.md CHANGED
@@ -26,7 +26,7 @@ Sluice has been extracted from a pair of Ruby ETL applications built by the [Sno
26
26
 
27
27
  Or in your Gemfile:
28
28
 
29
- gem 'sluice', '~> 0.3.0'
29
+ gem 'sluice', '~> 0.3.1'
30
30
 
31
31
  ## Usage
32
32
 
@@ -43,7 +43,7 @@ module Sluice
43
43
  # +access_key_id+:: AWS access key ID
44
44
  # +secret_access_key+:: AWS secret access key
45
45
  Contract String, String, String => FogStorage
46
- def new_fog_s3_from(region, access_key_id, secret_access_key)
46
+ def self.new_fog_s3_from(region, access_key_id, secret_access_key)
47
47
  fog = Fog::Storage.new({
48
48
  :provider => 'AWS',
49
49
  :region => region,
@@ -53,7 +53,6 @@ module Sluice
53
53
  fog.sync_clock
54
54
  fog
55
55
  end
56
- module_function :new_fog_s3_from
57
56
 
58
57
  # Return an array of all Fog::Storage::AWS::File's
59
58
  #
@@ -63,7 +62,7 @@ module Sluice
63
62
  #
64
63
  # Returns array of Fog::Storage::AWS::File's
65
64
  Contract FogStorage, Location => ArrayOf[FogFile]
66
- def list_files(s3, location)
65
+ def self.list_files(s3, location)
67
66
  files_and_dirs = s3.directories.get(location.bucket, prefix: location.dir_as_path).files
68
67
 
69
68
  files = [] # Can't use a .select because of Ruby deep copy issues (array of non-POROs)
@@ -74,7 +73,6 @@ module Sluice
74
73
  }
75
74
  files
76
75
  end
77
- module_function :list_files
78
76
 
79
77
  # Whether the given path is a directory or not
80
78
  #
@@ -83,11 +81,10 @@ module Sluice
83
81
  #
84
82
  # Returns boolean
85
83
  Contract String => Bool
86
- def is_folder?(path)
84
+ def self.is_folder?(path)
87
85
  (path.end_with?('_$folder$') || # EMR-created
88
86
  path.end_with?('/'))
89
87
  end
90
- module_function :is_folder?
91
88
 
92
89
  # Whether the given path is a file or not
93
90
  #
@@ -96,10 +93,9 @@ module Sluice
96
93
  #
97
94
  # Returns boolean
98
95
  Contract String => Bool
99
- def is_file?(path)
96
+ def self.is_file?(path)
100
97
  !is_folder?(path)
101
98
  end
102
- module_function :is_file?
103
99
 
104
100
  # Returns the basename for the given path
105
101
  #
@@ -109,7 +105,7 @@ module Sluice
109
105
  # Returns the basename, or nil if the
110
106
  # path is to a folder
111
107
  Contract nil => String
112
- def get_basename(path)
108
+ def self.get_basename(path)
113
109
  if is_folder?(path)
114
110
  nil
115
111
  else
@@ -121,7 +117,6 @@ module Sluice
121
117
  end
122
118
  end
123
119
  end
124
- module_function :get_basename
125
120
 
126
121
  # Determine if a bucket is empty
127
122
  #
@@ -129,10 +124,9 @@ module Sluice
129
124
  # +s3+:: A Fog::Storage s3 connection
130
125
  # +location+:: The location to check
131
126
  Contract FogStorage, Location => Bool
132
- def is_empty?(s3, location)
127
+ def self.is_empty?(s3, location)
133
128
  list_files(s3, location).length == 0
134
129
  end
135
- module_function :is_empty?
136
130
 
137
131
  # Download files from an S3 location to
138
132
  # local storage, concurrently
@@ -142,12 +136,11 @@ module Sluice
142
136
  # +from_files_or_loc+:: Array of filepaths or Fog::Storage::AWS::File objects, or S3Location to download files from
143
137
  # +to_directory+:: Local directory to copy files to
144
138
  # +match_regex+:: a regex string to match the files to delete
145
- def download_files(s3, from_files_or_loc, to_directory, match_regex='.+')
139
+ def self.download_files(s3, from_files_or_loc, to_directory, match_regex='.+')
146
140
 
147
141
  puts " downloading #{describe_from(from_files_or_loc)} to #{to_directory}"
148
142
  process_files(:download, s3, from_files_or_loc, [], match_regex, to_directory)
149
143
  end
150
- module_function :download_files
151
144
 
152
145
  # Delete files from S3 locations concurrently
153
146
  #
@@ -155,12 +148,11 @@ module Sluice
155
148
  # +s3+:: A Fog::Storage s3 connection
156
149
  # +from_files_or_loc+:: Array of filepaths or Fog::Storage::AWS::File objects, or S3Location to delete files from
157
150
  # +match_regex+:: a regex string to match the files to delete
158
- def delete_files(s3, from_files_or_loc, match_regex='.+')
151
+ def self.delete_files(s3, from_files_or_loc, match_regex='.+')
159
152
 
160
153
  puts " deleting #{describe_from(from_files_or_loc)}"
161
154
  process_files(:delete, s3, from_files_or_loc, [], match_regex)
162
155
  end
163
- module_function :delete_files
164
156
 
165
157
  # Copies files between S3 locations in two different accounts
166
158
  #
@@ -181,7 +173,7 @@ module Sluice
181
173
  # +match_regex+:: a regex string to match the files to move
182
174
  # +alter_filename_lambda+:: lambda to alter the written filename
183
175
  # +flatten+:: strips off any sub-folders below the from_location
184
- def copy_files_inter(from_s3, to_s3, from_location, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
176
+ def self.copy_files_inter(from_s3, to_s3, from_location, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
185
177
 
186
178
  puts " copying inter-account #{describe_from(from_location)} to #{to_location}"
187
179
  processed = []
@@ -193,7 +185,6 @@ module Sluice
193
185
 
194
186
  processed
195
187
  end
196
- module_function :copy_files_inter
197
188
 
198
189
  # Copies files between S3 locations concurrently
199
190
  #
@@ -204,12 +195,11 @@ module Sluice
204
195
  # +match_regex+:: a regex string to match the files to copy
205
196
  # +alter_filename_lambda+:: lambda to alter the written filename
206
197
  # +flatten+:: strips off any sub-folders below the from_location
207
- def copy_files(s3, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
198
+ def self.copy_files(s3, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
208
199
 
209
200
  puts " copying #{describe_from(from_files_or_loc)} to #{to_location}"
210
201
  process_files(:copy, s3, from_files_or_loc, [], match_regex, to_location, alter_filename_lambda, flatten)
211
202
  end
212
- module_function :copy_files
213
203
 
214
204
  # Copies files between S3 locations maintaining a manifest to
215
205
  # avoid copying a file which was copied previously.
@@ -227,7 +217,7 @@ module Sluice
227
217
  # +match_regex+:: a regex string to match the files to copy
228
218
  # +alter_filename_lambda+:: lambda to alter the written filename
229
219
  # +flatten+:: strips off any sub-folders below the from_location
230
- def copy_files_manifest(s3, manifest, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
220
+ def self.copy_files_manifest(s3, manifest, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
231
221
 
232
222
  puts " copying with manifest #{describe_from(from_files_or_loc)} to #{to_location}"
233
223
  ignore = manifest.get_entries(s3) # Files to leave untouched
@@ -236,7 +226,6 @@ module Sluice
236
226
 
237
227
  processed
238
228
  end
239
- module_function :copy_files_manifest
240
229
 
241
230
  # Moves files between S3 locations in two different accounts
242
231
  #
@@ -255,7 +244,7 @@ module Sluice
255
244
  # +match_regex+:: a regex string to match the files to move
256
245
  # +alter_filename_lambda+:: lambda to alter the written filename
257
246
  # +flatten+:: strips off any sub-folders below the from_location
258
- def move_files_inter(from_s3, to_s3, from_location, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
247
+ def self.move_files_inter(from_s3, to_s3, from_location, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
259
248
 
260
249
  puts " moving inter-account #{describe_from(from_location)} to #{to_location}"
261
250
  processed = []
@@ -268,7 +257,6 @@ module Sluice
268
257
 
269
258
  processed
270
259
  end
271
- module_function :move_files_inter
272
260
 
273
261
  # Moves files between S3 locations concurrently
274
262
  #
@@ -279,12 +267,11 @@ module Sluice
279
267
  # +match_regex+:: a regex string to match the files to move
280
268
  # +alter_filename_lambda+:: lambda to alter the written filename
281
269
  # +flatten+:: strips off any sub-folders below the from_location
282
- def move_files(s3, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
270
+ def self.move_files(s3, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
283
271
 
284
272
  puts " moving #{describe_from(from_files_or_loc)} to #{to_location}"
285
273
  process_files(:move, s3, from_files_or_loc, [], match_regex, to_location, alter_filename_lambda, flatten)
286
274
  end
287
- module_function :move_files
288
275
 
289
276
  # Uploads files to S3 locations concurrently
290
277
  #
@@ -293,12 +280,11 @@ module Sluice
293
280
  # +from_files_or_dir+:: Local array of files or local directory to upload files from
294
281
  # +to_location+:: S3Location to upload files to
295
282
  # +match_glob+:: a filesystem glob to match the files to upload
296
- def upload_files(s3, from_files_or_dir, to_location, match_glob='*')
283
+ def self.upload_files(s3, from_files_or_dir, to_location, match_glob='*')
297
284
 
298
285
  puts " uploading #{describe_from(from_files_or_dir)} to #{to_location}"
299
286
  process_files(:upload, s3, from_files_or_dir, [], match_glob, to_location)
300
287
  end
301
- module_function :upload_files
302
288
 
303
289
  # Upload a single file to the exact location specified
304
290
  # Has no intelligence around filenaming.
@@ -308,7 +294,7 @@ module Sluice
308
294
  # +from_file:: A local file path
309
295
  # +to_bucket:: The Fog::Directory to upload to
310
296
  # +to_file:: The file path to upload to
311
- def upload_file(s3, from_file, to_bucket, to_file)
297
+ def self.upload_file(s3, from_file, to_bucket, to_file)
312
298
 
313
299
  local_file = File.open(from_file)
314
300
 
@@ -320,7 +306,6 @@ module Sluice
320
306
 
321
307
  local_file.close
322
308
  end
323
- module_function :upload_file
324
309
 
325
310
  # Download a single file to the exact path specified
326
311
  # Has no intelligence around filenaming.
@@ -330,7 +315,7 @@ module Sluice
330
315
  # +s3+:: A Fog::Storage s3 connection
331
316
  # +from_file:: A Fog::Storage::AWS::File to download
332
317
  # +to_file:: A local file path
333
- def download_file(s3, from_file, to_file)
318
+ def self.download_file(s3, from_file, to_file)
334
319
 
335
320
  FileUtils.mkdir_p(File.dirname(to_file))
336
321
 
@@ -340,7 +325,6 @@ module Sluice
340
325
  local_file.write(from_file.body)
341
326
  local_file.close
342
327
  end
343
- module_function :download_file
344
328
 
345
329
  private
346
330
 
@@ -351,14 +335,13 @@ module Sluice
351
335
  # +from_files_or_dir_or_loc+:: Array of filepaths or Fog::Storage::AWS::File objects, local directory or S3Location to process files from
352
336
  #
353
337
  # Returns a log-friendly string
354
- def describe_from(from_files_or_dir_or_loc)
338
+ def self.describe_from(from_files_or_dir_or_loc)
355
339
  if from_files_or_dir_or_loc.is_a?(Array)
356
340
  "#{from_files_or_dir_or_loc.length} file(s)"
357
341
  else
358
342
  "files from #{from_files_or_dir_or_loc}"
359
343
  end
360
344
  end
361
- module_function :describe_from
362
345
 
363
346
  # Concurrent file operations between S3 locations. Supports:
364
347
  # - Download
@@ -376,7 +359,7 @@ module Sluice
376
359
  # +to_loc_or_dir+:: S3Location or local directory to process files to
377
360
  # +alter_filename_lambda+:: lambda to alter the written filename
378
361
  # +flatten+:: strips off any sub-folders below the from_loc_or_dir
379
- def process_files(operation, s3, from_files_or_dir_or_loc, ignore=[], match_regex_or_glob='.+', to_loc_or_dir=nil, alter_filename_lambda=false, flatten=false)
362
+ def self.process_files(operation, s3, from_files_or_dir_or_loc, ignore=[], match_regex_or_glob='.+', to_loc_or_dir=nil, alter_filename_lambda=false, flatten=false)
380
363
 
381
364
  # Validate that the file operation makes sense
382
365
  case operation
@@ -589,7 +572,6 @@ module Sluice
589
572
 
590
573
  processed_files # Return the processed files
591
574
  end
592
- module_function :process_files
593
575
 
594
576
  # A helper function to rename a file
595
577
  # TODO: fixup lambda to be Maybe[Proc]
@@ -620,12 +602,11 @@ module Sluice
620
602
  # +match_regex+:: a regex string to match the files to copy
621
603
  #
622
604
  # Returns array of files (no sub-directories)
623
- def glob_files(dir, glob)
605
+ def self.glob_files(dir, glob)
624
606
  Dir.glob(File.join(dir, glob)).select { |f|
625
607
  File.file?(f) # Drop sub-directories
626
608
  }
627
609
  end
628
- module_function :glob_files
629
610
 
630
611
  # A helper function to attempt to run a
631
612
  # function retries times
@@ -636,7 +617,7 @@ module Sluice
636
617
  # +retries+:: Number of retries to attempt
637
618
  # +attempt_msg+:: Message to puts on each attempt
638
619
  # +failure_msg+:: Message to puts on each failure
639
- def retry_x(object, send_args, retries, attempt_msg, failure_msg)
620
+ def self.retry_x(object, send_args, retries, attempt_msg, failure_msg)
640
621
  i = 0
641
622
  begin
642
623
  Timeout::timeout(TIMEOUT_WAIT) do # In case our operation times out
@@ -651,7 +632,6 @@ module Sluice
651
632
  retry
652
633
  end
653
634
  end
654
- module_function :retry_x
655
635
 
656
636
  # A helper function to prepare destination
657
637
  # filenames and paths. This is a bit weird
@@ -667,7 +647,7 @@ module Sluice
667
647
  # +flatten+:: strips off any sub-folders below the from_location
668
648
  #
669
649
  # TODO: this badly needs unit tests
670
- def name_file(filepath, new_filename, remove_path=nil, add_path=nil, flatten=false)
650
+ def self.name_file(filepath, new_filename, remove_path=nil, add_path=nil, flatten=false)
671
651
 
672
652
  # First, replace the filename in filepath with new one
673
653
  dirname = File.dirname(filepath)
@@ -698,7 +678,6 @@ module Sluice
698
678
  # Add the new filepath on to the start and return
699
679
  return add_path + shortened_filepath
700
680
  end
701
- module_function :name_file
702
681
 
703
682
  end
704
683
  end
@@ -27,7 +27,7 @@ module Sluice
27
27
  # +end_date+:: end date
28
28
  # +date_format:: format of date in filenames
29
29
  # +file_ext:: extension on files (if any)
30
- def files_between(start_date, end_date, date_format, file_ext=nil)
30
+ def self.files_between(start_date, end_date, date_format, file_ext=nil)
31
31
 
32
32
  dates = []
33
33
  Date.parse(start_date).upto(Date.parse(end_date)) do |day|
@@ -36,19 +36,17 @@ module Sluice
36
36
 
37
37
  '(' + dates.join('|') + ')[^/]+%s$' % regexify(file_ext)
38
38
  end
39
- module_function :files_between
40
39
 
41
40
  # Add a trailing slash to a path if missing.
42
41
  # Tolerates a nil path.
43
42
  #
44
43
  # Parameters:
45
44
  # +path+:: path to add a trailing slash to
46
- def trail_slash(path)
45
+ def self.trail_slash(path)
47
46
  unless path.nil?
48
47
  path[-1].chr != '/' ? path << '/' : path
49
48
  end
50
49
  end
51
- module_function :trail_slash
52
50
 
53
51
  # Find files up to (and including) the given date.
54
52
  #
@@ -59,7 +57,7 @@ module Sluice
59
57
  # +end_date+:: end date
60
58
  # +date_format:: format of date in filenames
61
59
  # +file_ext:: extension on files (if any)
62
- def files_up_to(end_date, date_format, file_ext=nil)
60
+ def self.files_up_to(end_date, date_format, file_ext=nil)
63
61
 
64
62
  # Let's create a black list from the day
65
63
  # after the end_date up to today
@@ -73,7 +71,6 @@ module Sluice
73
71
 
74
72
  NegativeRegex.new('(' + dates.join('|') + ')[^/]+%s$' % regexify(file_ext))
75
73
  end
76
- module_function :files_up_to
77
74
 
78
75
  # Find files starting from the given date.
79
76
  #
@@ -81,7 +78,7 @@ module Sluice
81
78
  # +start_date+:: start date
82
79
  # +date_format:: format of date in filenames
83
80
  # +file_ext:: extension on files (if any); include period
84
- def files_from(start_date, date_format, file_ext=nil)
81
+ def self.files_from(start_date, date_format, file_ext=nil)
85
82
 
86
83
  # Let's create a white list from the start_date to today
87
84
  today = Date.today
@@ -93,7 +90,6 @@ module Sluice
93
90
 
94
91
  '(' + dates.join('|') + ')[^/]+%s$' % regexify(file_ext)
95
92
  end
96
- module_function :files_from
97
93
 
98
94
  private
99
95
 
@@ -102,10 +98,9 @@ module Sluice
102
98
  #
103
99
  # Parameters:
104
100
  # +file_ext:: the file extension to make regexp friendly
105
- def regexify(file_ext)
101
+ def self.regexify(file_ext)
106
102
  file_ext.nil? ? nil : file_ext[0].chr != '.' ? '\\.' << file_ext : '\\' << file_ext
107
103
  end
108
- module_function :regexify
109
104
 
110
105
  end
111
106
  end
@@ -15,5 +15,5 @@
15
15
 
16
16
  module Sluice
17
17
  NAME = "sluice"
18
- VERSION = "0.3.0"
18
+ VERSION = "0.3.1"
19
19
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sluice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Dean
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-07-05 00:00:00.000000000 Z
12
+ date: 2016-07-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: contracts