sluice 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- M2E3ZDFmMDYxZjAzNDQzMTg0Nzk5MjdlN2QwMzY4YmZkOGE3MzU2MQ==
4
+ YWQ2MjFjYmUxNjM1ZTZjNzQ4Njc5MDNhZTFhZmM2ZmNjOWVlZWY5MQ==
5
5
  data.tar.gz: !binary |-
6
- OWJhZTg4MmFhMDBkZjZhZjY3YjU2ZjU4NmQ5ZTZlNmE1MWYyZmUzNQ==
6
+ M2VhZGE0ZDU5ZDJkMWNmOWZlNTQxOWViMzY4N2I0NGQ5N2FjZjQ1Zg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- ZTQ3M2MwNjdlYTU1ODc3MmE4YzZiNzBkZjY5MTRiNDNmNTZhY2QyZjFmNGYx
10
- MWNlMTI5Y2UzMmJmZTJhZGJiZjhiYTYzNTIzNGIwYWQ4MmRkYThlYjlhYjVl
11
- Y2Q0MjExZjNlMzZlODFlMDAxODIxY2ZhZTEwMjdlNDc3M2EwY2I=
9
+ MWI2ZjU2NmI5ZDJiNmQ3MzExYjA4ZmFmMzRlNjZlYjg0MzAwZWFjNjIyM2M4
10
+ MjVlZmQ0ODgwOTA1N2M4ODUzNTNhMjYwODMzNmRiNzNkMTczNTA5MGQ1MDU4
11
+ ZmRjMGIwY2Y1Y2I3NDYxNjgzMGVlMDEwMzU2Yjc2MjBiMTNiOTE=
12
12
  data.tar.gz: !binary |-
13
- MTY1YmRhZTAwZjc5NTUxMjUxNDM1NzQyNzY0NmQ2YTI5NDI2NGRiMjA5YTA4
14
- YzBiNjlmNzhjZjI2MDViYTVmOGY1NmQyMmE5ODM1ZmZiMWZjMGQ4YzI5NGQ1
15
- ZTdiZjEzOWQ1YjMyYWQ3ZGI0MWY2NWMyYTk4MDhlMGNmY2JhOWQ=
13
+ NGY3MDRiNjgxOWNiOGUwZThhOGM0MWExYzczM2UxOThlM2FkMDBkYTJmMDYx
14
+ NzkzYTlkYmE3YzQ3ZjNkN2IyY2ExMGU2ZmFhZDBlOGE4NjdlZTBmMDU2NmY0
15
+ NTA1NGM5NGIwZjA0Yzg2MDk5NjZjZGZjZDE4Nzg1ZDk1ZjFmOWU=
data/CHANGELOG CHANGED
@@ -1,3 +1,7 @@
1
+ Version 0.3.1 (2016-07-06)
2
+ --------------------------
3
+ Replace module_function everywhere with self (#46)
4
+
1
5
  Version 0.3.0 (2016-07-05)
2
6
  --------------------------
3
7
  Update Bundler version in Travis (#45)
data/README.md CHANGED
@@ -26,7 +26,7 @@ Sluice has been extracted from a pair of Ruby ETL applications built by the [Sno
26
26
 
27
27
  Or in your Gemfile:
28
28
 
29
- gem 'sluice', '~> 0.3.0'
29
+ gem 'sluice', '~> 0.3.1'
30
30
 
31
31
  ## Usage
32
32
 
@@ -43,7 +43,7 @@ module Sluice
43
43
  # +access_key_id+:: AWS access key ID
44
44
  # +secret_access_key+:: AWS secret access key
45
45
  Contract String, String, String => FogStorage
46
- def new_fog_s3_from(region, access_key_id, secret_access_key)
46
+ def self.new_fog_s3_from(region, access_key_id, secret_access_key)
47
47
  fog = Fog::Storage.new({
48
48
  :provider => 'AWS',
49
49
  :region => region,
@@ -53,7 +53,6 @@ module Sluice
53
53
  fog.sync_clock
54
54
  fog
55
55
  end
56
- module_function :new_fog_s3_from
57
56
 
58
57
  # Return an array of all Fog::Storage::AWS::File's
59
58
  #
@@ -63,7 +62,7 @@ module Sluice
63
62
  #
64
63
  # Returns array of Fog::Storage::AWS::File's
65
64
  Contract FogStorage, Location => ArrayOf[FogFile]
66
- def list_files(s3, location)
65
+ def self.list_files(s3, location)
67
66
  files_and_dirs = s3.directories.get(location.bucket, prefix: location.dir_as_path).files
68
67
 
69
68
  files = [] # Can't use a .select because of Ruby deep copy issues (array of non-POROs)
@@ -74,7 +73,6 @@ module Sluice
74
73
  }
75
74
  files
76
75
  end
77
- module_function :list_files
78
76
 
79
77
  # Whether the given path is a directory or not
80
78
  #
@@ -83,11 +81,10 @@ module Sluice
83
81
  #
84
82
  # Returns boolean
85
83
  Contract String => Bool
86
- def is_folder?(path)
84
+ def self.is_folder?(path)
87
85
  (path.end_with?('_$folder$') || # EMR-created
88
86
  path.end_with?('/'))
89
87
  end
90
- module_function :is_folder?
91
88
 
92
89
  # Whether the given path is a file or not
93
90
  #
@@ -96,10 +93,9 @@ module Sluice
96
93
  #
97
94
  # Returns boolean
98
95
  Contract String => Bool
99
- def is_file?(path)
96
+ def self.is_file?(path)
100
97
  !is_folder?(path)
101
98
  end
102
- module_function :is_file?
103
99
 
104
100
  # Returns the basename for the given path
105
101
  #
@@ -109,7 +105,7 @@ module Sluice
109
105
  # Returns the basename, or nil if the
110
106
  # path is to a folder
111
107
  Contract nil => String
112
- def get_basename(path)
108
+ def self.get_basename(path)
113
109
  if is_folder?(path)
114
110
  nil
115
111
  else
@@ -121,7 +117,6 @@ module Sluice
121
117
  end
122
118
  end
123
119
  end
124
- module_function :get_basename
125
120
 
126
121
  # Determine if a bucket is empty
127
122
  #
@@ -129,10 +124,9 @@ module Sluice
129
124
  # +s3+:: A Fog::Storage s3 connection
130
125
  # +location+:: The location to check
131
126
  Contract FogStorage, Location => Bool
132
- def is_empty?(s3, location)
127
+ def self.is_empty?(s3, location)
133
128
  list_files(s3, location).length == 0
134
129
  end
135
- module_function :is_empty?
136
130
 
137
131
  # Download files from an S3 location to
138
132
  # local storage, concurrently
@@ -142,12 +136,11 @@ module Sluice
142
136
  # +from_files_or_loc+:: Array of filepaths or Fog::Storage::AWS::File objects, or S3Location to download files from
143
137
  # +to_directory+:: Local directory to copy files to
144
138
  # +match_regex+:: a regex string to match the files to delete
145
- def download_files(s3, from_files_or_loc, to_directory, match_regex='.+')
139
+ def self.download_files(s3, from_files_or_loc, to_directory, match_regex='.+')
146
140
 
147
141
  puts " downloading #{describe_from(from_files_or_loc)} to #{to_directory}"
148
142
  process_files(:download, s3, from_files_or_loc, [], match_regex, to_directory)
149
143
  end
150
- module_function :download_files
151
144
 
152
145
  # Delete files from S3 locations concurrently
153
146
  #
@@ -155,12 +148,11 @@ module Sluice
155
148
  # +s3+:: A Fog::Storage s3 connection
156
149
  # +from_files_or_loc+:: Array of filepaths or Fog::Storage::AWS::File objects, or S3Location to delete files from
157
150
  # +match_regex+:: a regex string to match the files to delete
158
- def delete_files(s3, from_files_or_loc, match_regex='.+')
151
+ def self.delete_files(s3, from_files_or_loc, match_regex='.+')
159
152
 
160
153
  puts " deleting #{describe_from(from_files_or_loc)}"
161
154
  process_files(:delete, s3, from_files_or_loc, [], match_regex)
162
155
  end
163
- module_function :delete_files
164
156
 
165
157
  # Copies files between S3 locations in two different accounts
166
158
  #
@@ -181,7 +173,7 @@ module Sluice
181
173
  # +match_regex+:: a regex string to match the files to move
182
174
  # +alter_filename_lambda+:: lambda to alter the written filename
183
175
  # +flatten+:: strips off any sub-folders below the from_location
184
- def copy_files_inter(from_s3, to_s3, from_location, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
176
+ def self.copy_files_inter(from_s3, to_s3, from_location, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
185
177
 
186
178
  puts " copying inter-account #{describe_from(from_location)} to #{to_location}"
187
179
  processed = []
@@ -193,7 +185,6 @@ module Sluice
193
185
 
194
186
  processed
195
187
  end
196
- module_function :copy_files_inter
197
188
 
198
189
  # Copies files between S3 locations concurrently
199
190
  #
@@ -204,12 +195,11 @@ module Sluice
204
195
  # +match_regex+:: a regex string to match the files to copy
205
196
  # +alter_filename_lambda+:: lambda to alter the written filename
206
197
  # +flatten+:: strips off any sub-folders below the from_location
207
- def copy_files(s3, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
198
+ def self.copy_files(s3, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
208
199
 
209
200
  puts " copying #{describe_from(from_files_or_loc)} to #{to_location}"
210
201
  process_files(:copy, s3, from_files_or_loc, [], match_regex, to_location, alter_filename_lambda, flatten)
211
202
  end
212
- module_function :copy_files
213
203
 
214
204
  # Copies files between S3 locations maintaining a manifest to
215
205
  # avoid copying a file which was copied previously.
@@ -227,7 +217,7 @@ module Sluice
227
217
  # +match_regex+:: a regex string to match the files to copy
228
218
  # +alter_filename_lambda+:: lambda to alter the written filename
229
219
  # +flatten+:: strips off any sub-folders below the from_location
230
- def copy_files_manifest(s3, manifest, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
220
+ def self.copy_files_manifest(s3, manifest, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
231
221
 
232
222
  puts " copying with manifest #{describe_from(from_files_or_loc)} to #{to_location}"
233
223
  ignore = manifest.get_entries(s3) # Files to leave untouched
@@ -236,7 +226,6 @@ module Sluice
236
226
 
237
227
  processed
238
228
  end
239
- module_function :copy_files_manifest
240
229
 
241
230
  # Moves files between S3 locations in two different accounts
242
231
  #
@@ -255,7 +244,7 @@ module Sluice
255
244
  # +match_regex+:: a regex string to match the files to move
256
245
  # +alter_filename_lambda+:: lambda to alter the written filename
257
246
  # +flatten+:: strips off any sub-folders below the from_location
258
- def move_files_inter(from_s3, to_s3, from_location, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
247
+ def self.move_files_inter(from_s3, to_s3, from_location, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
259
248
 
260
249
  puts " moving inter-account #{describe_from(from_location)} to #{to_location}"
261
250
  processed = []
@@ -268,7 +257,6 @@ module Sluice
268
257
 
269
258
  processed
270
259
  end
271
- module_function :move_files_inter
272
260
 
273
261
  # Moves files between S3 locations concurrently
274
262
  #
@@ -279,12 +267,11 @@ module Sluice
279
267
  # +match_regex+:: a regex string to match the files to move
280
268
  # +alter_filename_lambda+:: lambda to alter the written filename
281
269
  # +flatten+:: strips off any sub-folders below the from_location
282
- def move_files(s3, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
270
+ def self.move_files(s3, from_files_or_loc, to_location, match_regex='.+', alter_filename_lambda=false, flatten=false)
283
271
 
284
272
  puts " moving #{describe_from(from_files_or_loc)} to #{to_location}"
285
273
  process_files(:move, s3, from_files_or_loc, [], match_regex, to_location, alter_filename_lambda, flatten)
286
274
  end
287
- module_function :move_files
288
275
 
289
276
  # Uploads files to S3 locations concurrently
290
277
  #
@@ -293,12 +280,11 @@ module Sluice
293
280
  # +from_files_or_dir+:: Local array of files or local directory to upload files from
294
281
  # +to_location+:: S3Location to upload files to
295
282
  # +match_glob+:: a filesystem glob to match the files to upload
296
- def upload_files(s3, from_files_or_dir, to_location, match_glob='*')
283
+ def self.upload_files(s3, from_files_or_dir, to_location, match_glob='*')
297
284
 
298
285
  puts " uploading #{describe_from(from_files_or_dir)} to #{to_location}"
299
286
  process_files(:upload, s3, from_files_or_dir, [], match_glob, to_location)
300
287
  end
301
- module_function :upload_files
302
288
 
303
289
  # Upload a single file to the exact location specified
304
290
  # Has no intelligence around filenaming.
@@ -308,7 +294,7 @@ module Sluice
308
294
  # +from_file:: A local file path
309
295
  # +to_bucket:: The Fog::Directory to upload to
310
296
  # +to_file:: The file path to upload to
311
- def upload_file(s3, from_file, to_bucket, to_file)
297
+ def self.upload_file(s3, from_file, to_bucket, to_file)
312
298
 
313
299
  local_file = File.open(from_file)
314
300
 
@@ -320,7 +306,6 @@ module Sluice
320
306
 
321
307
  local_file.close
322
308
  end
323
- module_function :upload_file
324
309
 
325
310
  # Download a single file to the exact path specified
326
311
  # Has no intelligence around filenaming.
@@ -330,7 +315,7 @@ module Sluice
330
315
  # +s3+:: A Fog::Storage s3 connection
331
316
  # +from_file:: A Fog::Storage::AWS::File to download
332
317
  # +to_file:: A local file path
333
- def download_file(s3, from_file, to_file)
318
+ def self.download_file(s3, from_file, to_file)
334
319
 
335
320
  FileUtils.mkdir_p(File.dirname(to_file))
336
321
 
@@ -340,7 +325,6 @@ module Sluice
340
325
  local_file.write(from_file.body)
341
326
  local_file.close
342
327
  end
343
- module_function :download_file
344
328
 
345
329
  private
346
330
 
@@ -351,14 +335,13 @@ module Sluice
351
335
  # +from_files_or_dir_or_loc+:: Array of filepaths or Fog::Storage::AWS::File objects, local directory or S3Location to process files from
352
336
  #
353
337
  # Returns a log-friendly string
354
- def describe_from(from_files_or_dir_or_loc)
338
+ def self.describe_from(from_files_or_dir_or_loc)
355
339
  if from_files_or_dir_or_loc.is_a?(Array)
356
340
  "#{from_files_or_dir_or_loc.length} file(s)"
357
341
  else
358
342
  "files from #{from_files_or_dir_or_loc}"
359
343
  end
360
344
  end
361
- module_function :describe_from
362
345
 
363
346
  # Concurrent file operations between S3 locations. Supports:
364
347
  # - Download
@@ -376,7 +359,7 @@ module Sluice
376
359
  # +to_loc_or_dir+:: S3Location or local directory to process files to
377
360
  # +alter_filename_lambda+:: lambda to alter the written filename
378
361
  # +flatten+:: strips off any sub-folders below the from_loc_or_dir
379
- def process_files(operation, s3, from_files_or_dir_or_loc, ignore=[], match_regex_or_glob='.+', to_loc_or_dir=nil, alter_filename_lambda=false, flatten=false)
362
+ def self.process_files(operation, s3, from_files_or_dir_or_loc, ignore=[], match_regex_or_glob='.+', to_loc_or_dir=nil, alter_filename_lambda=false, flatten=false)
380
363
 
381
364
  # Validate that the file operation makes sense
382
365
  case operation
@@ -589,7 +572,6 @@ module Sluice
589
572
 
590
573
  processed_files # Return the processed files
591
574
  end
592
- module_function :process_files
593
575
 
594
576
  # A helper function to rename a file
595
577
  # TODO: fixup lambda to be Maybe[Proc]
@@ -620,12 +602,11 @@ module Sluice
620
602
  # +match_regex+:: a regex string to match the files to copy
621
603
  #
622
604
  # Returns array of files (no sub-directories)
623
- def glob_files(dir, glob)
605
+ def self.glob_files(dir, glob)
624
606
  Dir.glob(File.join(dir, glob)).select { |f|
625
607
  File.file?(f) # Drop sub-directories
626
608
  }
627
609
  end
628
- module_function :glob_files
629
610
 
630
611
  # A helper function to attempt to run a
631
612
  # function retries times
@@ -636,7 +617,7 @@ module Sluice
636
617
  # +retries+:: Number of retries to attempt
637
618
  # +attempt_msg+:: Message to puts on each attempt
638
619
  # +failure_msg+:: Message to puts on each failure
639
- def retry_x(object, send_args, retries, attempt_msg, failure_msg)
620
+ def self.retry_x(object, send_args, retries, attempt_msg, failure_msg)
640
621
  i = 0
641
622
  begin
642
623
  Timeout::timeout(TIMEOUT_WAIT) do # In case our operation times out
@@ -651,7 +632,6 @@ module Sluice
651
632
  retry
652
633
  end
653
634
  end
654
- module_function :retry_x
655
635
 
656
636
  # A helper function to prepare destination
657
637
  # filenames and paths. This is a bit weird
@@ -667,7 +647,7 @@ module Sluice
667
647
  # +flatten+:: strips off any sub-folders below the from_location
668
648
  #
669
649
  # TODO: this badly needs unit tests
670
- def name_file(filepath, new_filename, remove_path=nil, add_path=nil, flatten=false)
650
+ def self.name_file(filepath, new_filename, remove_path=nil, add_path=nil, flatten=false)
671
651
 
672
652
  # First, replace the filename in filepath with new one
673
653
  dirname = File.dirname(filepath)
@@ -698,7 +678,6 @@ module Sluice
698
678
  # Add the new filepath on to the start and return
699
679
  return add_path + shortened_filepath
700
680
  end
701
- module_function :name_file
702
681
 
703
682
  end
704
683
  end
@@ -27,7 +27,7 @@ module Sluice
27
27
  # +end_date+:: end date
28
28
  # +date_format:: format of date in filenames
29
29
  # +file_ext:: extension on files (if any)
30
- def files_between(start_date, end_date, date_format, file_ext=nil)
30
+ def self.files_between(start_date, end_date, date_format, file_ext=nil)
31
31
 
32
32
  dates = []
33
33
  Date.parse(start_date).upto(Date.parse(end_date)) do |day|
@@ -36,19 +36,17 @@ module Sluice
36
36
 
37
37
  '(' + dates.join('|') + ')[^/]+%s$' % regexify(file_ext)
38
38
  end
39
- module_function :files_between
40
39
 
41
40
  # Add a trailing slash to a path if missing.
42
41
  # Tolerates a nil path.
43
42
  #
44
43
  # Parameters:
45
44
  # +path+:: path to add a trailing slash to
46
- def trail_slash(path)
45
+ def self.trail_slash(path)
47
46
  unless path.nil?
48
47
  path[-1].chr != '/' ? path << '/' : path
49
48
  end
50
49
  end
51
- module_function :trail_slash
52
50
 
53
51
  # Find files up to (and including) the given date.
54
52
  #
@@ -59,7 +57,7 @@ module Sluice
59
57
  # +end_date+:: end date
60
58
  # +date_format:: format of date in filenames
61
59
  # +file_ext:: extension on files (if any)
62
- def files_up_to(end_date, date_format, file_ext=nil)
60
+ def self.files_up_to(end_date, date_format, file_ext=nil)
63
61
 
64
62
  # Let's create a black list from the day
65
63
  # after the end_date up to today
@@ -73,7 +71,6 @@ module Sluice
73
71
 
74
72
  NegativeRegex.new('(' + dates.join('|') + ')[^/]+%s$' % regexify(file_ext))
75
73
  end
76
- module_function :files_up_to
77
74
 
78
75
  # Find files starting from the given date.
79
76
  #
@@ -81,7 +78,7 @@ module Sluice
81
78
  # +start_date+:: start date
82
79
  # +date_format:: format of date in filenames
83
80
  # +file_ext:: extension on files (if any); include period
84
- def files_from(start_date, date_format, file_ext=nil)
81
+ def self.files_from(start_date, date_format, file_ext=nil)
85
82
 
86
83
  # Let's create a white list from the start_date to today
87
84
  today = Date.today
@@ -93,7 +90,6 @@ module Sluice
93
90
 
94
91
  '(' + dates.join('|') + ')[^/]+%s$' % regexify(file_ext)
95
92
  end
96
- module_function :files_from
97
93
 
98
94
  private
99
95
 
@@ -102,10 +98,9 @@ module Sluice
102
98
  #
103
99
  # Parameters:
104
100
  # +file_ext:: the file extension to make regexp friendly
105
- def regexify(file_ext)
101
+ def self.regexify(file_ext)
106
102
  file_ext.nil? ? nil : file_ext[0].chr != '.' ? '\\.' << file_ext : '\\' << file_ext
107
103
  end
108
- module_function :regexify
109
104
 
110
105
  end
111
106
  end
@@ -15,5 +15,5 @@
15
15
 
16
16
  module Sluice
17
17
  NAME = "sluice"
18
- VERSION = "0.3.0"
18
+ VERSION = "0.3.1"
19
19
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sluice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Dean
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-07-05 00:00:00.000000000 Z
12
+ date: 2016-07-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: contracts