sluice 0.1.5 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NjFiZjc1OTY1NjY1ZWE2YWY2NDExZWZjMTJkYTQyM2IzYWNmZGVlOA==
5
+ data.tar.gz: !binary |-
6
+ Y2RlZDExYTlmNTEzODgzOTk0NzM2NTU0YWJhYmRjMzE4ZGQ3NjYwZQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ OWNlY2YzMjBiZjYwNzgwYmUzODNhNGM2Yzc3NDEyNzI3MmRkYWFmMTkyMDIy
10
+ NjU4ZWUzZTY0MDg5ZmM1MjNjODJkNjE2MGIxZWQ1YzkwODliNDRjZmU0Y2Q4
11
+ ZWUxODc2M2MxNzdiMDEyZGIwOWMxZWQ2YTM3ZjczNzZjOTQyNGU=
12
+ data.tar.gz: !binary |-
13
+ NmYwZjA1MTU5MzQ2NGEwNjI1Y2FiYmUwZmZhNzg4YjAyODJlNTBkMTRhZGQw
14
+ OTRkZTA1MTZkZDZiMmE2OWQ1OGNhZDUwYTFmYjQyNmQzM2E5YzQ5M2I3MTdi
15
+ NDJkYzkxOTEwM2Q0Njc4ZDU2MzBmYmYyMWRlYWZiMWZkNWY3NTk=
@@ -0,0 +1 @@
1
+ service_name: travis-ci
@@ -0,0 +1,10 @@
1
+ language: ruby
2
+ cache: bundler
3
+
4
+ rvm:
5
+ - 1.9.3
6
+ - jruby
7
+ - 2.0.0
8
+ - 2.1.0
9
+
10
+ script: 'bundle exec rspec spec'
data/CHANGELOG CHANGED
@@ -1,3 +1,20 @@
1
+ Version 0.2.0 (2014-05-16)
2
+ --------------------------
3
+ Bumped Contracts to 0.4 (#22)
4
+ Bumped Fog to 1.22.0 (#24)
5
+ Added gem button to README (#18)
6
+ Added Coveralls code coverage to project (#17)
7
+ Added Code Climate button to README (#23)
8
+ Added Travis support to project (#16)
9
+ Added initial unit tests (#15)
10
+ Added in FogFile = Fog::Storage::AWS::File-based contracts (#12)
11
+ Added additional contracts (#20)
12
+ Broke up s3.rb into separate files (#21)
13
+ Updated alter_filename_lambda to accept original filepath as 2nd arg (#19)
14
+ Overrode equality operator for S3::Location to support tests (#10)
15
+ Fixed break bugs in core Sluice process flow (#6)
16
+ Made is_empty? work if another folder starts with the same name as this one (#5)
17
+
1
18
  Version 0.1.5 (2013-10-13)
2
19
  --------------------------
3
20
  Fixed is_empty? returns true if folder contains 1 file (#9)
@@ -0,0 +1,11 @@
1
+ guard 'rspec' do
2
+ # watch /lib/ files
3
+ watch(%r{^lib/(.+).rb$}) do |m|
4
+ "spec/#{m[1]}_spec.rb"
5
+ end
6
+
7
+ # watch /spec/ files
8
+ watch(%r{^spec/(.+).rb$}) do |m|
9
+ "spec/#{m[1]}.rb"
10
+ end
11
+ end
data/README.md CHANGED
@@ -1,4 +1,8 @@
1
1
  # Sluice
2
+ [![Gem Version](https://badge.fury.io/rb/sluice.svg)](http://badge.fury.io/rb/sluice)
3
+ [![Build Status](https://travis-ci.org/snowplow/sluice.png)](https://travis-ci.org/snowplow/sluice)
4
+ [![Code Climate](https://codeclimate.com/github/snowplow/sluice.png)](https://codeclimate.com/github/snowplow/sluice)
5
+ [![Coverage Status](https://coveralls.io/repos/snowplow/sluice/badge.png?branch=master)](https://coveralls.io/r/snowplow/sluice?branch=master)
2
6
 
3
7
  Sluice is a Ruby gem (built with [Bundler] [bundler]) to help you build cloud-friendly ETL (extract, transform, load) processes.
4
8
 
@@ -21,7 +25,7 @@ Sluice has been extracted from a pair of Ruby ETL applications built by the [Sno
21
25
 
22
26
  Or in your Gemfile:
23
27
 
24
- gem 'sluice', '~> 0.1.0'
28
+ gem 'sluice', '~> 0.2.0'
25
29
 
26
30
  ## Usage
27
31
 
@@ -32,7 +36,7 @@ Rubydoc and usage examples to come.
32
36
  To hack on Sluice locally:
33
37
 
34
38
  $ gem build sluice.gemspec
35
- $ sudo gem install sluice-0.1.0.gem
39
+ $ sudo gem install sluice-0.2.0.gem
36
40
 
37
41
  To contribute:
38
42
 
@@ -48,7 +52,7 @@ Sluice was developed by [Alex Dean] [alexanderdean] ([Snowplow Analytics] [snowp
48
52
 
49
53
  ## Copyright and license
50
54
 
51
- Sluice is copyright 2012-2013 Snowplow Analytics Ltd.
55
+ Sluice is copyright 2012-2014 Snowplow Analytics Ltd.
52
56
 
53
57
  Licensed under the [Apache License, Version 2.0] [license] (the "License");
54
58
  you may not use this software except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
2
  #
3
3
  # This program is licensed to you under the Apache License Version 2.0,
4
4
  # and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -10,14 +10,12 @@
10
10
  # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
11
 
12
12
  # Author:: Alex Dean (mailto:support@snowplowanalytics.com)
13
- # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
14
  # License:: Apache License Version 2.0
15
15
 
16
16
  require 'sluice/errors'
17
17
  require 'sluice/storage/storage'
18
- require 'sluice/storage/s3'
19
-
20
- module Sluice
21
- NAME = "sluice"
22
- VERSION = "0.1.5"
23
- end
18
+ require 'sluice/storage/s3/contracts'
19
+ require 'sluice/storage/s3/location'
20
+ require 'sluice/storage/s3/manifest'
21
+ require 'sluice/storage/s3/s3'
@@ -1,4 +1,4 @@
1
- # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
2
  #
3
3
  # This program is licensed to you under the Apache License Version 2.0,
4
4
  # and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -10,7 +10,7 @@
10
10
  # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
11
 
12
12
  # Author:: Alex Dean (mailto:support@snowplowanalytics.com)
13
- # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
14
  # License:: Apache License Version 2.0
15
15
 
16
16
  # All errors
@@ -0,0 +1,32 @@
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Authors:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'fog'
17
+ require 'fog/aws/models/storage/file'
18
+
19
+ require 'contracts'
20
+ include Contracts
21
+
22
+ module Sluice
23
+ module Storage
24
+ module S3
25
+
26
+ # Aliases for Contracts
27
+ FogStorage = Fog::Storage::AWS::Real
28
+ FogFile = Fog::Storage::AWS::File
29
+
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,77 @@
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Authors:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'contracts'
17
+ include Contracts
18
+
19
+ module Sluice
20
+ module Storage
21
+ module S3
22
+
23
+ # Class to describe an S3 location
24
+ # TODO: if we are going to require trailing slashes on
25
+ # buckets, maybe we should make that clearer?
26
+ class Location
27
+
28
+ attr_reader :bucket, :dir
29
+
30
+ # Location constructor
31
+ #
32
+ # Parameters:
33
+ # +s3_location+:: the s3 location config string e.g. "s3://bucket/directory/" (s3:// or s3n:// prefix and trailing slash required)
34
+ Contract String => Location
35
+ def initialize(s3_location)
36
+ @s3_location = s3_location
37
+
38
+ s3_location_match = s3_location.match('^s3n?://([^/]+)/?(.*)/$')
39
+ raise ArgumentError, 'Bad S3 location %s' % s3_location unless s3_location_match
40
+
41
+ @bucket = s3_location_match[1]
42
+ @dir = s3_location_match[2]
43
+ self
44
+ end
45
+
46
+ Contract nil => String
47
+ def dir_as_path
48
+ if @dir.length > 0
49
+ return @dir+'/'
50
+ else
51
+ return ''
52
+ end
53
+ end
54
+
55
+ Contract nil => String
56
+ def to_s
57
+ @s3_location
58
+ end
59
+
60
+ Contract Any => Bool
61
+ def ==(o)
62
+ o.class == self.class && o.state == state
63
+ end
64
+ alias_method :eql?, :==
65
+
66
+ protected
67
+
68
+ Contract nil => [String, String, String]
69
+ def state
70
+ [@s3_location, @bucket, @dir]
71
+ end
72
+
73
+ end
74
+
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,129 @@
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Authors:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'set'
17
+
18
+ require 'contracts'
19
+ include Contracts
20
+
21
+ module Sluice
22
+ module Storage
23
+ module S3
24
+
25
+ # Legitimate manifest scopes:
26
+ # 1. :filename - store only the filename
27
+ # in the manifest
28
+ # 2. :relpath - store the relative path
29
+ # to the file in the manifest
30
+ # 3. :abspath - store the absolute path
31
+ # to the file in the manifest
32
+ # 4. :bucket - store bucket PLUS absolute
33
+ # path to the file in the manifest
34
+ #
35
+ # TODO: add support for 2-4. Currently only 1 supported
36
+ class ManifestScope
37
+
38
+ @@scopes = Set::[](:filename) # TODO add :relpath, :abspath, :bucket
39
+
40
+ def self.valid?(val)
41
+ val.is_a?(Symbol) &&
42
+ @@scopes.include?(val)
43
+ end
44
+ end
45
+
46
+ # Class to read and maintain a manifest.
47
+ class Manifest
48
+ attr_reader :s3_location, :scope, :manifest_file
49
+
50
+ # Manifest constructor
51
+ #
52
+ # Parameters:
53
+ # +s3_location+:: Location of the folder in which the manifest file is kept
54
+ # +scope+:: whether file entries in the
55
+ # manifest should be scoped to
56
+ # filename, relative path, absolute
57
+ # path, or absolute path and bucket
58
+ Contract Location, ManifestScope => nil
59
+ def initialize(s3_location, scope)
60
+ @s3_location = s3_location
61
+ @scope = scope
62
+ @manifest_file = "%ssluice-%s-manifest" % [s3_location.dir_as_path, scope.to_s]
63
+ nil
64
+ end
65
+
66
+ # Get the current file entries in the manifest
67
+ #
68
+ # Parameters:
69
+ # +s3+:: A Fog::Storage s3 connection
70
+ #
71
+ # Returns an Array of filenames as Strings
72
+ Contract FogStorage => ArrayOf[String]
73
+ def get_entries(s3)
74
+
75
+ manifest = self.class.get_manifest(s3, @s3_location, @manifest_file)
76
+ if manifest.nil?
77
+ return []
78
+ end
79
+
80
+ manifest.body.split("\n").reject(&:empty?)
81
+ end
82
+
83
+ # Add (i.e. append) the following file entries
84
+ # to the manifest
85
+ # Files listed previously in the manifest will
86
+ # be kept in the new manifest file.
87
+ #
88
+ # Parameters:
89
+ # +s3+:: A Fog::Storage s3 connection
90
+ # +entries+:: an Array of filenames as Strings
91
+ #
92
+ # Returns all entries now in the manifest
93
+ Contract FogStorage, ArrayOf[String] => ArrayOf[String]
94
+ def add_entries(s3, entries)
95
+
96
+ existing = get_entries(s3)
97
+ filenames = entries.map { |filepath|
98
+ File.basename(filepath)
99
+ } # TODO: update when non-filename-based manifests supported
100
+ all = (existing + filenames)
101
+
102
+ manifest = self.class.get_manifest(s3, @s3_location, @manifest_file)
103
+ body = all.join("\n")
104
+ if manifest.nil?
105
+ bucket = s3.directories.get(s3_location.bucket).files.create(
106
+ :key => @manifest_file,
107
+ :body => body
108
+ )
109
+ else
110
+ manifest.body = body
111
+ manifest.save
112
+ end
113
+
114
+ all
115
+ end
116
+
117
+ private
118
+
119
+ # Helper to get the manifest file
120
+ Contract FogStorage, Location, String => Maybe[FogFile]
121
+ def self.get_manifest(s3, s3_location, filename)
122
+ s3.directories.get(s3_location.bucket, prefix: s3_location.dir).files.get(filename) # TODO: break out into new generic get_file() procedure
123
+ end
124
+
125
+ end
126
+
127
+ end
128
+ end
129
+ end
@@ -1,4 +1,4 @@
1
- # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
2
  #
3
3
  # This program is licensed to you under the Apache License Version 2.0,
4
4
  # and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -10,10 +10,9 @@
10
10
  # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
11
 
12
12
  # Authors:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
- # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
14
  # License:: Apache License Version 2.0
15
15
 
16
- require 'set'
17
16
  require 'tmpdir'
18
17
  require 'fog'
19
18
  require 'thread'
@@ -35,145 +34,6 @@ module Sluice
35
34
  RETRY_WAIT = 10 # Seconds
36
35
  TIMEOUT_WAIT = 1800 # 30 mins should let even large files upload. +1 https://github.com/snowplow/sluice/issues/7 if this is insufficient or excessive
37
36
 
38
- # Aliases for Contracts
39
- FogStorage = Fog::Storage::AWS::Real
40
- # FogFile = Fog::Storage::AWS::File TODO: fix - gives: warning: toplevel constant File referenced by Fog::Storage::AWS::File
41
-
42
- # Class to describe an S3 location
43
- # TODO: if we are going to impose trailing line-breaks on
44
- # buckets, maybe we should make that clearer?
45
- class Location
46
- attr_reader :bucket, :dir, :s3location
47
-
48
- # Location constructor
49
- #
50
- # Parameters:
51
- # +s3location+:: the s3 location config string e.g. "bucket/directory"
52
- def initialize(s3_location)
53
- @s3_location = s3_location
54
-
55
- s3_location_match = s3_location.match('^s3n?://([^/]+)/?(.*)/$')
56
- raise ArgumentError, 'Bad S3 location %s' % s3_location unless s3_location_match
57
-
58
- @bucket = s3_location_match[1]
59
- @dir = s3_location_match[2]
60
- end
61
-
62
- def dir_as_path
63
- if @dir.length > 0
64
- return @dir+'/'
65
- else
66
- return ''
67
- end
68
- end
69
-
70
- def to_s
71
- @s3_location
72
- end
73
- end
74
-
75
- # Legitimate manifest scopes:
76
- # 1. :filename - store only the filename
77
- # in the manifest
78
- # 2. :relpath - store the relative path
79
- # to the file in the manifest
80
- # 3. :abspath - store the absolute path
81
- # to the file in the manifest
82
- # 4. :bucket - store bucket PLUS absolute
83
- # path to the file in the manifest
84
- #
85
- # TODO: add support for 2-4. Currently only 1 supported
86
- class ManifestScope
87
-
88
- @@scopes = Set::[](:filename) # TODO add :relpath, :abspath, :bucket
89
-
90
- def self.valid?(val)
91
- val.is_a?(Symbol) &&
92
- @@scopes.include?(val)
93
- end
94
- end
95
-
96
- # Class to read and maintain a manifest.
97
- class Manifest
98
- attr_reader :s3_location, :scope, :manifest_file
99
-
100
- # Manifest constructor
101
- #
102
- # Parameters:
103
- # +path+:: full path to the manifest file
104
- # +scope+:: whether file entries in the
105
- # manifest should be scoped to
106
- # filename, relative path, absolute
107
- # path, or absolute path and bucket
108
- Contract Location, ManifestScope => nil
109
- def initialize(s3_location, scope)
110
- @s3_location = s3_location
111
- @scope = scope
112
- @manifest_file = "%ssluice-%s-manifest" % [s3_location.dir_as_path, scope.to_s]
113
- nil
114
- end
115
-
116
- # Get the current file entries in the manifest
117
- #
118
- # Parameters:
119
- # +s3+:: A Fog::Storage s3 connection
120
- #
121
- # Returns an Array of filenames as Strings
122
- Contract FogStorage => ArrayOf[String]
123
- def get_entries(s3)
124
-
125
- manifest = self.class.get_manifest(s3, @s3_location, @manifest_file)
126
- if manifest.nil?
127
- return []
128
- end
129
-
130
- manifest.body.split("\n").reject(&:empty?)
131
- end
132
-
133
- # Add (i.e. append) the following file entries
134
- # to the manifest
135
- # Files listed previously in the manifest will
136
- # be kept in the new manifest file.
137
- #
138
- # Parameters:
139
- # +s3+:: A Fog::Storage s3 connection
140
- # +entries+:: an Array of filenames as Strings
141
- #
142
- # Returns all entries now in the manifest
143
- Contract FogStorage, ArrayOf[String] => ArrayOf[String]
144
- def add_entries(s3, entries)
145
-
146
- existing = get_entries(s3)
147
- filenames = entries.map { |filepath|
148
- File.basename(filepath)
149
- } # TODO: update when non-filename-based manifests supported
150
- all = (existing + filenames)
151
-
152
- manifest = self.class.get_manifest(s3, @s3_location, @manifest_file)
153
- body = all.join("\n")
154
- if manifest.nil?
155
- bucket = s3.directories.get(s3_location.bucket).files.create(
156
- :key => @manifest_file,
157
- :body => body
158
- )
159
- else
160
- manifest.body = body
161
- manifest.save
162
- end
163
-
164
- all
165
- end
166
-
167
- private
168
-
169
- # Helper to get the manifest file
170
- # Contract FogStorage, Location, String => Or[FogFile, nil] TODO: fix this. Expected: File, Actual: <Fog::Storage::AWS::File>
171
- def self.get_manifest(s3, s3_location, filename)
172
- s3.directories.get(s3_location.bucket, prefix: s3_location.dir).files.get(filename) # TODO: break out into new generic get_file() procedure
173
- end
174
-
175
- end
176
-
177
37
  # Helper function to instantiate a new Fog::Storage
178
38
  # for S3 based on our config options
179
39
  #
@@ -201,8 +61,9 @@ module Sluice
201
61
  # +location+:: The location to return files from
202
62
  #
203
63
  # Returns array of Fog::Storage::AWS::File's
64
+ Contract FogStorage, Location => ArrayOf[FogFile]
204
65
  def list_files(s3, location)
205
- files_and_dirs = s3.directories.get(location.bucket, prefix: location.dir).files
66
+ files_and_dirs = s3.directories.get(location.bucket, prefix: location.dir_as_path).files
206
67
 
207
68
  files = [] # Can't use a .select because of Ruby deep copy issues (array of non-POROs)
208
69
  files_and_dirs.each { |f|
@@ -220,6 +81,7 @@ module Sluice
220
81
  # +path+:: S3 path in String form
221
82
  #
222
83
  # Returns boolean
84
+ Contract String => Bool
223
85
  def is_folder?(path)
224
86
  (path.end_with?('_$folder$') || # EMR-created
225
87
  path.end_with?('/'))
@@ -232,6 +94,7 @@ module Sluice
232
94
  # +path+:: S3 path in String form
233
95
  #
234
96
  # Returns boolean
97
+ Contract String => Bool
235
98
  def is_file?(path)
236
99
  !is_folder?(path)
237
100
  end
@@ -244,6 +107,7 @@ module Sluice
244
107
  #
245
108
  # Returns the basename, or nil if the
246
109
  # path is to a folder
110
+ Contract nil => String
247
111
  def get_basename(path)
248
112
  if is_folder?(path)
249
113
  nil
@@ -263,6 +127,7 @@ module Sluice
263
127
  # Parameters:
264
128
  # +s3+:: A Fog::Storage s3 connection
265
129
  # +location+:: The location to check
130
+ Contract FogStorage, Location => Bool
266
131
  def is_empty?(s3, location)
267
132
  list_files(s3, location).length == 0
268
133
  end
@@ -620,6 +485,7 @@ module Sluice
620
485
  from_path = from_loc.dir_as_path
621
486
  filepath = file.key
622
487
 
488
+ # TODO: clean up following https://github.com/snowplow/sluice/issues/25
623
489
  match = if match_regex_or_glob.is_a? NegativeRegex
624
490
  !filepath.match(match_regex_or_glob.regex)
625
491
  else
@@ -629,19 +495,19 @@ module Sluice
629
495
  end
630
496
  end
631
497
  end
498
+ # End of mutex.synchronize
632
499
 
633
- break unless match
634
- break if is_folder?(filepath)
500
+ # Kill this thread's loop (and thus this thread) if we are complete
501
+ break if complete
502
+
503
+ # Skip processing for a folder or file which doesn't match our regexp or glob
504
+ next if is_folder?(filepath) or not match
635
505
 
636
506
  # Name file
637
507
  basename = get_basename(filepath)
638
508
  next if ignore.include?(basename) # Don't process if in our leave list
639
509
 
640
- if alter_filename_lambda.class == Proc
641
- filename = alter_filename_lambda.call(basename)
642
- else
643
- filename = basename
644
- end
510
+ filename = rename_file(filepath, basename, alter_filename_lambda)
645
511
 
646
512
  # What are we doing? Let's determine source and target
647
513
  # Note that target excludes bucket name where relevant
@@ -724,6 +590,27 @@ module Sluice
724
590
  end
725
591
  module_function :process_files
726
592
 
593
+ # A helper function to rename a file
594
+ # TODO: fixup lambda to be Maybe[Proc]
595
+ Contract String, Maybe[String], Or[Proc, Bool] => Maybe[String]
596
+ def self.rename_file(filepath, basename, lambda=false)
597
+
598
+ if lambda.class == Proc
599
+ case lambda.arity
600
+ when 2
601
+ lambda.call(basename, filepath)
602
+ when 1
603
+ lambda.call(basename)
604
+ when 0
605
+ lambda.call()
606
+ else
607
+ raise StorageOperationError "Expect arity of 0, 1 or 2 for alter_filename_lambda, not #{alter_filename_lambda.arity}"
608
+ end
609
+ else
610
+ basename
611
+ end
612
+ end
613
+
727
614
  # A helper function to list all files
728
615
  # recursively in a folder
729
616
  #
@@ -1,4 +1,4 @@
1
- # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
2
  #
3
3
  # This program is licensed to you under the Apache License Version 2.0,
4
4
  # and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -10,7 +10,7 @@
10
10
  # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
11
 
12
12
  # Author:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
- # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
14
  # License:: Apache License Version 2.0
15
15
 
16
16
  module Sluice
@@ -0,0 +1,19 @@
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Alex Dean (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ module Sluice
17
+ NAME = "sluice"
18
+ VERSION = "0.2.0"
19
+ end
@@ -1,4 +1,4 @@
1
- # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
2
  #
3
3
  # This program is licensed to you under the Apache License Version 2.0,
4
4
  # and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -10,13 +10,13 @@
10
10
  # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
11
 
12
12
  # Author:: Alex Dean (mailto:support@snowplowanalytics.com)
13
- # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
14
  # License:: Apache License Version 2.0
15
15
 
16
16
  # -*- encoding: utf-8 -*-
17
17
  lib = File.expand_path('../lib', __FILE__)
18
18
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
19
- require 'sluice'
19
+ require 'sluice/version'
20
20
 
21
21
  Gem::Specification.new do |gem|
22
22
  gem.authors = ["Alex Dean", "Michael Tibben"]
@@ -30,10 +30,17 @@ Gem::Specification.new do |gem|
30
30
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
31
31
  gem.name = Sluice::NAME
32
32
  gem.version = Sluice::VERSION
33
+ gem.license = "Apache-2.0"
33
34
  gem.platform = Gem::Platform::RUBY
34
35
  gem.require_paths = ["lib"]
35
36
 
36
37
  # Dependencies
37
- gem.add_dependency 'fog', '~> 1.14.0'
38
- gem.add_dependency 'contracts', '~> 0.2.3'
38
+ gem.add_dependency 'contracts', '~> 0.4'
39
+ gem.add_dependency 'fog', '~> 1.22'
40
+
41
+ gem.add_development_dependency "rspec", "~> 2.14", ">= 2.14.1"
42
+ gem.add_development_dependency "rspec-nc"
43
+ gem.add_development_dependency "guard"
44
+ gem.add_development_dependency "guard-rspec"
45
+ gem.add_development_dependency "coveralls"
39
46
  end
@@ -0,0 +1,4 @@
1
+ require 'coveralls'
2
+ Coveralls.wear!
3
+
4
+ require 'sluice'
@@ -0,0 +1,47 @@
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Authors:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'spec_helper'
17
+
18
+ Location = Sluice::Storage::S3::Location
19
+
20
+ describe Location do
21
+
22
+ it 'should successfully initialize with a valid S3 (s3://) bucket' do
23
+ loc = Location.new('s3://my-s3-bucket/')
24
+ loc.bucket.should eql 'my-s3-bucket'
25
+ loc.dir.should eql ''
26
+ loc.dir_as_path.should eql ''
27
+ loc.to_s.should eql 's3://my-s3-bucket/'
28
+ end
29
+
30
+ it 'should successfully initialize with a valid S3 (s3n://) bucket' do
31
+ loc = Location.new('s3n://my-s3n-bucket/')
32
+ loc.bucket.should eql 'my-s3n-bucket'
33
+ loc.dir.should eql ''
34
+ loc.dir_as_path.should eql ''
35
+ loc.to_s.should eql 's3n://my-s3n-bucket/'
36
+ end
37
+
38
+ it 'should support object equality tests' do
39
+ loc1 = Location.new('s3n://my-s3n-bucket/hello/blah/')
40
+ loc2 = Location.new('s3n://my-s3n-bucket/hello/blah/')
41
+ loc1.should eql loc2
42
+ loc2.should eql loc1
43
+ loc1.should == loc2
44
+ loc2.should == loc1
45
+ end
46
+
47
+ end
@@ -0,0 +1,42 @@
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Authors:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'spec_helper'
17
+
18
+ S3 = Sluice::Storage::S3
19
+
20
+ describe S3 do
21
+
22
+ it 'should allow filenames to be renamed' do
23
+
24
+ concat_subdir = lambda { |basename, filepath|
25
+ if m = filepath.match('([^/]+)/[^/]+$')
26
+ return m[1] + '-' + basename
27
+ else
28
+ return basename
29
+ end
30
+ }
31
+
32
+ foobar = lambda {
33
+ 'foobar'
34
+ }
35
+
36
+ S3.rename_file('/dir/subdir/file', 'file', lambda=false).should eql 'file'
37
+ S3.rename_file('/dir/subdir/file', nil, foobar).should eql 'foobar'
38
+ S3.rename_file('resources/environments/logs/publish/e-bgp9nsynv7/i-f2b831bd/_var_log_tomcat7_localhost_access_log.txt-1391958061.gz', '_var_log_tomcat7_localhost_access_log.txt-1391958061.gz', concat_subdir).should eql 'i-f2b831bd-_var_log_tomcat7_localhost_access_log.txt-1391958061.gz'
39
+
40
+ end
41
+
42
+ end
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sluice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
5
- prerelease:
4
+ version: 0.2.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Alex Dean
@@ -10,40 +9,112 @@ authors:
10
9
  autorequire:
11
10
  bindir: bin
12
11
  cert_chain: []
13
- date: 2013-10-13 00:00:00.000000000 Z
12
+ date: 2014-05-16 00:00:00.000000000 Z
14
13
  dependencies:
15
14
  - !ruby/object:Gem::Dependency
16
- name: fog
15
+ name: contracts
17
16
  requirement: !ruby/object:Gem::Requirement
18
- none: false
19
17
  requirements:
20
18
  - - ~>
21
19
  - !ruby/object:Gem::Version
22
- version: 1.14.0
20
+ version: '0.4'
23
21
  type: :runtime
24
22
  prerelease: false
25
23
  version_requirements: !ruby/object:Gem::Requirement
26
- none: false
27
24
  requirements:
28
25
  - - ~>
29
26
  - !ruby/object:Gem::Version
30
- version: 1.14.0
27
+ version: '0.4'
31
28
  - !ruby/object:Gem::Dependency
32
- name: contracts
29
+ name: fog
33
30
  requirement: !ruby/object:Gem::Requirement
34
- none: false
35
31
  requirements:
36
32
  - - ~>
37
33
  - !ruby/object:Gem::Version
38
- version: 0.2.3
34
+ version: '1.22'
39
35
  type: :runtime
40
36
  prerelease: false
41
37
  version_requirements: !ruby/object:Gem::Requirement
42
- none: false
43
38
  requirements:
44
39
  - - ~>
45
40
  - !ruby/object:Gem::Version
46
- version: 0.2.3
41
+ version: '1.22'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rspec
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ~>
47
+ - !ruby/object:Gem::Version
48
+ version: '2.14'
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: 2.14.1
52
+ type: :development
53
+ prerelease: false
54
+ version_requirements: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ~>
57
+ - !ruby/object:Gem::Version
58
+ version: '2.14'
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 2.14.1
62
+ - !ruby/object:Gem::Dependency
63
+ name: rspec-nc
64
+ requirement: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ type: :development
70
+ prerelease: false
71
+ version_requirements: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ - !ruby/object:Gem::Dependency
77
+ name: guard
78
+ requirement: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ type: :development
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ - !ruby/object:Gem::Dependency
91
+ name: guard-rspec
92
+ requirement: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ type: :development
98
+ prerelease: false
99
+ version_requirements: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ - !ruby/object:Gem::Dependency
105
+ name: coveralls
106
+ requirement: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ type: :development
112
+ prerelease: false
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
47
118
  description: A Ruby gem to help you build ETL processes involving Amazon S3. Uses
48
119
  Fog
49
120
  email:
@@ -52,39 +123,51 @@ executables: []
52
123
  extensions: []
53
124
  extra_rdoc_files: []
54
125
  files:
126
+ - .coveralls.yml
55
127
  - .gitignore
128
+ - .travis.yml
56
129
  - CHANGELOG
57
130
  - Gemfile
131
+ - Guardfile
58
132
  - LICENSE-2.0.txt
59
133
  - README.md
60
- - Rakefile
61
134
  - lib/sluice.rb
62
135
  - lib/sluice/errors.rb
63
- - lib/sluice/storage/s3.rb
136
+ - lib/sluice/storage/s3/contracts.rb
137
+ - lib/sluice/storage/s3/location.rb
138
+ - lib/sluice/storage/s3/manifest.rb
139
+ - lib/sluice/storage/s3/s3.rb
64
140
  - lib/sluice/storage/storage.rb
141
+ - lib/sluice/version.rb
65
142
  - sluice.gemspec
143
+ - spec/spec_helper.rb
144
+ - spec/storage/s3/location_spec.rb
145
+ - spec/storage/s3/s3_spec.rb
66
146
  homepage: http://snowplowanalytics.com
67
- licenses: []
147
+ licenses:
148
+ - Apache-2.0
149
+ metadata: {}
68
150
  post_install_message:
69
151
  rdoc_options: []
70
152
  require_paths:
71
153
  - lib
72
154
  required_ruby_version: !ruby/object:Gem::Requirement
73
- none: false
74
155
  requirements:
75
156
  - - ! '>='
76
157
  - !ruby/object:Gem::Version
77
158
  version: '0'
78
159
  required_rubygems_version: !ruby/object:Gem::Requirement
79
- none: false
80
160
  requirements:
81
161
  - - ! '>='
82
162
  - !ruby/object:Gem::Version
83
163
  version: '0'
84
164
  requirements: []
85
165
  rubyforge_project:
86
- rubygems_version: 1.8.25
166
+ rubygems_version: 2.2.2
87
167
  signing_key:
88
- specification_version: 3
168
+ specification_version: 4
89
169
  summary: Ruby toolkit for cloud-friendly ETL
90
- test_files: []
170
+ test_files:
171
+ - spec/spec_helper.rb
172
+ - spec/storage/s3/location_spec.rb
173
+ - spec/storage/s3/s3_spec.rb
data/Rakefile DELETED
@@ -1,2 +0,0 @@
1
- #!/usr/bin/env rake
2
- require "bundler/gem_tasks"