sluice 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NjFiZjc1OTY1NjY1ZWE2YWY2NDExZWZjMTJkYTQyM2IzYWNmZGVlOA==
5
+ data.tar.gz: !binary |-
6
+ Y2RlZDExYTlmNTEzODgzOTk0NzM2NTU0YWJhYmRjMzE4ZGQ3NjYwZQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ OWNlY2YzMjBiZjYwNzgwYmUzODNhNGM2Yzc3NDEyNzI3MmRkYWFmMTkyMDIy
10
+ NjU4ZWUzZTY0MDg5ZmM1MjNjODJkNjE2MGIxZWQ1YzkwODliNDRjZmU0Y2Q4
11
+ ZWUxODc2M2MxNzdiMDEyZGIwOWMxZWQ2YTM3ZjczNzZjOTQyNGU=
12
+ data.tar.gz: !binary |-
13
+ NmYwZjA1MTU5MzQ2NGEwNjI1Y2FiYmUwZmZhNzg4YjAyODJlNTBkMTRhZGQw
14
+ OTRkZTA1MTZkZDZiMmE2OWQ1OGNhZDUwYTFmYjQyNmQzM2E5YzQ5M2I3MTdi
15
+ NDJkYzkxOTEwM2Q0Njc4ZDU2MzBmYmYyMWRlYWZiMWZkNWY3NTk=
@@ -0,0 +1 @@
1
+ service_name: travis-ci
@@ -0,0 +1,10 @@
1
+ language: ruby
2
+ cache: bundler
3
+
4
+ rvm:
5
+ - 1.9.3
6
+ - jruby
7
+ - 2.0.0
8
+ - 2.1.0
9
+
10
+ script: 'bundle exec rspec spec'
data/CHANGELOG CHANGED
@@ -1,3 +1,20 @@
1
+ Version 0.2.0 (2014-05-16)
2
+ --------------------------
3
+ Bumped Contracts to 0.4 (#22)
4
+ Bumped Fog to 1.22.0 (#24)
5
+ Added gem button to README (#18)
6
+ Added Coveralls code coverage to project (#17)
7
+ Added Code Climate button to README (#23)
8
+ Added Travis support to project (#16)
9
+ Added initial unit tests (#15)
10
+ Added in FogFile = Fog::Storage::AWS::File-based contracts (#12)
11
+ Added additional contracts (#20)
12
+ Broke up s3.rb into separate files (#21)
13
+ Updated alter_filename_lambda to accept original filepath as 2nd arg (#19)
14
+ Overrode equality operator for S3::Location to support tests (#10)
15
+ Fixed break bugs in core Sluice process flow (#6)
16
+ Made is_empty? work if another folder starts with the same name as this one (#5)
17
+
1
18
  Version 0.1.5 (2013-10-13)
2
19
  --------------------------
3
20
  Fixed is_empty? returns true if folder contains 1 file (#9)
@@ -0,0 +1,11 @@
1
+ guard 'rspec' do
2
+ # watch /lib/ files
3
+ watch(%r{^lib/(.+).rb$}) do |m|
4
+ "spec/#{m[1]}_spec.rb"
5
+ end
6
+
7
+ # watch /spec/ files
8
+ watch(%r{^spec/(.+).rb$}) do |m|
9
+ "spec/#{m[1]}.rb"
10
+ end
11
+ end
data/README.md CHANGED
@@ -1,4 +1,8 @@
1
1
  # Sluice
2
+ [![Gem Version](https://badge.fury.io/rb/sluice.svg)](http://badge.fury.io/rb/sluice)
3
+ [![Build Status](https://travis-ci.org/snowplow/sluice.png)](https://travis-ci.org/snowplow/sluice)
4
+ [![Code Climate](https://codeclimate.com/github/snowplow/sluice.png)](https://codeclimate.com/github/snowplow/sluice)
5
+ [![Coverage Status](https://coveralls.io/repos/snowplow/sluice/badge.png?branch=master)](https://coveralls.io/r/snowplow/sluice?branch=master)
2
6
 
3
7
  Sluice is a Ruby gem (built with [Bundler] [bundler]) to help you build cloud-friendly ETL (extract, transform, load) processes.
4
8
 
@@ -21,7 +25,7 @@ Sluice has been extracted from a pair of Ruby ETL applications built by the [Sno
21
25
 
22
26
  Or in your Gemfile:
23
27
 
24
- gem 'sluice', '~> 0.1.0'
28
+ gem 'sluice', '~> 0.2.0'
25
29
 
26
30
  ## Usage
27
31
 
@@ -32,7 +36,7 @@ Rubydoc and usage examples to come.
32
36
  To hack on Sluice locally:
33
37
 
34
38
  $ gem build sluice.gemspec
35
- $ sudo gem install sluice-0.1.0.gem
39
+ $ sudo gem install sluice-0.2.0.gem
36
40
 
37
41
  To contribute:
38
42
 
@@ -48,7 +52,7 @@ Sluice was developed by [Alex Dean] [alexanderdean] ([Snowplow Analytics] [snowp
48
52
 
49
53
  ## Copyright and license
50
54
 
51
- Sluice is copyright 2012-2013 Snowplow Analytics Ltd.
55
+ Sluice is copyright 2012-2014 Snowplow Analytics Ltd.
52
56
 
53
57
  Licensed under the [Apache License, Version 2.0] [license] (the "License");
54
58
  you may not use this software except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
2
  #
3
3
  # This program is licensed to you under the Apache License Version 2.0,
4
4
  # and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -10,14 +10,12 @@
10
10
  # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
11
 
12
12
  # Author:: Alex Dean (mailto:support@snowplowanalytics.com)
13
- # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
14
  # License:: Apache License Version 2.0
15
15
 
16
16
  require 'sluice/errors'
17
17
  require 'sluice/storage/storage'
18
- require 'sluice/storage/s3'
19
-
20
- module Sluice
21
- NAME = "sluice"
22
- VERSION = "0.1.5"
23
- end
18
+ require 'sluice/storage/s3/contracts'
19
+ require 'sluice/storage/s3/location'
20
+ require 'sluice/storage/s3/manifest'
21
+ require 'sluice/storage/s3/s3'
@@ -1,4 +1,4 @@
1
- # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
2
  #
3
3
  # This program is licensed to you under the Apache License Version 2.0,
4
4
  # and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -10,7 +10,7 @@
10
10
  # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
11
 
12
12
  # Author:: Alex Dean (mailto:support@snowplowanalytics.com)
13
- # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
14
  # License:: Apache License Version 2.0
15
15
 
16
16
  # All errors
@@ -0,0 +1,32 @@
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Authors:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'fog'
17
+ require 'fog/aws/models/storage/file'
18
+
19
+ require 'contracts'
20
+ include Contracts
21
+
22
+ module Sluice
23
+ module Storage
24
+ module S3
25
+
26
+ # Aliases for Contracts
27
+ FogStorage = Fog::Storage::AWS::Real
28
+ FogFile = Fog::Storage::AWS::File
29
+
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,77 @@
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Authors:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'contracts'
17
+ include Contracts
18
+
19
+ module Sluice
20
+ module Storage
21
+ module S3
22
+
23
+ # Class to describe an S3 location
24
+ # TODO: if we are going to require trailing slashes on
25
+ # S3 locations, maybe we should make that clearer?
26
+ class Location
27
+
28
+ attr_reader :bucket, :dir
29
+
30
+ # Location constructor
31
+ #
32
+ # Parameters:
33
+ # +s3_location+:: the S3 location string, e.g. "s3://bucket/directory/" (trailing slash required)
34
+ Contract String => Location
35
+ def initialize(s3_location)
36
+ @s3_location = s3_location
37
+
38
+ s3_location_match = s3_location.match('^s3n?://([^/]+)/?(.*)/$')
39
+ raise ArgumentError, 'Bad S3 location %s' % s3_location unless s3_location_match
40
+
41
+ @bucket = s3_location_match[1]
42
+ @dir = s3_location_match[2]
43
+ self
44
+ end
45
+
46
+ Contract nil => String
47
+ def dir_as_path
48
+ if @dir.length > 0
49
+ return @dir+'/'
50
+ else
51
+ return ''
52
+ end
53
+ end
54
+
55
+ Contract nil => String
56
+ def to_s
57
+ @s3_location
58
+ end
59
+
60
+ Contract Any => Bool
61
+ def ==(o)
62
+ o.class == self.class && o.state == state
63
+ end
64
+ alias_method :eql?, :==
65
+
66
+ protected
67
+
68
+ Contract nil => [String, String, String]
69
+ def state
70
+ [@s3_location, @bucket, @dir]
71
+ end
72
+
73
+ end
74
+
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,129 @@
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Authors:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'set'
17
+
18
+ require 'contracts'
19
+ include Contracts
20
+
21
+ module Sluice
22
+ module Storage
23
+ module S3
24
+
25
+ # Legitimate manifest scopes:
26
+ # 1. :filename - store only the filename
27
+ # in the manifest
28
+ # 2. :relpath - store the relative path
29
+ # to the file in the manifest
30
+ # 3. :abspath - store the absolute path
31
+ # to the file in the manifest
32
+ # 4. :bucket - store bucket PLUS absolute
33
+ # path to the file in the manifest
34
+ #
35
+ # TODO: add support for 2-4. Currently only 1 supported
36
+ class ManifestScope
37
+
38
+ @@scopes = Set::[](:filename) # TODO add :relpath, :abspath, :bucket
39
+
40
+ def self.valid?(val)
41
+ val.is_a?(Symbol) &&
42
+ @@scopes.include?(val)
43
+ end
44
+ end
45
+
46
+ # Class to read and maintain a manifest.
47
+ class Manifest
48
+ attr_reader :s3_location, :scope, :manifest_file
49
+
50
+ # Manifest constructor
51
+ #
52
+ # Parameters:
53
+ # +s3_location+:: the Location whose directory will hold the manifest file
54
+ # +scope+:: whether file entries in the
55
+ # manifest should be scoped to
56
+ # filename, relative path, absolute
57
+ # path, or absolute path and bucket
58
+ Contract Location, ManifestScope => nil
59
+ def initialize(s3_location, scope)
60
+ @s3_location = s3_location
61
+ @scope = scope
62
+ @manifest_file = "%ssluice-%s-manifest" % [s3_location.dir_as_path, scope.to_s]
63
+ nil
64
+ end
65
+
66
+ # Get the current file entries in the manifest
67
+ #
68
+ # Parameters:
69
+ # +s3+:: A Fog::Storage s3 connection
70
+ #
71
+ # Returns an Array of filenames as Strings
72
+ Contract FogStorage => ArrayOf[String]
73
+ def get_entries(s3)
74
+
75
+ manifest = self.class.get_manifest(s3, @s3_location, @manifest_file)
76
+ if manifest.nil?
77
+ return []
78
+ end
79
+
80
+ manifest.body.split("\n").reject(&:empty?)
81
+ end
82
+
83
+ # Add (i.e. append) the following file entries
84
+ # to the manifest
85
+ # Files listed previously in the manifest will
86
+ # be kept in the new manifest file.
87
+ #
88
+ # Parameters:
89
+ # +s3+:: A Fog::Storage s3 connection
90
+ # +entries+:: an Array of filenames as Strings
91
+ #
92
+ # Returns all entries now in the manifest
93
+ Contract FogStorage, ArrayOf[String] => ArrayOf[String]
94
+ def add_entries(s3, entries)
95
+
96
+ existing = get_entries(s3)
97
+ filenames = entries.map { |filepath|
98
+ File.basename(filepath)
99
+ } # TODO: update when non-filename-based manifests supported
100
+ all = (existing + filenames)
101
+
102
+ manifest = self.class.get_manifest(s3, @s3_location, @manifest_file)
103
+ body = all.join("\n")
104
+ if manifest.nil?
105
+ bucket = s3.directories.get(s3_location.bucket).files.create(
106
+ :key => @manifest_file,
107
+ :body => body
108
+ )
109
+ else
110
+ manifest.body = body
111
+ manifest.save
112
+ end
113
+
114
+ all
115
+ end
116
+
117
+ private
118
+
119
+ # Helper to get the manifest file
120
+ Contract FogStorage, Location, String => Maybe[FogFile]
121
+ def self.get_manifest(s3, s3_location, filename)
122
+ s3.directories.get(s3_location.bucket, prefix: s3_location.dir).files.get(filename) # TODO: break out into new generic get_file() procedure
123
+ end
124
+
125
+ end
126
+
127
+ end
128
+ end
129
+ end
@@ -1,4 +1,4 @@
1
- # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
2
  #
3
3
  # This program is licensed to you under the Apache License Version 2.0,
4
4
  # and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -10,10 +10,9 @@
10
10
  # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
11
 
12
12
  # Authors:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
- # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
14
  # License:: Apache License Version 2.0
15
15
 
16
- require 'set'
17
16
  require 'tmpdir'
18
17
  require 'fog'
19
18
  require 'thread'
@@ -35,145 +34,6 @@ module Sluice
35
34
  RETRY_WAIT = 10 # Seconds
36
35
  TIMEOUT_WAIT = 1800 # 30 mins should let even large files upload. +1 https://github.com/snowplow/sluice/issues/7 if this is insufficient or excessive
37
36
 
38
- # Aliases for Contracts
39
- FogStorage = Fog::Storage::AWS::Real
40
- # FogFile = Fog::Storage::AWS::File TODO: fix - gives: warning: toplevel constant File referenced by Fog::Storage::AWS::File
41
-
42
- # Class to describe an S3 location
43
- # TODO: if we are going to impose trailing line-breaks on
44
- # buckets, maybe we should make that clearer?
45
- class Location
46
- attr_reader :bucket, :dir, :s3location
47
-
48
- # Location constructor
49
- #
50
- # Parameters:
51
- # +s3location+:: the s3 location config string e.g. "bucket/directory"
52
- def initialize(s3_location)
53
- @s3_location = s3_location
54
-
55
- s3_location_match = s3_location.match('^s3n?://([^/]+)/?(.*)/$')
56
- raise ArgumentError, 'Bad S3 location %s' % s3_location unless s3_location_match
57
-
58
- @bucket = s3_location_match[1]
59
- @dir = s3_location_match[2]
60
- end
61
-
62
- def dir_as_path
63
- if @dir.length > 0
64
- return @dir+'/'
65
- else
66
- return ''
67
- end
68
- end
69
-
70
- def to_s
71
- @s3_location
72
- end
73
- end
74
-
75
- # Legitimate manifest scopes:
76
- # 1. :filename - store only the filename
77
- # in the manifest
78
- # 2. :relpath - store the relative path
79
- # to the file in the manifest
80
- # 3. :abspath - store the absolute path
81
- # to the file in the manifest
82
- # 4. :bucket - store bucket PLUS absolute
83
- # path to the file in the manifest
84
- #
85
- # TODO: add support for 2-4. Currently only 1 supported
86
- class ManifestScope
87
-
88
- @@scopes = Set::[](:filename) # TODO add :relpath, :abspath, :bucket
89
-
90
- def self.valid?(val)
91
- val.is_a?(Symbol) &&
92
- @@scopes.include?(val)
93
- end
94
- end
95
-
96
- # Class to read and maintain a manifest.
97
- class Manifest
98
- attr_reader :s3_location, :scope, :manifest_file
99
-
100
- # Manifest constructor
101
- #
102
- # Parameters:
103
- # +path+:: full path to the manifest file
104
- # +scope+:: whether file entries in the
105
- # manifest should be scoped to
106
- # filename, relative path, absolute
107
- # path, or absolute path and bucket
108
- Contract Location, ManifestScope => nil
109
- def initialize(s3_location, scope)
110
- @s3_location = s3_location
111
- @scope = scope
112
- @manifest_file = "%ssluice-%s-manifest" % [s3_location.dir_as_path, scope.to_s]
113
- nil
114
- end
115
-
116
- # Get the current file entries in the manifest
117
- #
118
- # Parameters:
119
- # +s3+:: A Fog::Storage s3 connection
120
- #
121
- # Returns an Array of filenames as Strings
122
- Contract FogStorage => ArrayOf[String]
123
- def get_entries(s3)
124
-
125
- manifest = self.class.get_manifest(s3, @s3_location, @manifest_file)
126
- if manifest.nil?
127
- return []
128
- end
129
-
130
- manifest.body.split("\n").reject(&:empty?)
131
- end
132
-
133
- # Add (i.e. append) the following file entries
134
- # to the manifest
135
- # Files listed previously in the manifest will
136
- # be kept in the new manifest file.
137
- #
138
- # Parameters:
139
- # +s3+:: A Fog::Storage s3 connection
140
- # +entries+:: an Array of filenames as Strings
141
- #
142
- # Returns all entries now in the manifest
143
- Contract FogStorage, ArrayOf[String] => ArrayOf[String]
144
- def add_entries(s3, entries)
145
-
146
- existing = get_entries(s3)
147
- filenames = entries.map { |filepath|
148
- File.basename(filepath)
149
- } # TODO: update when non-filename-based manifests supported
150
- all = (existing + filenames)
151
-
152
- manifest = self.class.get_manifest(s3, @s3_location, @manifest_file)
153
- body = all.join("\n")
154
- if manifest.nil?
155
- bucket = s3.directories.get(s3_location.bucket).files.create(
156
- :key => @manifest_file,
157
- :body => body
158
- )
159
- else
160
- manifest.body = body
161
- manifest.save
162
- end
163
-
164
- all
165
- end
166
-
167
- private
168
-
169
- # Helper to get the manifest file
170
- # Contract FogStorage, Location, String => Or[FogFile, nil] TODO: fix this. Expected: File, Actual: <Fog::Storage::AWS::File>
171
- def self.get_manifest(s3, s3_location, filename)
172
- s3.directories.get(s3_location.bucket, prefix: s3_location.dir).files.get(filename) # TODO: break out into new generic get_file() procedure
173
- end
174
-
175
- end
176
-
177
37
  # Helper function to instantiate a new Fog::Storage
178
38
  # for S3 based on our config options
179
39
  #
@@ -201,8 +61,9 @@ module Sluice
201
61
  # +location+:: The location to return files from
202
62
  #
203
63
  # Returns array of Fog::Storage::AWS::File's
64
+ Contract FogStorage, Location => ArrayOf[FogFile]
204
65
  def list_files(s3, location)
205
- files_and_dirs = s3.directories.get(location.bucket, prefix: location.dir).files
66
+ files_and_dirs = s3.directories.get(location.bucket, prefix: location.dir_as_path).files
206
67
 
207
68
  files = [] # Can't use a .select because of Ruby deep copy issues (array of non-POROs)
208
69
  files_and_dirs.each { |f|
@@ -220,6 +81,7 @@ module Sluice
220
81
  # +path+:: S3 path in String form
221
82
  #
222
83
  # Returns boolean
84
+ Contract String => Bool
223
85
  def is_folder?(path)
224
86
  (path.end_with?('_$folder$') || # EMR-created
225
87
  path.end_with?('/'))
@@ -232,6 +94,7 @@ module Sluice
232
94
  # +path+:: S3 path in String form
233
95
  #
234
96
  # Returns boolean
97
+ Contract String => Bool
235
98
  def is_file?(path)
236
99
  !is_folder?(path)
237
100
  end
@@ -244,6 +107,7 @@ module Sluice
244
107
  #
245
108
  # Returns the basename, or nil if the
246
109
  # path is to a folder
110
+ Contract String => Maybe[String]
247
111
  def get_basename(path)
248
112
  if is_folder?(path)
249
113
  nil
@@ -263,6 +127,7 @@ module Sluice
263
127
  # Parameters:
264
128
  # +s3+:: A Fog::Storage s3 connection
265
129
  # +location+:: The location to check
130
+ Contract FogStorage, Location => Bool
266
131
  def is_empty?(s3, location)
267
132
  list_files(s3, location).length == 0
268
133
  end
@@ -620,6 +485,7 @@ module Sluice
620
485
  from_path = from_loc.dir_as_path
621
486
  filepath = file.key
622
487
 
488
+ # TODO: clean up following https://github.com/snowplow/sluice/issues/25
623
489
  match = if match_regex_or_glob.is_a? NegativeRegex
624
490
  !filepath.match(match_regex_or_glob.regex)
625
491
  else
@@ -629,19 +495,19 @@ module Sluice
629
495
  end
630
496
  end
631
497
  end
498
+ # End of mutex.synchronize
632
499
 
633
- break unless match
634
- break if is_folder?(filepath)
500
+ # Kill this thread's loop (and thus this thread) if we are complete
501
+ break if complete
502
+
503
+ # Skip processing for a folder or file which doesn't match our regexp or glob
504
+ next if is_folder?(filepath) or not match
635
505
 
636
506
  # Name file
637
507
  basename = get_basename(filepath)
638
508
  next if ignore.include?(basename) # Don't process if in our leave list
639
509
 
640
- if alter_filename_lambda.class == Proc
641
- filename = alter_filename_lambda.call(basename)
642
- else
643
- filename = basename
644
- end
510
+ filename = rename_file(filepath, basename, alter_filename_lambda)
645
511
 
646
512
  # What are we doing? Let's determine source and target
647
513
  # Note that target excludes bucket name where relevant
@@ -724,6 +590,27 @@ module Sluice
724
590
  end
725
591
  module_function :process_files
726
592
 
593
+ # A helper function to rename a file
594
+ # TODO: fixup lambda to be Maybe[Proc]
595
+ Contract String, Maybe[String], Or[Proc, Bool] => Maybe[String]
596
+ def self.rename_file(filepath, basename, lambda=false)
597
+
598
+ if lambda.class == Proc
599
+ case lambda.arity
600
+ when 2
601
+ lambda.call(basename, filepath)
602
+ when 1
603
+ lambda.call(basename)
604
+ when 0
605
+ lambda.call()
606
+ else
607
+ raise StorageOperationError "Expect arity of 0, 1 or 2 for alter_filename_lambda, not #{alter_filename_lambda.arity}"
608
+ end
609
+ else
610
+ basename
611
+ end
612
+ end
613
+
727
614
  # A helper function to list all files
728
615
  # recursively in a folder
729
616
  #
@@ -1,4 +1,4 @@
1
- # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
2
  #
3
3
  # This program is licensed to you under the Apache License Version 2.0,
4
4
  # and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -10,7 +10,7 @@
10
10
  # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
11
 
12
12
  # Author:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
- # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
14
  # License:: Apache License Version 2.0
15
15
 
16
16
  module Sluice
@@ -0,0 +1,19 @@
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Author:: Alex Dean (mailto:support@snowplowanalytics.com)
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ module Sluice
17
+ NAME = "sluice"
18
+ VERSION = "0.2.0"
19
+ end
@@ -1,4 +1,4 @@
1
- # Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
2
  #
3
3
  # This program is licensed to you under the Apache License Version 2.0,
4
4
  # and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -10,13 +10,13 @@
10
10
  # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
11
 
12
12
  # Author:: Alex Dean (mailto:support@snowplowanalytics.com)
13
- # Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
14
  # License:: Apache License Version 2.0
15
15
 
16
16
  # -*- encoding: utf-8 -*-
17
17
  lib = File.expand_path('../lib', __FILE__)
18
18
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
19
- require 'sluice'
19
+ require 'sluice/version'
20
20
 
21
21
  Gem::Specification.new do |gem|
22
22
  gem.authors = ["Alex Dean", "Michael Tibben"]
@@ -30,10 +30,17 @@ Gem::Specification.new do |gem|
30
30
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
31
31
  gem.name = Sluice::NAME
32
32
  gem.version = Sluice::VERSION
33
+ gem.license = "Apache-2.0"
33
34
  gem.platform = Gem::Platform::RUBY
34
35
  gem.require_paths = ["lib"]
35
36
 
36
37
  # Dependencies
37
- gem.add_dependency 'fog', '~> 1.14.0'
38
- gem.add_dependency 'contracts', '~> 0.2.3'
38
+ gem.add_dependency 'contracts', '~> 0.4'
39
+ gem.add_dependency 'fog', '~> 1.22'
40
+
41
+ gem.add_development_dependency "rspec", "~> 2.14", ">= 2.14.1"
42
+ gem.add_development_dependency "rspec-nc"
43
+ gem.add_development_dependency "guard"
44
+ gem.add_development_dependency "guard-rspec"
45
+ gem.add_development_dependency "coveralls"
39
46
  end
@@ -0,0 +1,4 @@
1
+ require 'coveralls'
2
+ Coveralls.wear!
3
+
4
+ require 'sluice'
@@ -0,0 +1,47 @@
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Authors:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'spec_helper'
17
+
18
+ Location = Sluice::Storage::S3::Location
19
+
20
+ describe Location do
21
+
22
+ it 'should successfully initialize with a valid S3 (s3://) bucket' do
23
+ loc = Location.new('s3://my-s3-bucket/')
24
+ loc.bucket.should eql 'my-s3-bucket'
25
+ loc.dir.should eql ''
26
+ loc.dir_as_path.should eql ''
27
+ loc.to_s.should eql 's3://my-s3-bucket/'
28
+ end
29
+
30
+ it 'should successfully initialize with a valid S3 (s3n://) bucket' do
31
+ loc = Location.new('s3n://my-s3n-bucket/')
32
+ loc.bucket.should eql 'my-s3n-bucket'
33
+ loc.dir.should eql ''
34
+ loc.dir_as_path.should eql ''
35
+ loc.to_s.should eql 's3n://my-s3n-bucket/'
36
+ end
37
+
38
+ it 'should support object equality tests' do
39
+ loc1 = Location.new('s3n://my-s3n-bucket/hello/blah/')
40
+ loc2 = Location.new('s3n://my-s3n-bucket/hello/blah/')
41
+ loc1.should eql loc2
42
+ loc2.should eql loc1
43
+ loc1.should == loc2
44
+ loc2.should == loc1
45
+ end
46
+
47
+ end
@@ -0,0 +1,42 @@
1
+ # Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
2
+ #
3
+ # This program is licensed to you under the Apache License Version 2.0,
4
+ # and you may not use this file except in compliance with the Apache License Version 2.0.
5
+ # You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
6
+ #
7
+ # Unless required by applicable law or agreed to in writing,
8
+ # software distributed under the Apache License Version 2.0 is distributed on an
9
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
11
+
12
+ # Authors:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
13
+ # Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
14
+ # License:: Apache License Version 2.0
15
+
16
+ require 'spec_helper'
17
+
18
+ S3 = Sluice::Storage::S3
19
+
20
+ describe S3 do
21
+
22
+ it 'should allow filenames to be renamed' do
23
+
24
+ concat_subdir = lambda { |basename, filepath|
25
+ if m = filepath.match('([^/]+)/[^/]+$')
26
+ return m[1] + '-' + basename
27
+ else
28
+ return basename
29
+ end
30
+ }
31
+
32
+ foobar = lambda {
33
+ 'foobar'
34
+ }
35
+
36
+ S3.rename_file('/dir/subdir/file', 'file', lambda=false).should eql 'file'
37
+ S3.rename_file('/dir/subdir/file', nil, foobar).should eql 'foobar'
38
+ S3.rename_file('resources/environments/logs/publish/e-bgp9nsynv7/i-f2b831bd/_var_log_tomcat7_localhost_access_log.txt-1391958061.gz', '_var_log_tomcat7_localhost_access_log.txt-1391958061.gz', concat_subdir).should eql 'i-f2b831bd-_var_log_tomcat7_localhost_access_log.txt-1391958061.gz'
39
+
40
+ end
41
+
42
+ end
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sluice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
5
- prerelease:
4
+ version: 0.2.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Alex Dean
@@ -10,40 +9,112 @@ authors:
10
9
  autorequire:
11
10
  bindir: bin
12
11
  cert_chain: []
13
- date: 2013-10-13 00:00:00.000000000 Z
12
+ date: 2014-05-16 00:00:00.000000000 Z
14
13
  dependencies:
15
14
  - !ruby/object:Gem::Dependency
16
- name: fog
15
+ name: contracts
17
16
  requirement: !ruby/object:Gem::Requirement
18
- none: false
19
17
  requirements:
20
18
  - - ~>
21
19
  - !ruby/object:Gem::Version
22
- version: 1.14.0
20
+ version: '0.4'
23
21
  type: :runtime
24
22
  prerelease: false
25
23
  version_requirements: !ruby/object:Gem::Requirement
26
- none: false
27
24
  requirements:
28
25
  - - ~>
29
26
  - !ruby/object:Gem::Version
30
- version: 1.14.0
27
+ version: '0.4'
31
28
  - !ruby/object:Gem::Dependency
32
- name: contracts
29
+ name: fog
33
30
  requirement: !ruby/object:Gem::Requirement
34
- none: false
35
31
  requirements:
36
32
  - - ~>
37
33
  - !ruby/object:Gem::Version
38
- version: 0.2.3
34
+ version: '1.22'
39
35
  type: :runtime
40
36
  prerelease: false
41
37
  version_requirements: !ruby/object:Gem::Requirement
42
- none: false
43
38
  requirements:
44
39
  - - ~>
45
40
  - !ruby/object:Gem::Version
46
- version: 0.2.3
41
+ version: '1.22'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rspec
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ~>
47
+ - !ruby/object:Gem::Version
48
+ version: '2.14'
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: 2.14.1
52
+ type: :development
53
+ prerelease: false
54
+ version_requirements: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ~>
57
+ - !ruby/object:Gem::Version
58
+ version: '2.14'
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 2.14.1
62
+ - !ruby/object:Gem::Dependency
63
+ name: rspec-nc
64
+ requirement: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ type: :development
70
+ prerelease: false
71
+ version_requirements: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ - !ruby/object:Gem::Dependency
77
+ name: guard
78
+ requirement: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ type: :development
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ - !ruby/object:Gem::Dependency
91
+ name: guard-rspec
92
+ requirement: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ type: :development
98
+ prerelease: false
99
+ version_requirements: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ - !ruby/object:Gem::Dependency
105
+ name: coveralls
106
+ requirement: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ type: :development
112
+ prerelease: false
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
47
118
  description: A Ruby gem to help you build ETL processes involving Amazon S3. Uses
48
119
  Fog
49
120
  email:
@@ -52,39 +123,51 @@ executables: []
52
123
  extensions: []
53
124
  extra_rdoc_files: []
54
125
  files:
126
+ - .coveralls.yml
55
127
  - .gitignore
128
+ - .travis.yml
56
129
  - CHANGELOG
57
130
  - Gemfile
131
+ - Guardfile
58
132
  - LICENSE-2.0.txt
59
133
  - README.md
60
- - Rakefile
61
134
  - lib/sluice.rb
62
135
  - lib/sluice/errors.rb
63
- - lib/sluice/storage/s3.rb
136
+ - lib/sluice/storage/s3/contracts.rb
137
+ - lib/sluice/storage/s3/location.rb
138
+ - lib/sluice/storage/s3/manifest.rb
139
+ - lib/sluice/storage/s3/s3.rb
64
140
  - lib/sluice/storage/storage.rb
141
+ - lib/sluice/version.rb
65
142
  - sluice.gemspec
143
+ - spec/spec_helper.rb
144
+ - spec/storage/s3/location_spec.rb
145
+ - spec/storage/s3/s3_spec.rb
66
146
  homepage: http://snowplowanalytics.com
67
- licenses: []
147
+ licenses:
148
+ - Apache-2.0
149
+ metadata: {}
68
150
  post_install_message:
69
151
  rdoc_options: []
70
152
  require_paths:
71
153
  - lib
72
154
  required_ruby_version: !ruby/object:Gem::Requirement
73
- none: false
74
155
  requirements:
75
156
  - - ! '>='
76
157
  - !ruby/object:Gem::Version
77
158
  version: '0'
78
159
  required_rubygems_version: !ruby/object:Gem::Requirement
79
- none: false
80
160
  requirements:
81
161
  - - ! '>='
82
162
  - !ruby/object:Gem::Version
83
163
  version: '0'
84
164
  requirements: []
85
165
  rubyforge_project:
86
- rubygems_version: 1.8.25
166
+ rubygems_version: 2.2.2
87
167
  signing_key:
88
- specification_version: 3
168
+ specification_version: 4
89
169
  summary: Ruby toolkit for cloud-friendly ETL
90
- test_files: []
170
+ test_files:
171
+ - spec/spec_helper.rb
172
+ - spec/storage/s3/location_spec.rb
173
+ - spec/storage/s3/s3_spec.rb
data/Rakefile DELETED
@@ -1,2 +0,0 @@
1
- #!/usr/bin/env rake
2
- require "bundler/gem_tasks"