sluice-jason 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitignore +20 -0
- data/.travis.yml +10 -0
- data/CHANGELOG +94 -0
- data/Gemfile +4 -0
- data/Guardfile +11 -0
- data/LICENSE-2.0.txt +202 -0
- data/README.md +83 -0
- data/Vagrantfile +23 -0
- data/lib/sluice/errors.rb +26 -0
- data/lib/sluice/storage/s3/contracts.rb +32 -0
- data/lib/sluice/storage/s3/location.rb +77 -0
- data/lib/sluice/storage/s3/manifest.rb +129 -0
- data/lib/sluice/storage/s3/s3.rb +704 -0
- data/lib/sluice/storage/storage.rb +111 -0
- data/lib/sluice/version.rb +19 -0
- data/lib/sluice.rb +21 -0
- data/sluice.gemspec +46 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/storage/s3/location_spec.rb +47 -0
- data/spec/storage/s3/s3_spec.rb +42 -0
- data/vagrant/.gitignore +3 -0
- data/vagrant/ansible.hosts +2 -0
- data/vagrant/peru.yaml +14 -0
- data/vagrant/push.bash +79 -0
- data/vagrant/up.bash +50 -0
- data/vagrant/up.guidance +5 -0
- data/vagrant/up.playbooks +1 -0
- metadata +180 -0
@@ -0,0 +1,129 @@
|
|
1
|
+
# Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
|
2
|
+
#
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
11
|
+
|
12
|
+
# Authors:: Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
|
13
|
+
# Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
|
14
|
+
# License:: Apache License Version 2.0
|
15
|
+
|
16
|
+
require 'set'
|
17
|
+
|
18
|
+
require 'contracts'
|
19
|
+
include Contracts
|
20
|
+
|
21
|
+
module Sluice
|
22
|
+
module Storage
|
23
|
+
module S3
|
24
|
+
|
25
|
+
# Legitimate manifest scopes:
# 1. :filename - store only the filename
#                in the manifest
# 2. :relpath  - store the relative path
#                to the file in the manifest
# 3. :abspath  - store the absolute path
#                to the file in the manifest
# 4. :bucket   - store bucket PLUS absolute
#                path to the file in the manifest
#
# TODO: add support for 2-4. Currently only 1 supported
#
# The class is used purely through its class-level +valid?+ predicate;
# the Manifest constructor's Contract names it as the type of +scope+.
class ManifestScope

  # Scopes accepted today. Held in a frozen constant rather than a
  # class variable so the set cannot be mutated at runtime and is not
  # shared (and overwritable) across an inheritance tree.
  SCOPES = Set[:filename].freeze # TODO add :relpath, :abspath, :bucket

  # Check whether a value is a supported manifest scope
  #
  # Parameters:
  # +val+:: the value to check
  #
  # Returns true only if +val+ is a Symbol and one of the
  # supported scopes, false otherwise
  def self.valid?(val)
    val.is_a?(Symbol) && SCOPES.include?(val)
  end
end
|
45
|
+
|
46
|
+
# Class to read and maintain a manifest.
#
# The manifest is kept as one object whose body is a newline-separated
# list of entries. Its key is built from the location's +dir_as_path+
# and the scope (see the constructor).
class Manifest
  attr_reader :s3_location, :scope, :manifest_file

  # Manifest constructor
  #
  # Parameters:
  # +s3_location+:: the Location whose +dir_as_path+ prefixes the
  #                 manifest file's key
  # +scope+:: whether file entries in the
  #           manifest should be scoped to
  #           filename, relative path, absolute
  #           path, or absolute path and bucket
  Contract Location, ManifestScope => nil
  def initialize(s3_location, scope)
    @s3_location = s3_location
    @scope = scope
    @manifest_file = "%ssluice-%s-manifest" % [s3_location.dir_as_path, scope.to_s]
    nil
  end

  # Get the current file entries in the manifest
  #
  # Parameters:
  # +s3+:: A Fog::Storage s3 connection
  #
  # Returns an Array of filenames as Strings (empty if the
  # manifest file does not exist yet)
  Contract FogStorage => ArrayOf[String]
  def get_entries(s3)
    entries_in(self.class.get_manifest(s3, @s3_location, @manifest_file))
  end

  # Add (i.e. append) the following file entries
  # to the manifest
  # Files listed previously in the manifest will
  # be kept in the new manifest file.
  #
  # Only the basename of each entry is stored. Entries are appended
  # as-is: no de-duplication against existing entries is performed.
  #
  # Parameters:
  # +s3+:: A Fog::Storage s3 connection
  # +entries+:: an Array of filenames as Strings
  #
  # Returns all entries now in the manifest
  Contract FogStorage, ArrayOf[String] => ArrayOf[String]
  def add_entries(s3, entries)
    # Fetch the manifest once and reuse the same object both to read
    # the existing entries and to write the updated body back.
    manifest = self.class.get_manifest(s3, @s3_location, @manifest_file)

    # TODO: update when non-filename-based manifests supported
    filenames = entries.map { |filepath| File.basename(filepath) }
    all = entries_in(manifest) + filenames
    body = all.join("\n")

    if manifest.nil?
      # No manifest yet: create it in the location's bucket
      s3.directories.get(s3_location.bucket).files.create(
        :key  => @manifest_file,
        :body => body
      )
    else
      manifest.body = body
      manifest.save
    end

    all
  end

  # Helper to get the manifest file
  #
  # NOTE: this is a public class method. A bare +private+ does not
  # apply to methods defined with +def self.+, and the instance
  # methods above call it through +self.class+, which requires it
  # to be publicly callable.
  #
  # Parameters:
  # +s3+:: A Fog::Storage s3 connection
  # +s3_location+:: the Location supplying the bucket and dir prefix
  # +filename+:: key of the manifest file to look up
  #
  # Returns the manifest file, or nil if the lookup returns nil
  Contract FogStorage, Location, String => Maybe[FogFile]
  def self.get_manifest(s3, s3_location, filename)
    s3.directories.get(s3_location.bucket, prefix: s3_location.dir).files.get(filename) # TODO: break out into new generic get_file() procedure
  end

  private

  # Split a fetched manifest into its non-empty lines
  #
  # Parameters:
  # +manifest+:: the manifest file, or nil if none exists
  #
  # Returns an Array of entries as Strings
  def entries_in(manifest)
    return [] if manifest.nil?

    manifest.body.split("\n").reject(&:empty?)
  end

end
|
126
|
+
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|