s3-client 0.0.1
Diff listing for the s3-client 0.0.1 gem — files added, with line counts:
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/Gemfile +13 -0
- data/LICENSE.txt +174 -0
- data/Rakefile +40 -0
- data/config/settings.yml +11 -0
- data/lib/s3/client.rb +31 -0
- data/lib/s3/client/api.rb +258 -0
- data/lib/s3/client/api/rest_parameter.rb +149 -0
- data/lib/s3/client/api/storage.rb +363 -0
- data/lib/s3/client/exception.rb +20 -0
- data/lib/s3/client/model/bucket.rb +20 -0
- data/lib/s3/client/model/bucket_collection.rb +35 -0
- data/lib/s3/client/model/concerns/buckets_result.rb +21 -0
- data/lib/s3/client/model/concerns/objects_result.rb +35 -0
- data/lib/s3/client/model/object.rb +56 -0
- data/lib/s3/client/model/object_collection.rb +65 -0
- data/lib/s3/client/storage.rb +43 -0
- data/lib/s3/settings.rb +9 -0
- data/lib/s3/version.rb +3 -0
- data/s3-client.gemspec +30 -0
- metadata +176 -0
@@ -0,0 +1,149 @@
|
|
1
|
+
module S3
|
2
|
+
class Client::API
|
3
|
+
class RestParameter
|
4
|
+
|
5
|
+
def initialize(method, resource, cano_resource: nil, query_params: {},
|
6
|
+
parameters: {}, bucket: '', content_type: nil, import: false,
|
7
|
+
raw_data: false, blank_body: false, headers: {}, multipart: false)
|
8
|
+
|
9
|
+
@method = method
|
10
|
+
@resource = resource
|
11
|
+
@cano_resource = cano_resource
|
12
|
+
@query_params = query_params
|
13
|
+
@parameters = parameters
|
14
|
+
@bucket = bucket
|
15
|
+
@content_type = content_type
|
16
|
+
@import = import
|
17
|
+
@raw_data = raw_data
|
18
|
+
@blank_body = blank_body
|
19
|
+
@headers = headers
|
20
|
+
@multipart = multipart
|
21
|
+
end
|
22
|
+
|
23
|
+
attr_reader :method
|
24
|
+
attr_reader :resource
|
25
|
+
attr_reader :cano_resource
|
26
|
+
attr_reader :query_params
|
27
|
+
attr_reader :parameters
|
28
|
+
attr_reader :bucket
|
29
|
+
attr_reader :content_type
|
30
|
+
attr_reader :headers
|
31
|
+
|
32
|
+
def url(uri, force_path_style = false)
|
33
|
+
url = uri.host
|
34
|
+
url += ":#{uri.port}" unless uri.port == 80 || uri.port == 443
|
35
|
+
|
36
|
+
if @bucket.present?
|
37
|
+
if force_path_style
|
38
|
+
url += '/' unless url.end_with? "/"
|
39
|
+
url += @bucket
|
40
|
+
else
|
41
|
+
url = [@bucket, url].join('.')
|
42
|
+
url += '/' unless url.end_with? "/"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
if @bucket.blank? || @resource != '/'
|
47
|
+
url = File.join(url, @resource)
|
48
|
+
end
|
49
|
+
|
50
|
+
url += '/' if url.split('/').last == @bucket
|
51
|
+
url += '?' if @cano_resource.present? || @query_params.present?
|
52
|
+
url += @cano_resource if @cano_resource
|
53
|
+
url += '&' if @cano_resource.present? && @query_params.present?
|
54
|
+
url += "#{@query_params.to_param}" if @query_params.present?
|
55
|
+
|
56
|
+
uri.scheme + '://' + url
|
57
|
+
end
|
58
|
+
|
59
|
+
def http_verb
|
60
|
+
@method.to_s.upcase
|
61
|
+
end
|
62
|
+
|
63
|
+
def signature_content_type
|
64
|
+
result = ""
|
65
|
+
if @content_type.present?
|
66
|
+
result << @content_type
|
67
|
+
end
|
68
|
+
|
69
|
+
result << "\n"
|
70
|
+
|
71
|
+
result
|
72
|
+
end
|
73
|
+
|
74
|
+
def authentication(access_key_id, secret_access_key, force_path_style)
|
75
|
+
|
76
|
+
"AWS" + " " + access_key_id + ":" + signature(secret_access_key, force_path_style)
|
77
|
+
end
|
78
|
+
|
79
|
+
def signature(secret_access_key, force_path_style = false)
|
80
|
+
http_verb = "#{self.http_verb}\n"
|
81
|
+
content_md5 = "\n"
|
82
|
+
content_type = signature_content_type
|
83
|
+
date = "#{calc_date}\n"
|
84
|
+
|
85
|
+
canonicalized_aws_headers = ""
|
86
|
+
|
87
|
+
string_to_sign = http_verb + content_md5 + content_type + date +
|
88
|
+
canonicalized_aws_headers + canonicalized_resource(force_path_style)
|
89
|
+
|
90
|
+
digest = OpenSSL::HMAC.digest(OpenSSL::Digest.new('sha1'), secret_access_key, string_to_sign)
|
91
|
+
Base64.encode64(digest).strip
|
92
|
+
end
|
93
|
+
|
94
|
+
def canonicalized_resource(force_path_style = false)
|
95
|
+
result = ''
|
96
|
+
|
97
|
+
if @bucket.present?
|
98
|
+
result = '/'
|
99
|
+
result += "#{@bucket}/"
|
100
|
+
end
|
101
|
+
|
102
|
+
if @bucket.blank? || @resource != '/'
|
103
|
+
result = File.join(result, @resource)
|
104
|
+
end
|
105
|
+
|
106
|
+
result += '?' if @cano_resource.present?
|
107
|
+
result += @cano_resource if @cano_resource
|
108
|
+
|
109
|
+
result
|
110
|
+
end
|
111
|
+
|
112
|
+
def calc_date
|
113
|
+
return @date if @date
|
114
|
+
@date = Time.now.httpdate
|
115
|
+
|
116
|
+
@date
|
117
|
+
end
|
118
|
+
|
119
|
+
def import?
|
120
|
+
@import
|
121
|
+
end
|
122
|
+
|
123
|
+
def multipart?
|
124
|
+
@multipart
|
125
|
+
end
|
126
|
+
|
127
|
+
def raw_data?
|
128
|
+
@raw_data
|
129
|
+
end
|
130
|
+
|
131
|
+
def blank_body?
|
132
|
+
@blank_body
|
133
|
+
end
|
134
|
+
|
135
|
+
def to_s
|
136
|
+
[
|
137
|
+
"method:#{@method}",
|
138
|
+
"resource: #{@resource}",
|
139
|
+
"cano_resource: #{@cano_resource}",
|
140
|
+
"query_params: #{@query_params}",
|
141
|
+
"bucket: #{@bucket}",
|
142
|
+
"parameters: #{@parameters}",
|
143
|
+
"headers: #{@headers}"
|
144
|
+
].join(", ")
|
145
|
+
end
|
146
|
+
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
@@ -0,0 +1,363 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
require 'mime-types'
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module S3
  class Client::API
    # Storage-level REST operations: bucket/object CRUD plus two higher-level
    # workflows (gzip bulk import and S3 multipart upload).
    #
    # NOTE(review): expects `execute_storage` to be provided by the including
    # API class, and depends on ActiveSupport (`blank?`), REXML and the
    # mime-types gem being loaded elsewhere.
    module Storage
      # GET Service — lists all buckets owned by the authenticated user.
      def buckets
        execute_storage(RestParameter.new(:get, '/'))
      end

      # GET Bucket — lists objects in +bucket+. The optional filters map
      # directly onto the standard S3 ListObjects query parameters.
      def objects(bucket, prefix: nil, max: nil, marker: nil, delimiter: nil)
        query_params = {}
        query_params['prefix']    = prefix    if prefix
        query_params['max-keys']  = max       if max
        query_params['marker']    = marker    if marker
        query_params['delimiter'] = delimiter if delimiter

        execute_storage(RestParameter.new(:get, '/', bucket: bucket, query_params: query_params))
      end

      # PUT Bucket — creates +bucket+ with a LocationConstraint of @location.
      def create_bucket(bucket, options = {})
        options = options.merge(bucket: bucket, content_type: 'application/xml')
        # FIX: splat the hash into keyword arguments. Passing the Hash
        # positionally relied on implicit hash-to-kwargs conversion, which
        # Ruby 3 removed.
        execute_storage(RestParameter.new(:put, '/', **options)) do
          root = REXML::Element.new('CreateBucketConfiguration')
          root.add_attribute('xmlns', 'http://s3.amazonaws.com/doc/2006-03-01/')
          child = REXML::Element.new('LocationConstraint')
          child.add_text(@location)
          root.add_element(child)
          root
        end
      end

      # PUT Object — uploads one object whose body is produced by +block+.
      # Content type is guessed from the object name's extension.
      def create_object(bucket, object_name, options = {}, &block)
        resource = "/#{object_name}"
        type = MIME::Types.type_for(object_name).first
        content_type = type ? type.to_s : 'application/octet-stream'
        options = options.merge(bucket: bucket, content_type: content_type)
        execute_storage(RestParameter.new(:put, resource, **options), &block)
      end

      # Multipart upload workflow: initiate, upload parts (input stream
      # supplied by +block+), then complete — aborting on any failure so the
      # service can reclaim already-uploaded parts.
      def create_multipart_object(bucket, object_name, options = {}, &block)
        mu = MultipartUpload.new(bucket, object_name, options) do
          self
        end

        upload_id = mu.initiate_multipart_upload

        begin
          upload_objects = mu.upload_part(upload_id, &block)
          mu.complete_multipart_upload(upload_id, upload_objects)
        rescue => e
          mu.abort_multipart_upload(upload_id)
          raise e
        end
      end

      # GET Object — fetches an object, optionally a byte +range+ (a Range;
      # an end of -1 means "to the end of the object").
      def get_object(bucket, object, range = nil)
        resource = "/#{object}"
        headers = {}
        if range
          bt = "bytes=#{range.first}-"
          bt += "#{range.last}" if range.last != -1
          headers[:Range] = bt
        end
        execute_storage(RestParameter.new(:get, resource, bucket: bucket, raw_data: true, headers: headers))
      end

      # DELETE Bucket.
      def delete_bucket(bucket)
        execute_storage(RestParameter.new(:delete, '/', bucket: bucket))
      end

      # DELETE Object.
      def delete_object(bucket, object)
        execute_storage(RestParameter.new(:delete, "/#{object}", bucket: bucket, content_type: 'application/json'))
      end

      # Bulk import: splits +file_paths+ into gzip chunks and uploads them as
      # labeled objects under "<db_name>/<tbl_name>/".
      def import(db_name, tbl_name, file_paths, options = {})
        _import = Import.new(db_name, tbl_name, file_paths, options) do
          self
        end

        # Next free numeric suffix for the label (Integer).
        suffix = _import.calc_label_suffix

        upload_objects = _import.execute(suffix)

        STDERR.puts "finished upload #{upload_objects.size} objects."
        STDERR.puts
        STDERR.puts 'upload_objects:'
        upload_objects.each do |o|
          STDERR.puts o
        end
      end

      private

      # Splits input files/streams into gzip chunks and uploads them from a
      # pool of worker threads.
      class Import
        def initialize(db_name, tbl_name, file_paths, options = {}, &block)
          @db_name = db_name
          @tbl_name = tbl_name # FIX: was assigned to a misspelled @tbl_naem
          @file_paths = file_paths
          @jobs = options.delete(:jobs) || 1
          @label = options.delete(:label) || 'label'
          @splitsz = options.delete(:splitsz) || 100 * 1024 ** 2 # 100MB
          @api = block.call

          # Objects starting with '_' or '.' are hidden from listings.
          if %w(_ .).include? @label[0]
            raise S3::Client::ParameterInvalid.new("label should not start with '_' or '.'")
          end

          import_parameter = ImportParameter.instance
          import_parameter.db_name = db_name
          import_parameter.tbl_name = tbl_name
          import_parameter.label = @label

          STDERR.puts "Initialize...\njobs: #{@jobs}, splitsz: #{@splitsz}"
        end

        # Scans existing objects for "<label>_<n>" and returns the next free
        # n (0 when nothing is there yet).
        def calc_label_suffix
          prefix = ImportParameter.instance.storage_prefix
          xml_doc = @api.objects(@db_name, prefix: prefix)
          objects = S3::Concerns::ObjectsResult.new(xml_doc).objects

          return 0 if objects.blank?

          # FIX: the old `.first.try(:+, 1)` returned nil when objects existed
          # but none matched the label pattern, crashing later arithmetic.
          max = objects.flat_map { |o| o.scan(/#{@label}_(\d+)/) }.flatten.map(&:to_i).max
          max ? max + 1 : 0
        end

        # Uploads every input; returns the labels of the uploaded objects.
        def execute(suffix)
          file_paths = @file_paths.is_a?(String) ? [@file_paths] : @file_paths

          upload_objects = []
          file_paths.each do |file_path|
            file_index = if file_path.end_with?('.gz')
                           import_gz_file(file_path, suffix, upload_objects)
                         elsif file_path == "-"
                           import_stream($stdin, suffix, upload_objects)
                         else
                           import_text_file(file_path, suffix, upload_objects)
                         end

            # Continue numbering where the previous file left off.
            suffix += file_index
          end

          upload_objects
        end

        def import_gz_file(file_path, suffix, upload_objects)
          import_stream(Zlib::GzipReader.open(file_path), suffix, upload_objects)
        rescue Zlib::Error
          # Not actually gzip despite the extension — treat as plain text.
          import_text_file(file_path, suffix, upload_objects)
        end

        def import_text_file(file_path, suffix, upload_objects)
          import_stream(File.open(file_path), suffix, upload_objects)
        end

        # Reads +ifp+ in @splitsz chunks (each extended to the next newline so
        # no record is split across parts) and uploads them from @jobs worker
        # threads. Returns the number of chunks read.
        def import_stream(ifp, suffix, upload_objects)
          q = SizedQueue.new(@jobs)
          workers = Array.new(@jobs) do
            Thread.new do
              while (data = q.pop)
                STDERR.puts "> starting upload part #{data[2]}, #{data[1].length}"
                execute_storage_detail(data[1], suffix + data[0])
                STDERR.puts "< finished upload part #{data[2]}, #{data[1].length}"
                upload_objects << ImportParameter.instance.object_label(suffix + data[0])
              end
              # Propagate the nil terminator so sibling workers also stop.
              q.push nil
            end
          end

          file_index = 0
          begin
            import_index = ImportParameter.instance.index
            loop do
              buffer = ifp.read(@splitsz)
              break unless buffer
              buffer.force_encoding("ASCII-8BIT")
              # Extend the chunk to the end of the current line.
              nline = ifp.gets
              if nline
                nline.force_encoding("ASCII-8BIT")
                buffer.concat(nline)
              end
              q.push [file_index, buffer, import_index]
              file_index += 1
              import_index += 1
            end
          ensure
            # FIX: always signal the workers, even if reading raised —
            # otherwise they block on q.pop forever.
            q.push nil
          end

          workers.map(&:join)
          ifp.close

          file_index
        end

        # Gzips +data+ and PUTs it as "<tbl_name>/<label>_<suffix>.gz".
        def execute_storage_detail(data, suffix)
          str = StringIO.new
          gz = Zlib::GzipWriter.new(str)
          gz.write data
          gz.close

          options = {
            content_type: 'application/x-gzip',
            bucket: @db_name,
            import: true
          }

          resource = ImportParameter.instance.url(suffix)
          @api.execute_storage(RestParameter.new(:put, resource, **options)) do
            str.string
          end
        end

        # Shared naming state for one import run (paths, labels, prefixes).
        class ImportParameter
          include Singleton

          attr_accessor :db_name, :tbl_name, :label, :index

          def initialize
            @index = 1
          end

          def url(suffix)
            "/#{@tbl_name}/#{@label}_#{suffix}.gz"
          end

          def object_label(suffix)
            "/#{@db_name}/#{@tbl_name}/#{@label}_#{suffix}.gz"
          end

          def file_label(suffix)
            "#{@label}_#{suffix}"
          end

          def storage_prefix
            "#{@tbl_name}/#{@label}"
          end
        end
      end

      # Implements the S3 multipart-upload protocol with parallel part
      # uploads read from a caller-supplied stream.
      class MultipartUpload
        def initialize(bucket, object, options = {}, &block)
          type = MIME::Types.type_for(object).first
          content_type = type ? type.to_s : 'application/octet-stream'
          options = options.merge(bucket: bucket, content_type: content_type)

          @bucket = bucket
          @object = object
          @splitsz = options.delete(:splitsz) || 100 * 1024 ** 2 # 100MB
          @jobs = options.delete(:jobs) || 1
          @options = options
          @api = block.call
        end

        # POST ?uploads — returns the UploadId for this session.
        def initiate_multipart_upload
          STDERR.puts "Initiate multipart upload...\njobs:#{@jobs}, splitsz:#{@splitsz}"
          resource = "/#{@object}?uploads"
          response = @api.execute_storage(RestParameter.new(:post, resource, **@options))
          response.elements['InitiateMultipartUploadResult/UploadId'].text
        end

        # Uploads all parts; returns {part_number => etag} sorted by part.
        def upload_part(upload_id, &block)
          upload_objects = {}
          split_stream(upload_id, upload_objects, &block)
          Hash[upload_objects.sort]
        end

        # POST ?uploadId=... with the part-number/ETag manifest.
        def complete_multipart_upload(upload_id, upload_objects)
          resource = "/#{@object}?uploadId=#{upload_id}"

          parts = upload_objects.map do |part, etag|
            "<Part><PartNumber>#{part}</PartNumber><ETag>#{etag}</ETag></Part>"
          end
          payload = "<CompleteMultipartUpload>#{parts.join}</CompleteMultipartUpload>"

          @api.execute_storage(RestParameter.new(:post, resource, **@options)) do
            payload
          end

          puts "complete multipart upload."
        end

        # DELETE ?uploadId=... — discards all uploaded parts.
        def abort_multipart_upload(upload_id)
          resource = "/#{@object}?uploadId=#{upload_id}"
          @api.execute_storage(RestParameter.new(:delete, resource, **@options))
        end

        private

        # Reads the stream produced by +block+ in @splitsz chunks and PUTs
        # each as a numbered part from @jobs worker threads.
        def split_stream(upload_id, upload_objects, &block)
          limit = 5 * 1024 ** 2 # S3's minimum part size (5MB)
          raise "split size is invalid. below lower limit of #{limit} byte" if @splitsz < limit

          ifp = block.call

          q = SizedQueue.new(@jobs)
          workers = Array.new(@jobs) do
            Thread.new do
              while (data = q.pop)
                puts "> starting upload part #{data[0]}, #{data[1].length}"
                resource = "/#{@object}?partNumber=#{data[0]}&uploadId=#{upload_id}"
                response = @api.execute_storage(RestParameter.new(:put, resource, **@options)) do
                  data[1]
                end
                puts "< finished upload part #{data[0]}, #{data[1].length}"
                upload_objects[data[0]] = response.headers['ETag'].first
              end
              # Propagate the nil terminator so sibling workers also stop.
              q.push nil
            end
          end

          file_index = 1
          begin
            loop do
              buffer = ifp.read(@splitsz)
              break unless buffer
              buffer.force_encoding("ASCII-8BIT")

              q.push [file_index, buffer]
              file_index += 1
            end
          ensure
            # FIX: always signal the workers, even if reading raised.
            q.push nil
          end

          workers.map(&:join)
          puts "finished upload #{file_index-1} part objects."
        end
      end
    end
  end
end
|