s3-client 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/Gemfile +13 -0
- data/LICENSE.txt +174 -0
- data/Rakefile +40 -0
- data/config/settings.yml +11 -0
- data/lib/s3/client.rb +31 -0
- data/lib/s3/client/api.rb +258 -0
- data/lib/s3/client/api/rest_parameter.rb +149 -0
- data/lib/s3/client/api/storage.rb +363 -0
- data/lib/s3/client/exception.rb +20 -0
- data/lib/s3/client/model/bucket.rb +20 -0
- data/lib/s3/client/model/bucket_collection.rb +35 -0
- data/lib/s3/client/model/concerns/buckets_result.rb +21 -0
- data/lib/s3/client/model/concerns/objects_result.rb +35 -0
- data/lib/s3/client/model/object.rb +56 -0
- data/lib/s3/client/model/object_collection.rb +65 -0
- data/lib/s3/client/storage.rb +43 -0
- data/lib/s3/settings.rb +9 -0
- data/lib/s3/version.rb +3 -0
- data/s3-client.gemspec +30 -0
- metadata +176 -0
--- /dev/null
+++ data/lib/s3/client/api/rest_parameter.rb
@@ -0,0 +1,149 @@
+module S3
+  class Client::API
+    class RestParameter
+
+      def initialize(method, resource, cano_resource: nil, query_params: {},
+                     parameters: {}, bucket: '', content_type: nil, import: false,
+                     raw_data: false, blank_body: false, headers: {}, multipart: false)
+        @method = method
+        @resource = resource
+        @cano_resource = cano_resource
+        @query_params = query_params
+        @parameters = parameters
+        @bucket = bucket
+        @content_type = content_type
+        @import = import
+        @raw_data = raw_data
+        @blank_body = blank_body
+        @headers = headers
+        @multipart = multipart
+      end
+
+      attr_reader :method
+      attr_reader :resource
+      attr_reader :cano_resource
+      attr_reader :query_params
+      attr_reader :parameters
+      attr_reader :bucket
+      attr_reader :content_type
+      attr_reader :headers
+
+      def url(uri, force_path_style = false)
+        url = uri.host
+        url += ":#{uri.port}" unless uri.port == 80 || uri.port == 443
+
+        if @bucket.present?
+          if force_path_style
+            url += '/' unless url.end_with? "/"
+            url += @bucket
+          else
+            url = [@bucket, url].join('.')
+            url += '/' unless url.end_with? "/"
+          end
+        end
+
+        if @bucket.blank? || @resource != '/'
+          url = File.join(url, @resource)
+        end
+
+        url += '/' if url.split('/').last == @bucket
+        url += '?' if @cano_resource.present? || @query_params.present?
+        url += @cano_resource if @cano_resource
+        url += '&' if @cano_resource.present? && @query_params.present?
+        url += "#{@query_params.to_param}" if @query_params.present?
+
+        uri.scheme + '://' + url
+      end
+
+      def http_verb
+        @method.to_s.upcase
+      end
+
+      def signature_content_type
+        result = ""
+        if @content_type.present?
+          result << @content_type
+        end
+
+        result << "\n"
+
+        result
+      end
+
+      def authentication(access_key_id, secret_access_key, force_path_style)
+
+        "AWS" + " " + access_key_id + ":" + signature(secret_access_key, force_path_style)
+      end
+
+      def signature(secret_access_key, force_path_style = false)
+        http_verb = "#{self.http_verb}\n"
+        content_md5 = "\n"
+        content_type = signature_content_type
+        date = "#{calc_date}\n"
+
+        canonicalized_aws_headers = ""
+
+        string_to_sign = http_verb + content_md5 + content_type + date +
+                         canonicalized_aws_headers + canonicalized_resource(force_path_style)
+
+        digest = OpenSSL::HMAC.digest(OpenSSL::Digest.new('sha1'), secret_access_key, string_to_sign)
+        Base64.encode64(digest).strip
+      end
+
+      def canonicalized_resource(force_path_style = false)
+        result = ''
+
+        if @bucket.present?
+          result = '/'
+          result += "#{@bucket}/"
+        end
+
+        if @bucket.blank? || @resource != '/'
+          result = File.join(result, @resource)
+        end
+
+        result += '?' if @cano_resource.present?
+        result += @cano_resource if @cano_resource
+
+        result
+      end
+
+      def calc_date
+        return @date if @date
+        @date = Time.now.httpdate
+
+        @date
+      end
+
+      def import?
+        @import
+      end
+
+      def multipart?
+        @multipart
+      end
+
+      def raw_data?
+        @raw_data
+      end
+
+      def blank_body?
+        @blank_body
+      end
+
+      def to_s
+        [
+          "method:#{@method}",
+          "resource: #{@resource}",
+          "cano_resource: #{@cano_resource}",
+          "query_params: #{@query_params}",
+          "bucket: #{@bucket}",
+          "parameters: #{@parameters}",
+          "headers: #{@headers}"
+        ].join(", ")
+      end
+
+    end
+  end
+end
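
RestParameter models one REST call against the storage API and signs it AWS-Signature-Version-2 style: the string to sign is the HTTP verb, Content-MD5, Content-Type, and Date, each newline-terminated, followed by the canonicalized resource; that string is HMAC-SHA1'd with the secret key and Base64-encoded. A minimal sketch of driving the class directly follows; it is not part of the gem, the endpoint host and credentials are invented, and ActiveSupport must already be loaded since the class leans on present?, blank?, and to_param:

    require 'uri'
    require 'time'            # Time#httpdate, used by calc_date
    require 'openssl'
    require 'base64'
    require 'active_support/all'
    require 's3/client'       # presumed gem entry point (data/lib/s3/client.rb)

    param = S3::Client::API::RestParameter.new(
      :get, '/',                                 # GET on the bucket root
      bucket: 'mybucket',
      query_params: { 'prefix' => 'logs/' }
    )

    uri = URI.parse('https://s3.example.com')
    param.url(uri)       # => "https://mybucket.s3.example.com/?prefix=logs%2F"
    param.http_verb      # => "GET"

    # Authorization header value: "AWS <access_key_id>:<base64 hmac-sha1>"
    param.authentication('AKIAEXAMPLE', 'secret', false)
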
--- /dev/null
+++ data/lib/s3/client/api/storage.rb
@@ -0,0 +1,363 @@
+require 'zlib'
+require 'mime-types'
+require 'singleton'
+
+module S3
+  class Client::API
+    module Storage
+      def buckets
+        execute_storage(RestParameter.new(:get, '/'))
+      end
+
+      def objects(bucket, prefix: nil, max: nil, marker: nil, delimiter: nil)
+        resource = '/'
+        query_params = {}
+        if prefix
+          query_params.merge!('prefix' => prefix)
+        end
+
+        if max
+          query_params.merge!('max-keys' => max)
+        end
+
+        if marker
+          query_params.merge!('marker' => marker)
+        end
+
+        if delimiter
+          query_params.merge!('delimiter' => delimiter)
+        end
+
+        execute_storage(RestParameter.new(:get, resource, bucket: bucket, query_params: query_params))
+      end
+
+      def create_bucket(bucket, options = {})
+        resource = '/'
+
+        options = options.merge(bucket: bucket, content_type: 'application/xml')
+        execute_storage(RestParameter.new(:put, resource, options)) do
+          root = REXML::Element.new('CreateBucketConfiguration')
+          root.add_attribute('xmlns', 'http://s3.amazonaws.com/doc/2006-03-01/')
+          child = REXML::Element.new('LocationConstraint')
+          child.add_text(@location)
+          root.add_element(child)
+          root
+        end
+      end
+
+      def create_object(bucket, object_name, options = {}, &block)
+        resource = "/#{object_name}"
+
+        type = MIME::Types.type_for(object_name).first
+        content_type = type ? type.to_s : 'application/octet-stream'
+        options = options.merge(bucket: bucket, content_type: content_type)
+        execute_storage(RestParameter.new(:put, resource, options), &block)
+      end
+
+      def create_multipart_object(bucket, object_name, options = {}, &block)
+        mu = MultipartUpload.new(bucket, object_name, options) do
+          self
+        end
+
+        # Initiate Multipart Upload
+        upload_id = mu.initiate_multipart_upload
+
+        begin
+          # Upload Part
+          upload_objects = mu.upload_part(upload_id, &block)
+
+          # Complete Multipart Upload
+          mu.complete_multipart_upload(upload_id, upload_objects)
+
+        rescue => e
+          # Abort Multipart Upload
+          mu.abort_multipart_upload(upload_id)
+
+          raise e
+        end
+      end
+
+      def get_object(bucket, object, range = nil)
+        resource = "/#{object}"
+        headers = {}
+        if range
+          bt = "bytes=#{range.first}-"
+          bt += "#{range.last}" if range.last != -1
+          headers[:Range] = bt
+        end
+        execute_storage(RestParameter.new(:get, resource, bucket: bucket, raw_data: true, headers: headers))
+      end
+
+      def delete_bucket(bucket)
+        resource = '/'
+        execute_storage(RestParameter.new(:delete, resource, bucket: bucket))
+      end
+
+      def delete_object(bucket, object)
+        resource = "/#{object}"
+        execute_storage(RestParameter.new(:delete, resource, bucket: bucket, content_type: 'application/json'))
+      end
+
+      def import(db_name, tbl_name, file_paths, options = {})
+        _import = Import.new(db_name, tbl_name, file_paths, options) do
+          self
+        end
+
+        # calc label suffix => Fixnum
+        suffix = _import.calc_label_suffix
+
+        # import execute
+        upload_objects = _import.execute(suffix)
+
+        STDERR.puts "finished upload #{upload_objects.size} objects."
+        STDERR.puts
+        STDERR.puts 'upload_objects:'
+        upload_objects.each do |o|
+          STDERR.puts o
+        end
+      end
+
+      private
+
+      class Import
+        def initialize(db_name, tbl_name, file_paths, options = {}, &block)
+          @db_name = db_name
+          @tbl_name = tbl_name
+          @file_paths = file_paths
+          @jobs = options.delete(:jobs) || 1
+          @label = options.delete(:label) || 'label'
+          @splitsz = options.delete(:splitsz) || 100 * 1024 ** 2 # 100MB
+          @api = block[]
+
+          import_parameter = ImportParameter.instance
+          import_parameter.db_name = db_name
+          import_parameter.tbl_name = tbl_name
+          import_parameter.label = @label
+
+          if %w(_ .).include? @label[0]
+            raise S3::Client::ParameterInvalid.new("label should not start with '_' or '.'")
+          end
+
+          STDERR.puts "Initialize...\njobs: #{@jobs}, splitsz: #{@splitsz}"
+        end
+
+        def calc_label_suffix
+          prefix = ImportParameter.instance.storage_prefix
+          xml_doc = @api.objects(@db_name, prefix: prefix)
+          objects_result = S3::Concerns::ObjectsResult.new(xml_doc)
+          objects = objects_result.objects
+
+          return 0 if objects.blank?
+
+          objects.map { |o| o.scan(/#{@label}_(\d+)/) }.flatten.map(&:to_i).sort.reverse.first.try(:+, 1)
+        end
+
+        def execute(suffix)
+          file_paths = @file_paths.is_a?(String) ? [@file_paths] : @file_paths
+
+          upload_objects = []
+          file_paths.each do |file_path|
+            file_index = if file_path.end_with?('.gz')
+                           import_gz_file(file_path, suffix, upload_objects)
+                         elsif file_path == "-"
+                           import_stream($stdin, suffix, upload_objects)
+                         else
+                           import_text_file(file_path, suffix, upload_objects)
+                         end
+
+            suffix += file_index
+          end
+
+          return upload_objects
+        end
+
+        def import_gz_file(file_path, suffix, upload_objects)
+          import_stream(Zlib::GzipReader.open(file_path), suffix, upload_objects)
+        rescue Zlib::Error
+          # if not gzip
+          import_text_file(file_path, suffix, upload_objects)
+        end
+
+        def import_text_file(file_path, suffix, upload_objects)
+          import_stream(File.open(file_path), suffix, upload_objects)
+        end
+
+        def import_stream(ifp, suffix, upload_objects)
+          q = SizedQueue.new(@jobs)
+          th = Array.new(@jobs) {
+            Thread.new {
+              while data = q.pop
+                break unless data
+                STDERR.puts "> starting upload part #{data[2]}, #{data[1].length}"
+                execute_storage_detail(data[1], suffix + data[0])
+                STDERR.puts "< finished upload part #{data[2]}, #{data[1].length}"
+                upload_objects << ImportParameter.instance.object_label(suffix + data[0])
+              end
+              q.push nil
+            }
+          }
+
+          begin
+            file_index = 0
+            import_index = ImportParameter.instance.index
+            while true
+              buffer = ifp.read(@splitsz)
+              break unless buffer
+              buffer.force_encoding("ASCII-8BIT")
+              nline = ifp.gets
+              if nline
+                nline.force_encoding("ASCII-8BIT")
+                buffer.concat(nline)
+              end
+              q.push [file_index, buffer, import_index]
+              file_index += 1
+              import_index += 1
+            end
+            q.push nil
+          end
+
+          th.map(&:join)
+          ifp.close
+
+          file_index
+        end
+
+        def execute_storage_detail(data, suffix)
+          str = StringIO.new
+          gz = Zlib::GzipWriter.new(str)
+          gz.write data
+          gz.close
+
+          options = {
+            content_type: 'application/x-gzip',
+            bucket: @db_name,
+            import: true
+          }
+
+          resource = ImportParameter.instance.url(suffix)
+          @api.execute_storage(RestParameter.new(:put, resource, options)) do
+            str.string
+          end
+        end
+
+        class ImportParameter
+          include Singleton
+
+          attr_accessor :db_name, :tbl_name, :label, :index
+
+          def initialize
+            @index = 1
+          end
+
+          def url(suffix)
+            "/#{@tbl_name}/#{@label}_#{suffix}.gz"
+          end
+
+          def object_label(suffix)
+            "/#{@db_name}/#{@tbl_name}/#{@label}_#{suffix}.gz"
+          end
+
+          def file_label(suffix)
+            "#{@label}_#{suffix}"
+          end
+
+          def storage_prefix
+            "#{@tbl_name}/#{@label}"
+          end
+        end
+      end
+
+      class MultipartUpload
+        def initialize(bucket, object, options = {}, &block)
+          type = MIME::Types.type_for(object).first
+          content_type = type ? type.to_s : 'application/octet-stream'
+          options = options.merge(bucket: bucket, content_type: content_type)
+
+          @bucket = bucket
+          @object = object
+          @splitsz = options.delete(:splitsz) || 100 * 1024 ** 2 # 100MB
+          @jobs = options.delete(:jobs) || 1
+          @options = options
+          @api = block[]
+        end
+
+        def initiate_multipart_upload
+          STDERR.puts "Initiate multipart upload...\njobs:#{@jobs}, splitsz:#{@splitsz}"
+          resource = "/#{@object}?uploads"
+          response = @api.execute_storage(RestParameter.new(:post, resource, @options))
+          upload_id = response.elements['InitiateMultipartUploadResult/UploadId'].text
+          return upload_id
+        end
+
+        def upload_part(upload_id, &block)
+          upload_objects = {}
+          split_stream(upload_id, upload_objects, &block)
+          return Hash[upload_objects.sort]
+        end
+
+        def complete_multipart_upload(upload_id, upload_objects)
+          resource = "/#{@object}?uploadId=#{upload_id}"
+
+          payload = '<CompleteMultipartUpload>'
+          upload_objects.each do |part, etag|
+            payload += "<Part><PartNumber>#{part}</PartNumber><ETag>#{etag}</ETag></Part>"
+          end
+          payload += '</CompleteMultipartUpload>'
+
+          @api.execute_storage(RestParameter.new(:post, resource, @options)) do
+            payload
+          end
+
+          puts "complete multipart upload."
+        end
+
+        def abort_multipart_upload(upload_id)
+          resource = "/#{@object}?uploadId=#{upload_id}"
+          @api.execute_storage(RestParameter.new(:delete, resource, @options))
+        end
+
+        private
+
+        def split_stream(upload_id, upload_objects, &block)
+          limit = 5 * 1024 ** 2 # 5MB
+          raise "split size is invalid. below lower limit of #{limit} byte" if @splitsz < limit
+
+          ifp = block[]
+
+          q = SizedQueue.new(@jobs)
+          th = Array.new(@jobs) {
+            Thread.new {
+              while data = q.pop
+                break unless data
+                puts "> starting upload part #{data[0]}, #{data[1].length}"
+                resource = "/#{@object}?partNumber=#{data[0]}&uploadId=#{upload_id}"
+                response = @api.execute_storage(RestParameter.new(:put, resource, @options)) do
+                  data[1]
+                end
+                puts "< finished upload part #{data[0]}, #{data[1].length}"
+                upload_objects[data[0]] = response.headers['ETag'].first
+              end
+              q.push nil
+            }
+          }
+
+          begin
+            file_index = 1
+            while true
+              buffer = ifp.read(@splitsz)
+              break unless buffer
+              buffer.force_encoding("ASCII-8BIT")
+
+              q.push [file_index, buffer]
+              file_index += 1
+            end
+            q.push nil
+          end
+
+          th.map(&:join)
+          puts "finished upload #{file_index-1} part objects."
+        end
+      end
+    end
+  end
+end
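
Storage is the user-facing half of the API: bucket and object CRUD plus two bulk helpers. Import gzips line-aligned chunks of splitsz bytes and uploads them as <tbl_name>/<label>_<suffix>.gz objects, while MultipartUpload drives the S3 initiate / upload-part / complete (or abort) sequence; both fan work out over `jobs` worker threads through a SizedQueue, which caps buffered data at roughly jobs × splitsz bytes. A hypothetical usage sketch follows, with `api` standing in for whatever client.rb/api.rb construct around this module; execute_storage itself lives in api.rb and is not shown in this diff:

    # `api` is assumed to include S3::Client::API::Storage and to
    # implement execute_storage (defined in data/lib/s3/client/api.rb).
    api.buckets                                           # GET /
    api.objects('mybucket', prefix: 'logs/', max: 100)    # GET /?prefix=logs%2F&max-keys=100

    # Small upload: the block supplies the request body.
    api.create_object('mybucket', 'hello.txt') { 'hello world' }

    # Ranged read: 0..1023 becomes the header "Range: bytes=0-1023".
    api.get_object('mybucket', 'hello.txt', 0..1023)

    # Multipart upload: the block must return an IO, which is read in
    # splitsz-sized chunks (5 MB is the enforced per-part minimum) and
    # uploaded by `jobs` threads in parallel.
    api.create_multipart_object('mybucket', 'big.bin',
                                splitsz: 5 * 1024 ** 2, jobs: 4) do
      File.open('/path/to/big.bin')
    end
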