s3-client 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,149 @@
1
+ module S3
2
+ class Client::API
3
+ class RestParameter
4
+
5
# Stores the description of one REST request.
#
# @param method [Symbol, String] HTTP method, later upcased by #http_verb
# @param resource [String] path of the resource ('/' for the root)
# @param cano_resource [String, nil] sub-resource appended after '?' in the
#   URL and in the canonicalized resource
# @param query_params [Hash] query parameters appended to the URL
# @param parameters [Hash] stored as given and exposed via the reader
# @param bucket [String] bucket name; empty when none applies
# @param content_type [String, nil] used for the signature's Content-Type line
# @param import [Boolean] flag reported by #import?
# @param raw_data [Boolean] flag reported by #raw_data?
# @param blank_body [Boolean] flag reported by #blank_body?
# @param headers [Hash] stored as given and exposed via the reader
# @param multipart [Boolean] flag reported by #multipart?
def initialize(method, resource, cano_resource: nil, query_params: {},
               parameters: {}, bucket: '', content_type: nil, import: false,
               raw_data: false, blank_body: false, headers: {}, multipart: false)
  # Addressing
  @method, @resource, @bucket = method, resource, bucket
  @cano_resource, @query_params = cano_resource, query_params

  # Payload description
  @parameters, @content_type, @headers = parameters, content_type, headers

  # Behaviour flags
  @import, @multipart = import, multipart
  @raw_data, @blank_body = raw_data, blank_body
end
22
+
23
+ attr_reader :method
24
+ attr_reader :resource
25
+ attr_reader :cano_resource
26
+ attr_reader :query_params
27
+ attr_reader :parameters
28
+ attr_reader :bucket
29
+ attr_reader :content_type
30
+ attr_reader :headers
31
+
32
# Builds the full request URL for this request.
#
# With a bucket, the bucket is either appended to the host as the first path
# segment (path style) or prepended to the host as a sub-domain. The bucket
# root always ends in '/'. The trailing slash is decided from @resource
# rather than from the last path segment, so an object whose name equals the
# bucket name does not get a spurious '/' appended.
#
# @param uri [URI] endpoint; host, port and scheme are read from it
# @param force_path_style [Boolean] true to address the bucket as a path segment
# @return [String] "<scheme>://<host>[:port]/<path>[?<sub-resource>&<query>]"
def url(uri, force_path_style = false)
  location = uri.host
  # Default HTTP/HTTPS ports are left implicit.
  location += ":#{uri.port}" unless [80, 443].include?(uri.port)

  if @bucket.present?
    location = force_path_style ? "#{location}/#{@bucket}" : "#{@bucket}.#{location}/"
  end

  if @bucket.blank? || @resource != '/'
    location = File.join(location, @resource)
  elsif !location.end_with?('/')
    # Bucket root in path style: terminate "host/bucket" with a slash.
    location += '/'
  end

  has_query = @query_params.present?
  location += '?' if @cano_resource.present? || has_query
  location += @cano_resource if @cano_resource
  location += '&' if @cano_resource.present? && has_query
  location += @query_params.to_param if has_query

  "#{uri.scheme}://#{location}"
end
58
+
59
# @return [String] the request method as an upper-case string (e.g. "GET")
def http_verb
  "#{@method}".upcase
end
62
+
63
# Content-Type line of the string to sign.
#
# @return [String] the content type followed by "\n", or just "\n" when no
#   content type is set
def signature_content_type
  @content_type.present? ? "#{@content_type}\n" : "\n"
end
73
+
74
# Builds the value "AWS <access_key_id>:<signature>".
#
# @param access_key_id [String]
# @param secret_access_key [String] key handed to #signature
# @param force_path_style [Boolean] forwarded to #signature
# @return [String]
def authentication(access_key_id, secret_access_key, force_path_style)
  "AWS #{access_key_id}:#{signature(secret_access_key, force_path_style)}"
end
78
+
79
# Computes the Base64-encoded HMAC-SHA1 signature of this request.
#
# The signed string is the concatenation of: HTTP verb, an empty Content-MD5
# line, the Content-Type line, the date line, empty canonicalized headers and
# the canonicalized resource.
#
# @param secret_access_key [String] HMAC key
# @param force_path_style [Boolean] forwarded to #canonicalized_resource
# @return [String] Base64 signature without surrounding whitespace
def signature(secret_access_key, force_path_style = false)
  string_to_sign = [
    "#{http_verb}\n",
    "\n",                    # Content-MD5 is always left empty
    signature_content_type,
    "#{calc_date}\n",
    '',                      # no canonicalized headers are signed
    canonicalized_resource(force_path_style)
  ].join

  hmac = OpenSSL::HMAC.digest(OpenSSL::Digest.new('sha1'), secret_access_key, string_to_sign)
  Base64.encode64(hmac).strip
end
93
+
94
# Builds the canonicalized resource part of the string to sign.
#
# @param force_path_style [Boolean] accepted for signature compatibility;
#   not consulted by this method
# @return [String] "/<bucket>/<resource>" (or just the resource when no
#   bucket is set), followed by "?<sub-resource>" when one is present
def canonicalized_resource(force_path_style = false)
  path = @bucket.present? ? "/#{@bucket}/" : ''
  path = File.join(path, @resource) if @bucket.blank? || @resource != '/'

  path += '?' if @cano_resource.present?
  path += @cano_resource if @cano_resource
  path
end
111
+
112
# Returns the HTTP date for this request, computed on first use and then
# reused so every caller sees the same value.
#
# @return [String] result of Time#httpdate
def calc_date
  @date ||= Time.now.httpdate
end
118
+
119
# @return [Boolean] the +import+ flag given to the constructor
def import?
  @import
end

# @return [Boolean] the +multipart+ flag given to the constructor
def multipart?
  @multipart
end

# @return [Boolean] the +raw_data+ flag given to the constructor
def raw_data?
  @raw_data
end

# @return [Boolean] the +blank_body+ flag given to the constructor
def blank_body?
  @blank_body
end
134
+
135
+ def to_s
136
+ [
137
+ "method:#{@method}",
138
+ "resource: #{@resource}",
139
+ "cano_resource: #{@cano_resource}",
140
+ "query_params: #{@query_params}",
141
+ "bucket: #{@bucket}",
142
+ "parameters: #{@parameters}",
143
+ "headers: #{@headers}"
144
+ ].join(", ")
145
+ end
146
+
147
+ end
148
+ end
149
+ end
@@ -0,0 +1,363 @@
1
+ require 'zlib'
2
+ require 'mime-types'
3
+ require 'singleton'
4
+
5
+ module S3
6
+ class Client::API
7
+ module Storage
8
# Issues GET '/' without a bucket and returns whatever execute_storage returns.
def buckets
  listing_request = RestParameter.new(:get, '/')
  execute_storage(listing_request)
end
11
+
12
# Issues GET '/' against +bucket+ with the given listing filters.
#
# Only filters with a truthy value are sent, under the query keys
# 'prefix', 'max-keys', 'marker' and 'delimiter'.
#
# @param bucket [String]
# @param prefix [String, nil]
# @param max [Integer, nil] sent as 'max-keys'
# @param marker [String, nil]
# @param delimiter [String, nil]
# @return whatever execute_storage returns
def objects(bucket, prefix: nil, max: nil, marker: nil, delimiter: nil)
  filters = {
    'prefix' => prefix,
    'max-keys' => max,
    'marker' => marker,
    'delimiter' => delimiter
  }
  query_params = filters.select { |_name, value| value }

  execute_storage(RestParameter.new(:get, '/', bucket: bucket, query_params: query_params))
end
33
+
34
# Issues PUT '/' against +bucket+ with a CreateBucketConfiguration XML body
# whose LocationConstraint text is @location.
#
# The options are splatted as keywords because RestParameter#initialize only
# accepts keyword arguments after (method, resource); passing the Hash
# positionally raises ArgumentError on Ruby 3.
#
# @param bucket [String]
# @param options [Hash] extra RestParameter keyword options (symbol keys)
# @return whatever execute_storage returns
def create_bucket(bucket, options = {})
  options = options.merge(bucket: bucket, content_type: 'application/xml')

  execute_storage(RestParameter.new(:put, '/', **options)) do
    root = REXML::Element.new('CreateBucketConfiguration')
    root.add_attribute('xmlns', 'http://s3.amazonaws.com/doc/2006-03-01/')
    child = REXML::Element.new('LocationConstraint')
    child.add_text(@location)
    root.add_element(child)
    root
  end
end
47
+
48
# Issues PUT "/<object_name>" against +bucket+; the block is forwarded to
# execute_storage.
#
# The content type is the first MIME type registered for the object name, or
# 'application/octet-stream' when none is found. Options are splatted as
# keywords because RestParameter#initialize is keyword-only past its first
# two arguments (a positional Hash raises ArgumentError on Ruby 3).
#
# @param bucket [String]
# @param object_name [String]
# @param options [Hash] extra RestParameter keyword options (symbol keys)
# @return whatever execute_storage returns
def create_object(bucket, object_name, options = {}, &block)
  mime = MIME::Types.type_for(object_name).first
  content_type = mime ? mime.to_s : 'application/octet-stream'
  options = options.merge(bucket: bucket, content_type: content_type)

  execute_storage(RestParameter.new(:put, "/#{object_name}", **options), &block)
end
56
+
57
# Uploads an object in parts: initiate, upload the parts, then complete.
# If uploading or completing raises a StandardError, the upload is aborted
# and the error is re-raised.
#
# @param bucket [String]
# @param object_name [String]
# @param options [Hash] handed to MultipartUpload.new
# @yield forwarded to MultipartUpload#upload_part
# @return whatever MultipartUpload#complete_multipart_upload returns
def create_multipart_object(bucket, object_name, options = {}, &block)
  api = self
  uploader = MultipartUpload.new(bucket, object_name, options) { api }

  upload_id = uploader.initiate_multipart_upload

  begin
    parts = uploader.upload_part(upload_id, &block)
    uploader.complete_multipart_upload(upload_id, parts)
  rescue StandardError
    # Abort the upload, then let the original error propagate.
    uploader.abort_multipart_upload(upload_id)
    raise
  end
end
79
+
80
# Issues GET "/<object>" against +bucket+ with raw_data enabled.
#
# When +range+ is given, a :Range header "bytes=<first>-<last>" is sent; a
# last value of -1 leaves the end open ("bytes=<first>-").
#
# @param bucket [String]
# @param object [String]
# @param range [#first, #last, nil]
# @return whatever execute_storage returns
def get_object(bucket, object, range = nil)
  headers = {}
  if range
    finish = range.last
    headers[:Range] = finish == -1 ? "bytes=#{range.first}-" : "bytes=#{range.first}-#{finish}"
  end

  request = RestParameter.new(:get, "/#{object}", bucket: bucket, raw_data: true, headers: headers)
  execute_storage(request)
end
90
+
91
# Issues DELETE '/' against +bucket+.
#
# @param bucket [String]
# @return whatever execute_storage returns
def delete_bucket(bucket)
  removal = RestParameter.new(:delete, '/', bucket: bucket)
  execute_storage(removal)
end
95
+
96
# Issues DELETE "/<object>" against +bucket+ with content type
# 'application/json'.
#
# @param bucket [String]
# @param object [String]
# @return whatever execute_storage returns
def delete_object(bucket, object)
  removal = RestParameter.new(:delete, "/#{object}", bucket: bucket, content_type: 'application/json')
  execute_storage(removal)
end
100
+
101
# Uploads the given files through an Import helper and prints a summary of
# the uploaded objects to STDERR.
#
# @param db_name [String]
# @param tbl_name [String]
# @param file_paths [String, Array<String>]
# @param options [Hash] handed to Import.new
# @return the collection returned by Import#execute
def import(db_name, tbl_name, file_paths, options = {})
  api = self
  importer = Import.new(db_name, tbl_name, file_paths, options) { api }

  # The first free label suffix is computed before anything is uploaded.
  first_suffix = importer.calc_label_suffix
  upload_objects = importer.execute(first_suffix)

  STDERR.puts "finished upload #{upload_objects.size} objects."
  STDERR.puts
  STDERR.puts 'upload_objects:'
  upload_objects.each { |uploaded| STDERR.puts uploaded }
end
119
+
120
+ private
121
+
122
+ class Import
123
# Prepares an import run and publishes its naming parameters on the
# ImportParameter singleton.
#
# Changes from the previous version: the table name is stored under the
# correctly spelled @tbl_name, the caller's +options+ hash is only read
# (not emptied with #delete), and the label is validated before the shared
# ImportParameter singleton is modified.
#
# @param db_name [String]
# @param tbl_name [String]
# @param file_paths [String, Array<String>]
# @param options [Hash] :jobs (default 1), :label (default 'label'),
#   :splitsz in bytes (default 100 * 1024 ** 2)
# @yieldreturn the API object used for storage calls
# @raise [S3::Client::ParameterInvalid] when the label starts with '_' or '.'
def initialize(db_name, tbl_name, file_paths, options = {}, &block)
  @db_name = db_name
  @tbl_name = tbl_name
  @file_paths = file_paths
  @jobs = options[:jobs] || 1
  @label = options[:label] || 'label'
  @splitsz = options[:splitsz] || 100 * 1024 ** 2 # 100M
  @api = block[]

  if %w(_ .).include? @label[0]
    raise S3::Client::ParameterInvalid, "label should not start with '_' or '.'"
  end

  import_parameter = ImportParameter.instance
  import_parameter.db_name = db_name
  import_parameter.tbl_name = tbl_name
  import_parameter.label = @label

  STDERR.puts "Initialize...\njobs: #{@jobs}, splitsz: #{@splitsz}"
end
143
+
144
# Determines the first unused numeric suffix for this import's label.
#
# Lists the objects under ImportParameter#storage_prefix in the @db_name
# bucket and looks for "<label>_<number>" in their names.
#
# Changes from the previous version: when objects exist but none carries a
# numeric suffix, 0 is returned instead of nil (nil made the later
# `suffix + index` arithmetic fail), and the label is escaped before being
# used in the regular expression so characters like '.' match literally.
#
# @return [Integer] highest existing suffix + 1, or 0 when there is none
def calc_label_suffix
  prefix = ImportParameter.instance.storage_prefix
  xml_doc = @api.objects(@db_name, prefix: prefix)
  objects = S3::Concerns::ObjectsResult.new(xml_doc).objects

  return 0 if objects.blank?

  suffix_pattern = /#{Regexp.escape(@label)}_(\d+)/
  highest = objects.flat_map { |name| name.scan(suffix_pattern).flatten }.map(&:to_i).max
  highest ? highest + 1 : 0
end
154
+
155
# Imports every configured file, starting at +suffix+ and advancing it by
# the count each import returns.
#
# Paths ending in '.gz' go to #import_gz_file, the path "-" reads $stdin via
# #import_stream, anything else goes to #import_text_file.
#
# @param suffix [Integer] first label suffix to use
# @return [Array] the collection the import methods appended to
def execute(suffix)
  paths = @file_paths.is_a?(String) ? [@file_paths] : @file_paths
  uploaded = []

  paths.inject(suffix) do |next_suffix, path|
    part_count =
      if path.end_with?('.gz')
        import_gz_file(path, next_suffix, uploaded)
      elsif path == "-"
        import_stream($stdin, next_suffix, uploaded)
      else
        import_text_file(path, next_suffix, uploaded)
      end

    next_suffix + part_count
  end

  uploaded
end
173
+
174
# Imports a file through a gzip reader; if Zlib reports an error (for
# example the file is not gzip data), it is imported as plain text instead.
#
# @param file_path [String]
# @param suffix [Integer]
# @param upload_objects [Array]
# @return the result of #import_stream or #import_text_file
def import_gz_file(file_path, suffix, upload_objects)
  begin
    gzip_reader = Zlib::GzipReader.open(file_path)
    import_stream(gzip_reader, suffix, upload_objects)
  rescue Zlib::Error
    # Not gzip after all: fall back to the plain-text path.
    import_text_file(file_path, suffix, upload_objects)
  end
end
180
+
181
# Opens +file_path+ for reading and imports it via #import_stream.
#
# @param file_path [String]
# @param suffix [Integer]
# @param upload_objects [Array]
# @return the result of #import_stream
def import_text_file(file_path, suffix, upload_objects)
  source = File.new(file_path)
  import_stream(source, suffix, upload_objects)
end
184
+
185
# Splits +ifp+ into chunks and uploads them with @jobs worker threads.
#
# Each chunk is @splitsz bytes extended to the end of the current line (via
# ifp.gets). Chunks are numbered from 0 and uploaded through
# #execute_storage_detail under the label suffix `suffix + chunk number`;
# the object label of every finished chunk is appended to +upload_objects+.
#
# A nil pushed on the queue is the shutdown signal; each worker re-pushes it
# on exit so the next worker sees it too.
#
# Change from the previous version: +ifp+ is closed in an ensure clause, so
# the handle is released even when reading or an upload raises.
#
# @param ifp [IO] readable stream; closed when this method returns or raises
# @param suffix [Integer] label suffix of the first chunk
# @param upload_objects [Array] receives the labels of uploaded chunks
# @return [Integer] number of chunks read from +ifp+
def import_stream(ifp, suffix, upload_objects)
  queue = SizedQueue.new(@jobs)
  workers = Array.new(@jobs) do
    Thread.new do
      while (job = queue.pop)
        chunk_no, chunk, part_label = job
        STDERR.puts "> starting upload part #{part_label}, #{chunk.length}"
        execute_storage_detail(chunk, suffix + chunk_no)
        STDERR.puts "< finished upload part #{part_label}, #{chunk.length}"
        upload_objects << ImportParameter.instance.object_label(suffix + chunk_no)
      end
      queue.push nil
    end
  end

  file_index = 0
  import_index = ImportParameter.instance.index
  while (buffer = ifp.read(@splitsz))
    buffer.force_encoding("ASCII-8BIT")
    # Extend the chunk to the end of the line so records are not split.
    rest_of_line = ifp.gets
    if rest_of_line
      rest_of_line.force_encoding("ASCII-8BIT")
      buffer.concat(rest_of_line)
    end
    queue.push [file_index, buffer, import_index]
    file_index += 1
    import_index += 1
  end
  queue.push nil

  workers.each(&:join)
  file_index
ensure
  ifp.close unless ifp.closed?
end
224
+
225
# Gzips +data+ in memory and PUTs it to the import URL for +suffix+ in the
# @db_name bucket, flagged as an import request.
#
# The request options are splatted as keywords because
# RestParameter#initialize is keyword-only past its first two arguments
# (a positional Hash raises ArgumentError on Ruby 3).
#
# @param data [String] uncompressed chunk
# @param suffix [Integer] label suffix used to build the resource path
# @return whatever @api.execute_storage returns
def execute_storage_detail(data, suffix)
  compressed = StringIO.new
  gz = Zlib::GzipWriter.new(compressed)
  gz.write data
  gz.close

  options = {
    content_type: 'application/x-gzip',
    bucket: @db_name,
    import: true
  }

  resource = ImportParameter.instance.url(suffix)
  @api.execute_storage(RestParameter.new(:put, resource, **options)) do
    compressed.string
  end
end
242
+
243
# Process-wide (Singleton) holder of the names used to label imported
# objects: database, table, label and a running index that starts at 1.
class ImportParameter
  include Singleton

  attr_accessor :db_name, :tbl_name, :label, :index

  def initialize
    @index = 1
  end

  # @return [String] "<label>_<suffix>"
  def file_label(suffix)
    "#{@label}_#{suffix}"
  end

  # @return [String] resource path "/<tbl_name>/<label>_<suffix>.gz"
  def url(suffix)
    "/#{@tbl_name}/#{file_label(suffix)}.gz"
  end

  # @return [String] "/<db_name>/<tbl_name>/<label>_<suffix>.gz"
  def object_label(suffix)
    "/#{@db_name}#{url(suffix)}"
  end

  # @return [String] listing prefix "<tbl_name>/<label>"
  def storage_prefix
    "#{@tbl_name}/#{@label}"
  end
end
268
+ end
269
+
270
+ class MultipartUpload
271
# Prepares a multipart upload of +object+ into +bucket+.
#
# The content type is the first MIME type registered for the object name, or
# 'application/octet-stream'. :splitsz (default 100 * 1024 ** 2 bytes) and
# :jobs (default 1) are taken out of a copy of +options+; the remainder,
# plus :bucket and :content_type, is kept as the request options.
#
# @param bucket [String]
# @param object [String]
# @param options [Hash]
# @yieldreturn the API object used for storage calls
def initialize(bucket, object, options = {}, &block)
  mime = MIME::Types.type_for(object).first
  settings = options.merge(
    bucket: bucket,
    content_type: mime ? mime.to_s : 'application/octet-stream'
  )

  @bucket, @object = bucket, object
  @splitsz = settings.delete(:splitsz) || 100 * 1024 ** 2 #100MB
  @jobs = settings.delete(:jobs) || 1
  @options = settings
  @api = block[]
end
283
+
284
# Starts the multipart upload with POST "/<object>?uploads" and reads the
# upload id from the 'InitiateMultipartUploadResult/UploadId' element of
# the response.
#
# @options is splatted as keywords because RestParameter#initialize is
# keyword-only past its first two arguments (a positional Hash raises
# ArgumentError on Ruby 3).
#
# @return [String] the upload id
def initiate_multipart_upload
  STDERR.puts "Initiate multipart upload...\njobs:#{@jobs}, splitsz:#{@splitsz}"

  response = @api.execute_storage(RestParameter.new(:post, "/#{@object}?uploads", **@options))
  response.elements['InitiateMultipartUploadResult/UploadId'].text
end
291
+
292
# Uploads all parts via #split_stream and returns the collected
# part-number => ETag pairs ordered by part number.
#
# @param upload_id [String]
# @yield forwarded to #split_stream
# @return [Hash]
def upload_part(upload_id, &block)
  etags_by_part = {}
  split_stream(upload_id, etags_by_part, &block)
  etags_by_part.sort.to_h
end
297
+
298
# Finishes the multipart upload with POST "/<object>?uploadId=<id>" and a
# CompleteMultipartUpload XML body listing every part number and ETag.
#
# @options is splatted as keywords because RestParameter#initialize is
# keyword-only past its first two arguments (a positional Hash raises
# ArgumentError on Ruby 3). The payload is assembled with a single join
# instead of repeated string reallocation.
#
# @param upload_id [String]
# @param upload_objects [Hash] part number => ETag
# @return [nil] (the result of the final puts)
def complete_multipart_upload(upload_id, upload_objects)
  resource = "/#{@object}?uploadId=#{upload_id}"

  parts_xml = upload_objects.map do |part, etag|
    "<Part><PartNumber>#{part}</PartNumber><ETag>#{etag}</ETag></Part>"
  end
  payload = "<CompleteMultipartUpload>#{parts_xml.join}</CompleteMultipartUpload>"

  @api.execute_storage(RestParameter.new(:post, resource, **@options)) do
    payload
  end

  puts "complete multipart upload."
end
313
+
314
# Cancels the multipart upload with DELETE "/<object>?uploadId=<id>".
#
# @options is splatted as keywords because RestParameter#initialize is
# keyword-only past its first two arguments (a positional Hash raises
# ArgumentError on Ruby 3).
#
# @param upload_id [String]
# @return whatever @api.execute_storage returns
def abort_multipart_upload(upload_id)
  resource = "/#{@object}?uploadId=#{upload_id}"
  @api.execute_storage(RestParameter.new(:delete, resource, **@options))
end
318
+
319
+ private
320
+
321
# Reads the stream returned by the block in @splitsz-byte parts and uploads
# them with @jobs worker threads, recording each part's ETag.
#
# Parts are numbered from 1 and sent with
# PUT "/<object>?partNumber=<n>&uploadId=<id>". A nil pushed on the queue is
# the shutdown signal; each worker re-pushes it on exit so the next worker
# sees it too.
#
# @options is splatted as keywords because RestParameter#initialize is
# keyword-only past its first two arguments (a positional Hash raises
# ArgumentError on Ruby 3).
#
# @param upload_id [String]
# @param upload_objects [Hash] receives part number => first 'ETag' header value
# @yieldreturn [#read] the input stream
# @raise [RuntimeError] when @splitsz is below 5 * 1024 ** 2 bytes
# @return [nil] (the result of the final puts)
def split_stream(upload_id, upload_objects, &block)
  limit = 5 * 1024 ** 2 #5MB
  raise "split size is invalid. below lower limit of #{limit} byte" if @splitsz < limit

  ifp = block[]

  queue = SizedQueue.new(@jobs)
  workers = Array.new(@jobs) do
    Thread.new do
      while (job = queue.pop)
        part_number, chunk = job
        puts "> starting upload part #{part_number}, #{chunk.length}"
        resource = "/#{@object}?partNumber=#{part_number}&uploadId=#{upload_id}"
        response = @api.execute_storage(RestParameter.new(:put, resource, **@options)) do
          chunk
        end
        puts "< finished upload part #{part_number}, #{chunk.length}"
        upload_objects[part_number] = response.headers['ETag'].first
      end
      queue.push nil
    end
  end

  file_index = 1
  while (buffer = ifp.read(@splitsz))
    buffer.force_encoding("ASCII-8BIT")

    queue.push [file_index, buffer]
    file_index += 1
  end
  queue.push nil

  workers.each(&:join)
  puts "finished upload #{file_index-1} part objects."
end
360
+ end
361
+ end
362
+ end
363
+ end