s3-client 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,149 @@
1
+ module S3
2
+ class Client::API
3
# Describes one S3 REST request: HTTP method, bucket, resource path, query
# string, extra headers and a few body-handling flags. It also builds the
# request URL and the "AWS <key>:<signature>" authorization value
# (HMAC-SHA1 over a newline-separated canonical string).
#
# NOTE(review): relies on +present?+, +blank?+ and Hash#to_param, none of
# which are defined in this file (presumably ActiveSupport - confirm).
class RestParameter
  attr_reader :method, :resource, :cano_resource, :query_params,
              :parameters, :bucket, :content_type, :headers

  # @param method [Symbol, String] HTTP method name (upcased by #http_verb)
  # @param resource [String] path of the resource, '/' for the bucket/service root
  # @param cano_resource [String, nil] sub-resource appended after '?' and
  #   included in the signed canonical resource
  # @param query_params [Hash] query parameters appended to the URL only
  # @param bucket [String] bucket name; blank means a service-level request
  # @param content_type [String, nil] value placed in the string to sign
  # The remaining keywords are stored and exposed through readers/predicates.
  def initialize(method, resource, cano_resource: nil, query_params: {},
                 parameters: {}, bucket: '', content_type: nil, import: false,
                 raw_data: false, blank_body: false, headers: {}, multipart: false)
    @method = method
    @resource = resource
    @cano_resource = cano_resource
    @query_params = query_params
    @parameters = parameters
    @bucket = bucket
    @content_type = content_type
    @import = import
    @raw_data = raw_data
    @blank_body = blank_body
    @headers = headers
    @multipart = multipart
  end

  # Builds the absolute request URL.
  #
  # @param uri [URI] endpoint; host, port and scheme are read from it
  # @param force_path_style [Boolean] true  => scheme://host/bucket/resource,
  #   false => scheme://bucket.host/resource
  # @return [String]
  def url(uri, force_path_style = false)
    location = uri.host
    # Only omit the port when it is the default for this URI's scheme; a fixed
    # 80/443 check would wrongly drop e.g. the port of http://host:443.
    location += ":#{uri.port}" unless uri.port == uri.default_port

    if @bucket.present?
      if force_path_style
        location += '/' unless location.end_with?('/')
        location += @bucket
      else
        location = [@bucket, location].join('.')
        location += '/' unless location.end_with?('/')
      end
    end

    if @bucket.blank? || @resource != '/'
      location = File.join(location, @resource)
    elsif !location.end_with?('/')
      # Bucket root in path style ("host/bucket"): terminate with a slash.
      # Done only here so an object whose last path segment equals the bucket
      # name does not get a spurious trailing '/'.
      location += '/'
    end

    query = []
    query << @cano_resource if @cano_resource.present?
    query << @query_params.to_param if @query_params.present?
    location += "?#{query.join('&')}" unless query.empty?

    "#{uri.scheme}://#{location}"
  end

  # @return [String] the HTTP method in upper case
  def http_verb
    @method.to_s.upcase
  end

  # @return [String] the content-type line of the string to sign (always
  #   newline-terminated, empty when no content type is set)
  def signature_content_type
    "#{@content_type if @content_type.present?}\n"
  end

  # @return [String] value of the form "AWS <access_key_id>:<signature>"
  def authentication(access_key_id, secret_access_key, force_path_style)
    "AWS #{access_key_id}:#{signature(secret_access_key, force_path_style)}"
  end

  # Signs verb, (empty) Content-MD5, content type, date and canonical resource
  # with HMAC-SHA1 and returns the Base64 encoded digest. No x-amz-* headers
  # are included in the string to sign.
  def signature(secret_access_key, force_path_style = false)
    string_to_sign = [
      "#{http_verb}\n",
      "\n", # Content-MD5 line is always left empty
      signature_content_type,
      "#{calc_date}\n",
      canonicalized_resource(force_path_style)
    ].join

    digest = OpenSSL::HMAC.digest(OpenSSL::Digest.new('sha1'), secret_access_key, string_to_sign)
    Base64.strict_encode64(digest)
  end

  # Canonical resource used for signing: "/<bucket>/" plus the resource path
  # and, when present, "?<cano_resource>". Query parameters are not included.
  # +force_path_style+ is accepted for signature compatibility and not used.
  def canonicalized_resource(force_path_style = false)
    result = @bucket.present? ? "/#{@bucket}/" : ''
    result = File.join(result, @resource) if @bucket.blank? || @resource != '/'
    result += "?#{@cano_resource}" if @cano_resource.present?
    result
  end

  # HTTP-date used in the signature; computed once per instance so repeated
  # calls return the same timestamp.
  def calc_date
    @date ||= Time.now.httpdate
  end

  def import?
    @import
  end

  def multipart?
    @multipart
  end

  def raw_data?
    @raw_data
  end

  def blank_body?
    @blank_body
  end

  # Human-readable summary of the main request attributes.
  def to_s
    [
      "method:#{@method}",
      "resource: #{@resource}",
      "cano_resource: #{@cano_resource}",
      "query_params: #{@query_params}",
      "bucket: #{@bucket}",
      "parameters: #{@parameters}",
      "headers: #{@headers}"
    ].join(", ")
  end
end
148
+ end
149
+ end
@@ -0,0 +1,363 @@
1
+ require 'zlib'
2
+ require 'mime-types'
3
+ require 'singleton'
4
+
5
+ module S3
6
+ class Client::API
7
+ module Storage
8
# Sends a GET for the service root ('/') with no bucket set and returns
# whatever execute_storage returns.
def buckets
  service_root = RestParameter.new(:get, '/')
  execute_storage(service_root)
end
11
+
12
# Lists a bucket with a GET on its root.
#
# Each keyword that is truthy is sent as a query parameter:
# prefix => 'prefix', max => 'max-keys', marker => 'marker',
# delimiter => 'delimiter'. Returns the result of execute_storage.
def objects(bucket, prefix: nil, max: nil, marker: nil, delimiter: nil)
  candidates = {
    'prefix' => prefix,
    'max-keys' => max,
    'marker' => marker,
    'delimiter' => delimiter
  }
  # nil/false filters are left out of the query string entirely
  query_params = candidates.select { |_name, value| value }

  listing = RestParameter.new(:get, '/', bucket: bucket, query_params: query_params)
  execute_storage(listing)
end
33
+
34
# Creates a bucket with a PUT on its root.
#
# The request body is a CreateBucketConfiguration element whose
# LocationConstraint text is taken from @location.
#
# @param bucket [String] bucket name
# @param options [Hash] extra RestParameter keywords (symbol keys);
#   :bucket and :content_type are always overwritten
# @return whatever execute_storage returns
def create_bucket(bucket, options = {})
  request_options = options.merge(bucket: bucket, content_type: 'application/xml')

  # RestParameter#initialize only takes keywords after the two positionals,
  # so the hash must be splatted; passing it positionally raises
  # ArgumentError on Ruby 3.
  request = RestParameter.new(:put, '/', **request_options)

  execute_storage(request) do
    root = REXML::Element.new('CreateBucketConfiguration')
    root.add_attribute('xmlns', 'http://s3.amazonaws.com/doc/2006-03-01/')
    child = REXML::Element.new('LocationConstraint')
    child.add_text(@location)
    root.add_element(child)
    root
  end
end
47
+
48
# Uploads an object with a PUT on "/<object_name>".
#
# The content type is looked up from the object name via MIME::Types and
# falls back to 'application/octet-stream'. The given block is forwarded to
# execute_storage (it is expected to supply the request body - see
# execute_storage, defined outside this file section).
#
# @param bucket [String] bucket name
# @param object_name [String] object key
# @param options [Hash] extra RestParameter keywords (symbol keys);
#   :bucket and :content_type are always overwritten
def create_object(bucket, object_name, options = {}, &block)
  type = MIME::Types.type_for(object_name).first
  content_type = type ? type.to_s : 'application/octet-stream'
  request_options = options.merge(bucket: bucket, content_type: content_type)

  # Splat the hash: RestParameter#initialize takes keywords, and a positional
  # hash is no longer converted to keywords on Ruby 3.
  request = RestParameter.new(:put, "/#{object_name}", **request_options)
  execute_storage(request, &block)
end
56
+
57
# Uploads an object in several parts: initiate, upload every part, complete.
#
# If uploading or completing raises a StandardError the upload is aborted and
# the original error is re-raised. A failure of the abort request itself is
# only reported on STDERR so that it cannot hide the error that caused it.
#
# @param bucket [String] bucket name
# @param object_name [String] object key
# @param options [Hash] passed to MultipartUpload.new
# @param block forwarded to MultipartUpload#upload_part
# @return whatever MultipartUpload#complete_multipart_upload returns
def create_multipart_object(bucket, object_name, options = {}, &block)
  api = self
  uploader = MultipartUpload.new(bucket, object_name, options) { api }

  # Initiate Multipart Upload
  upload_id = uploader.initiate_multipart_upload

  begin
    # Upload Part
    parts = uploader.upload_part(upload_id, &block)

    # Complete Multipart Upload
    uploader.complete_multipart_upload(upload_id, parts)
  rescue StandardError => error
    # Abort Multipart Upload, without letting an abort failure replace +error+
    begin
      uploader.abort_multipart_upload(upload_id)
    rescue StandardError => abort_error
      STDERR.puts "failed to abort multipart upload #{upload_id}: #{abort_error.message}"
    end

    raise error
  end
end
79
+
80
# Fetches an object with a GET on "/<object>", asking for the raw body
# (raw_data: true).
#
# @param range [#first, #last, nil] optional byte range. Sent as a Range
#   header "bytes=<first>-<last>"; when +last+ is -1 the upper bound is
#   omitted ("bytes=<first>-").
def get_object(bucket, object, range = nil)
  headers = {}

  if range
    lower = range.first
    upper = range.last
    headers[:Range] = upper == -1 ? "bytes=#{lower}-" : "bytes=#{lower}-#{upper}"
  end

  download = RestParameter.new(:get, "/#{object}", bucket: bucket, raw_data: true, headers: headers)
  execute_storage(download)
end
90
+
91
# Sends a DELETE for the root ('/') of +bucket+ and returns whatever
# execute_storage returns.
def delete_bucket(bucket)
  removal = RestParameter.new(:delete, '/', bucket: bucket)
  execute_storage(removal)
end
95
+
96
# Sends a DELETE for "/<object>" in +bucket+ (content type
# 'application/json') and returns whatever execute_storage returns.
def delete_object(bucket, object)
  removal = RestParameter.new(:delete, "/#{object}", bucket: bucket, content_type: 'application/json')
  execute_storage(removal)
end
100
+
101
# Imports one or more files through the Import helper and reports the
# uploaded object labels on STDERR.
#
# The starting label suffix comes from Import#calc_label_suffix and is handed
# to Import#execute. Returns the list of uploaded object labels.
def import(db_name, tbl_name, file_paths, options = {})
  api = self
  importer = Import.new(db_name, tbl_name, file_paths, options) { api }

  # first free numeric suffix for the label, then the actual upload
  first_suffix = importer.calc_label_suffix
  uploaded = importer.execute(first_suffix)

  STDERR.puts "finished upload #{uploaded.size} objects."
  STDERR.puts
  STDERR.puts 'upload_objects:'
  uploaded.each { |label| STDERR.puts label }
end
119
+
120
+ private
121
+
122
# Splits input files into line-aligned chunks, gzips every chunk and uploads
# it as "/<tbl_name>/<label>_<n>.gz" into the bucket named after the database.
# Uploads run on @jobs worker threads fed through a SizedQueue.
#
# Naming state (db, table, label) lives in the ImportParameter singleton, so
# it is shared by every Import instance in the process.
class Import
  DEFAULT_SPLIT_SIZE = 100 * 1024**2 # 100M

  # @param db_name [String] bucket that receives the objects
  # @param tbl_name [String] first path segment of every object key
  # @param file_paths [String, Array<String>] paths to import; "-" reads
  #   $stdin, names ending in ".gz" are read through Zlib::GzipReader
  # @param options [Hash] :jobs (default 1), :label (default 'label'),
  #   :splitsz (default 100M). The hash is read, not modified.
  # @param block must return the API object used for listing and uploading
  # @raise [S3::Client::ParameterInvalid] if the label starts with '_' or '.'
  def initialize(db_name, tbl_name, file_paths, options = {}, &block)
    @db_name = db_name
    @tbl_name = tbl_name
    @file_paths = file_paths
    @jobs = options[:jobs] || 1
    @label = options[:label] || 'label'
    @splitsz = options[:splitsz] || DEFAULT_SPLIT_SIZE
    @api = block[]

    # Validate before touching the shared singleton so a rejected label does
    # not leave half-updated naming state behind.
    if %w(_ .).include?(@label[0])
      raise S3::Client::ParameterInvalid, "label should not start with '_' or '.'"
    end

    import_parameter = ImportParameter.instance
    import_parameter.db_name = db_name
    import_parameter.tbl_name = tbl_name
    import_parameter.label = @label

    STDERR.puts "Initialize...\njobs: #{@jobs}, splitsz: #{@splitsz}"
  end

  # Lists the objects stored under "<tbl_name>/<label>" and returns the next
  # free numeric suffix: highest "<label>_<n>" found plus one, or 0 when the
  # listing is empty or contains no numbered object.
  #
  # @return [Integer]
  def calc_label_suffix
    prefix = ImportParameter.instance.storage_prefix
    xml_doc = @api.objects(@db_name, prefix: prefix)
    objects = S3::Concerns::ObjectsResult.new(xml_doc).objects

    return 0 if objects.blank?

    # The label is literal text, not a pattern, so escape it.
    numbered = /#{Regexp.escape(@label)}_(\d+)/
    highest = objects.flat_map { |name| name.scan(numbered).flatten }.map(&:to_i).max
    highest ? highest + 1 : 0
  end

  # Imports every configured path in turn. Each file continues numbering
  # where the previous one stopped.
  #
  # @param suffix [Integer] suffix of the first uploaded object
  # @return [Array<String>] labels of the uploaded objects (completion order)
  def execute(suffix)
    upload_objects = []

    Array(@file_paths).each do |file_path|
      suffix += if file_path.end_with?('.gz')
                  import_gz_file(file_path, suffix, upload_objects)
                elsif file_path == "-"
                  import_stream($stdin, suffix, upload_objects)
                else
                  import_text_file(file_path, suffix, upload_objects)
                end
    end

    upload_objects
  end

  # Imports a gzip file; on any Zlib::Error the file is imported again as
  # plain text.
  def import_gz_file(file_path, suffix, upload_objects)
    import_stream(Zlib::GzipReader.open(file_path), suffix, upload_objects)
  rescue Zlib::Error
    # if not gzip
    import_text_file(file_path, suffix, upload_objects)
  end

  def import_text_file(file_path, suffix, upload_objects)
    import_stream(File.open(file_path), suffix, upload_objects)
  end

  # Reads +ifp+ in chunks of about @splitsz bytes (each extended to the end
  # of the current line), uploads them on worker threads and closes +ifp+,
  # also when an error is raised.
  #
  # @return [Integer] number of chunks read from +ifp+
  def import_stream(ifp, suffix, upload_objects)
    queue = SizedQueue.new(@jobs)
    workers = Array.new(@jobs) do
      Thread.new do
        while (part = queue.pop)
          offset, chunk, number = part
          STDERR.puts "> starting upload part #{number}, #{chunk.length}"
          execute_storage_detail(chunk, suffix + offset)
          STDERR.puts "< finished upload part #{number}, #{chunk.length}"
          upload_objects << ImportParameter.instance.object_label(suffix + offset)
        end
        # hand the end-of-input marker on to the next worker
        queue.push nil
      end
    end

    file_index = 0
    # Only used in the progress messages; starts at ImportParameter#index for
    # every stream.
    import_index = ImportParameter.instance.index
    while (buffer = ifp.read(@splitsz))
      buffer.force_encoding("ASCII-8BIT")
      # finish the line the fixed-size read stopped in
      rest_of_line = ifp.gets
      buffer.concat(rest_of_line.force_encoding("ASCII-8BIT")) if rest_of_line
      queue.push [file_index, buffer, import_index]
      file_index += 1
      import_index += 1
    end
    queue.push nil

    workers.each(&:join)
    file_index
  ensure
    ifp.close
  end

  # Gzips +data+ and PUTs it as the object for +suffix+.
  def execute_storage_detail(data, suffix)
    compressed = StringIO.new
    gz = Zlib::GzipWriter.new(compressed)
    gz.write data
    gz.close

    options = {
      content_type: 'application/x-gzip',
      bucket: @db_name,
      import: true
    }

    resource = ImportParameter.instance.url(suffix)
    # keywords must be splatted; a positional hash raises on Ruby 3
    @api.execute_storage(RestParameter.new(:put, resource, **options)) do
      compressed.string
    end
  end

  # Process-wide holder of the naming pieces used to build object keys.
  class ImportParameter
    include Singleton

    attr_accessor :db_name, :tbl_name, :label, :index

    def initialize
      @index = 1
    end

    # Resource path of the object for +suffix+ (relative to the bucket).
    def url(suffix)
      "/#{@tbl_name}/#{@label}_#{suffix}.gz"
    end

    # Same as #url but prefixed with the database (bucket) name.
    def object_label(suffix)
      "/#{@db_name}/#{@tbl_name}/#{@label}_#{suffix}.gz"
    end

    def file_label(suffix)
      "#{@label}_#{suffix}"
    end

    # Key prefix shared by every object of this table/label.
    def storage_prefix
      "#{@tbl_name}/#{@label}"
    end
  end
end
269
+
270
# Drives one multipart upload of a single object: initiate, upload the parts
# on @jobs worker threads, then complete or abort.
class MultipartUpload
  MINIMUM_PART_SIZE = 5 * 1024**2 # 5MB

  # @param bucket [String] bucket name
  # @param object [String] object key; its name decides the content type via
  #   MIME::Types ('application/octet-stream' when unknown)
  # @param options [Hash] :splitsz (default 100MB) and :jobs (default 1) are
  #   consumed here; every other entry is passed to RestParameter as keyword.
  #   The caller's hash is not modified.
  # @param block must return the API object used to send requests
  def initialize(bucket, object, options = {}, &block)
    type = MIME::Types.type_for(object).first
    content_type = type ? type.to_s : 'application/octet-stream'
    # merge returns a copy, so the deletes below never touch the caller's hash
    options = options.merge(bucket: bucket, content_type: content_type)

    @bucket = bucket
    @object = object
    @splitsz = options.delete(:splitsz) || 100 * 1024**2 # 100MB
    @jobs = options.delete(:jobs) || 1
    @options = options
    @api = block[]
  end

  # POSTs "/<object>?uploads" and returns the UploadId text found in the
  # response under InitiateMultipartUploadResult/UploadId.
  def initiate_multipart_upload
    STDERR.puts "Initiate multipart upload...\njobs:#{@jobs}, splitsz:#{@splitsz}"
    response = @api.execute_storage(request(:post, "/#{@object}?uploads"))
    response.elements['InitiateMultipartUploadResult/UploadId'].text
  end

  # Uploads every part of the stream returned by the block.
  #
  # @return [Hash{Integer => String}] part number => ETag, ordered by part number
  def upload_part(upload_id, &block)
    upload_objects = {}
    split_stream(upload_id, upload_objects, &block)
    Hash[upload_objects.sort]
  end

  # POSTs the CompleteMultipartUpload document listing every part and ETag.
  def complete_multipart_upload(upload_id, upload_objects)
    parts = upload_objects.map do |part, etag|
      "<Part><PartNumber>#{part}</PartNumber><ETag>#{etag}</ETag></Part>"
    end
    payload = "<CompleteMultipartUpload>#{parts.join}</CompleteMultipartUpload>"

    @api.execute_storage(request(:post, "/#{@object}?uploadId=#{upload_id}")) do
      payload
    end

    puts "complete multipart upload."
  end

  # Sends a DELETE for "/<object>?uploadId=<upload_id>".
  def abort_multipart_upload(upload_id)
    @api.execute_storage(request(:delete, "/#{@object}?uploadId=#{upload_id}"))
  end

  private

  # Builds the request description. @options is splatted because
  # RestParameter#initialize takes keywords and a positional hash raises
  # ArgumentError on Ruby 3.
  def request(method, resource)
    RestParameter.new(method, resource, **@options)
  end

  # Reads the stream returned by the block in @splitsz-byte pieces and PUTs
  # each piece as part 1, 2, ... Stores the first 'ETag' response header of
  # every part in +upload_objects+.
  #
  # @raise [RuntimeError] if @splitsz is below the 5MB lower limit
  def split_stream(upload_id, upload_objects, &block)
    limit = MINIMUM_PART_SIZE
    raise "split size is invalid. below lower limit of #{limit} byte" if @splitsz < limit

    ifp = block[]

    queue = SizedQueue.new(@jobs)
    workers = Array.new(@jobs) do
      Thread.new do
        while (part = queue.pop)
          number, chunk = part
          puts "> starting upload part #{number}, #{chunk.length}"
          resource = "/#{@object}?partNumber=#{number}&uploadId=#{upload_id}"
          response = @api.execute_storage(request(:put, resource)) { chunk }
          puts "< finished upload part #{number}, #{chunk.length}"
          upload_objects[number] = response.headers['ETag'].first
        end
        # hand the end-of-input marker on to the next worker
        queue.push nil
      end
    end

    file_index = 1
    while (buffer = ifp.read(@splitsz))
      buffer.force_encoding("ASCII-8BIT")
      queue.push [file_index, buffer]
      file_index += 1
    end
    queue.push nil

    workers.each(&:join)
    puts "finished upload #{file_index - 1} part objects."
  end
end
361
+ end
362
+ end
363
+ end