aliyun-sdk 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,124 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ $LOAD_PATH.unshift(File.expand_path("../../../../lib", __FILE__))
4
+ require 'yaml'
5
+ require 'aliyun/oss'
6
+
7
+ ##
8
+ # 一般来说用户在上传object和下载object时只需要指定文件名就可以满足需要:
9
+ # - 在上传的时候client会从指定的文件中读取数据上传到OSS
10
+ # - 在下载的时候client会把从OSS下载的数据写入到指定的文件中
11
+ #
12
+ # 在某些情况下用户可能会需要流式地上传和下载:
13
+ # - 用户要写入到object中的数据不能立即得到全部,而是从网络中流式获取,
14
+ # 然后再一段一段地写入到OSS中
15
+ # - 用户要写入到object的数据是经过运算得出,每次得到一部分,用户不希望
16
+ # 保留所有的数据然后一次性写入到OSS
17
+ # - 用户下载的object很大,用户不希望一次性把它们下载到内存中,而是希望
18
+ # 获取一部分就处理一部分;用户也不希望把它先下载到文件中,然后再从文
19
+ # 件中读出来处理,这样会让数据经历不必要的拷贝
20
+ #
21
+ # 当然,对于流式上传的需求,我们可以使用OSS的appendable object来满足。
22
+ # 但是即使是normal object,利用sdk的streaming功能,也可以实现流式上传
23
+ # 和下载。
24
+
25
+ # 初始化OSS client
26
+ Aliyun::OSS::Logging.set_log_level(Logger::DEBUG)
27
+ conf_file = '~/.oss.yml'
28
+ conf = YAML.load(File.read(File.expand_path(conf_file)))
29
+ bucket = Aliyun::OSS::Client.new(
30
+ :endpoint => conf['endpoint'],
31
+ :cname => conf['cname'],
32
+ :access_key_id => conf['id'],
33
+ :access_key_secret => conf['key']).get_bucket(conf['bucket'])
34
+
35
+ # 辅助打印函数
36
+ def demo(msg)
37
+ puts "######### #{msg} ########"
38
+ puts
39
+ yield
40
+ puts "-------------------------"
41
+ puts
42
+ end
43
+
44
+ # 例子1: 归并排序
45
+ # 有两个文件sort.1, sort.2,它们分别存了一些从小到大排列的整数,每个整
46
+ # 数1行,现在要将它们做归并排序的结果上传到OSS中,命名为sort.all
47
+
48
+ local_1, local_2 = 'sort.1', 'sort.2'
49
+ result_object = 'sort.all'
50
+
51
+ File.open(File.expand_path(local_1), 'w') do |f|
52
+ [1001, 2005, 2007, 2011, 2013, 2015].each do |i|
53
+ f.puts(i.to_s)
54
+ end
55
+ end
56
+
57
+ File.open(File.expand_path(local_2), 'w') do |f|
58
+ [2009, 2010, 2012, 2017, 2020, 9999].each do |i|
59
+ f.puts(i.to_s)
60
+ end
61
+ end
62
+
63
+ demo "Streaming upload" do
64
+ bucket.put_object(result_object) do |content|
65
+ f1 = File.open(File.expand_path(local_1))
66
+ f2 = File.open(File.expand_path(local_2))
67
+ v1, v2 = f1.readline, f2.readline
68
+
69
+ until f1.eof? or f2.eof?
70
+ if v1.to_i < v2.to_i
71
+ content << v1
72
+ v1 = f1.readline
73
+ else
74
+ content << v2
75
+ v2 = f2.readline
76
+ end
77
+ end
78
+
79
+ [v1, v2].sort.each{|i| content << i}
80
+ content << f1.readline until f1.eof?
81
+ content << f2.readline until f2.eof?
82
+ end
83
+
84
+ puts "Put object: #{result_object}"
85
+
86
+ # 将文件下载下来查看
87
+ bucket.get_object(result_object, :file => result_object)
88
+ puts "Get object: #{result_object}"
89
+ puts "Content: #{File.read(result_object)}"
90
+ end
91
+
92
+ # 例子2: 下载进度条
93
+ # 下载一个大文件(10M),在下载的过程中打印下载进度
94
+
95
+ large_file = 'large_file'
96
+
97
+ demo "Streaming download" do
98
+ puts "Begin put object: #{large_file}"
99
+ # 利用streaming上传
100
+ bucket.put_object(large_file) do |stream|
101
+ 10.times { stream << "x" * (1024 * 1024) }
102
+ end
103
+
104
+ # 查看object大小
105
+ object_size = bucket.get_object(large_file).size
106
+ puts "Put object: #{large_file}, size: #{object_size}"
107
+
108
+ # 流式下载文件,仅打印进度,不保存文件
109
+ def to_percentile(v)
110
+ "#{(v * 100.0).round(2)} %"
111
+ end
112
+
113
+ puts "Begin download: #{large_file}"
114
+ last_got, got = 0, 0
115
+ bucket.get_object(large_file) do |chunk|
116
+ got += chunk.size
117
+ # 仅在下载进度大于10%的时候打印
118
+ if (got - last_got).to_f / object_size > 0.1
119
+ puts "Progress: #{to_percentile(got.to_f / object_size)}"
120
+ last_got = got
121
+ end
122
+ end
123
+ puts "Get object: #{large_file}, size: #{object_size}"
124
+ end
@@ -138,6 +138,8 @@ module Aliyun
138
138
  # @param opts [Hash] 查询选项
139
139
  # @option opts [String] :prefix 返回的object的前缀,如果设置则只
140
140
  # 返回那些名字以它为前缀的object
141
+ # @option opts [String] :marker 如果设置,则只返回名字在它之后
142
+ # (字典序,不包含marker)的object
141
143
  # @option opts [String] :delimiter 用于获取公共前缀的分隔符,从
142
144
  # 前缀后面开始到第一个分隔符出现的位置之前的字符,作为公共前缀。
143
145
  # @example
@@ -151,8 +153,6 @@ module Aliyun
151
153
  # '/foo/bar/', '/foo/xxx/'。它们恰好就是目录'/foo/'下的所有子目
152
154
  # 录。用delimiter获取公共前缀的方法避免了查询当前bucket下的所有
153
155
  # object(可能数量巨大),是用于模拟目录结构的常用做法。
154
- # @option opts [String] :encoding 指定返回的响应中object名字的编
155
- # 码方法,目前只支持{OSS::KeyEncoding::URL}编码方式。
156
156
  # @return [Enumerator<Object>] 其中Object可能是{OSS::Object},也
157
157
  # 可能是{String},此时它是一个公共前缀
158
158
  # @example
@@ -165,7 +165,8 @@ module Aliyun
165
165
  # end
166
166
  # end
167
167
  def list_objects(opts = {})
168
- Iterator::Objects.new(@protocol, name, opts).to_enum
168
+ Iterator::Objects.new(
169
+ @protocol, name, opts.merge(encoding: KeyEncoding::URL)).to_enum
169
170
  end
170
171
 
171
172
  # 向Bucket中上传一个object
@@ -192,17 +193,20 @@ module Aliyun
192
193
  # 它读到的数据为nil停止。
193
194
  # @note 如果opts中指定了:file,则block会被忽略
194
195
  def put_object(key, opts = {}, &block)
195
- file = opts[:file]
196
- if file
197
- opts[:content_type] = get_content_type(file)
196
+ args = opts.dup
198
197
 
199
- @protocol.put_object(name, key, opts) do |sw|
198
+ file = args[:file]
199
+ args[:content_type] ||= get_content_type(file) if file
200
+ args[:content_type] ||= get_content_type(key)
201
+
202
+ if file
203
+ @protocol.put_object(name, key, args) do |sw|
200
204
  File.open(File.expand_path(file), 'rb') do |f|
201
205
  sw << f.read(Protocol::STREAM_CHUNK_SIZE) until f.eof?
202
206
  end
203
207
  end
204
208
  else
205
- @protocol.put_object(name, key, opts, &block)
209
+ @protocol.put_object(name, key, args, &block)
206
210
  end
207
211
  end
208
212
 
@@ -309,18 +313,20 @@ module Aliyun
309
313
  # @return [Integer] 返回下次append的位置
310
314
  # @yield [HTTP::StreamWriter] 同 {#put_object}
311
315
  def append_object(key, pos, opts = {}, &block)
312
- next_pos = -1
313
- file = opts[:file]
314
- if file
315
- opts[:content_type] = get_content_type(file)
316
+ args = opts.dup
316
317
 
317
- next_pos = @protocol.append_object(name, key, pos, opts) do |sw|
318
+ file = args[:file]
319
+ args[:content_type] ||= get_content_type(file) if file
320
+ args[:content_type] ||= get_content_type(key)
321
+
322
+ if file
323
+ next_pos = @protocol.append_object(name, key, pos, args) do |sw|
318
324
  File.open(File.expand_path(file), 'rb') do |f|
319
325
  sw << f.read(Protocol::STREAM_CHUNK_SIZE) until f.eof?
320
326
  end
321
327
  end
322
328
  else
323
- next_pos = @protocol.append_object(name, key, pos, opts, &block)
329
+ next_pos = @protocol.append_object(name, key, pos, args, &block)
324
330
  end
325
331
 
326
332
  next_pos
@@ -345,7 +351,10 @@ module Aliyun
345
351
  # * :etag [String] 目标文件的ETag
346
352
  # * :last_modified [Time] 目标文件的最后修改时间
347
353
  def copy_object(source, dest, opts = {})
348
- @protocol.copy_object(name, source, dest, opts)
354
+ args = opts.dup
355
+
356
+ args[:content_type] ||= get_content_type(dest)
357
+ @protocol.copy_object(name, source, dest, args)
349
358
  end
350
359
 
351
360
  # 删除一个object
@@ -358,13 +367,12 @@ module Aliyun
358
367
  # @param keys [Array<String>] Object的名字集合
359
368
  # @param opts [Hash] 删除object的选项(可选)
360
369
  # @option opts [Boolean] :quiet 指定是否允许Server返回成功删除的
361
- # object
362
- # @option opts [String] :encoding 指定Server返回的成功删除的
363
- # object的名字的编码方式,目前只支持{OSS::KeyEncoding::URL}
370
+ # object,默认为false,即返回删除结果
364
371
  # @return [Array<String>] 成功删除的object的名字,如果指定
365
372
  # 了:quiet参数,则返回[]
366
373
  def batch_delete_objects(keys, opts = {})
367
- @protocol.batch_delete_objects(name, keys, opts)
374
+ @protocol.batch_delete_objects(
375
+ name, keys, opts.merge(encoding: KeyEncoding::URL))
368
376
  end
369
377
 
370
378
  # 设置object的ACL
@@ -425,12 +433,17 @@ module Aliyun
425
433
  # puts "Progress: #{(p * 100).round(2)} %"
426
434
  # end
427
435
  def resumable_upload(key, file, opts = {}, &block)
428
- unless cpt_file = opts[:cpt_file]
436
+ args = opts.dup
437
+
438
+ args[:content_type] ||= get_content_type(file)
439
+ args[:content_type] ||= get_content_type(key)
440
+
441
+ unless cpt_file = args[:cpt_file]
429
442
  cpt_file = get_cpt_file(file)
430
443
  end
431
444
 
432
445
  Multipart::Upload.new(
433
- @protocol, options: opts,
446
+ @protocol, options: args,
434
447
  progress: block,
435
448
  object: key, bucket: name, creation_time: Time.now,
436
449
  file: File.expand_path(file), cpt_file: cpt_file
@@ -477,18 +490,60 @@ module Aliyun
477
490
  # puts "Progress: #{(p * 100).round(2)} %"
478
491
  # end
479
492
  def resumable_download(key, file, opts = {}, &block)
480
- unless cpt_file = opts[:cpt_file]
493
+ args = opts.dup
494
+
495
+ args[:content_type] ||= get_content_type(file)
496
+ args[:content_type] ||= get_content_type(key)
497
+
498
+ unless cpt_file = args[:cpt_file]
481
499
  cpt_file = get_cpt_file(file)
482
500
  end
483
501
 
484
502
  Multipart::Download.new(
485
- @protocol, options: opts,
503
+ @protocol, options: args,
486
504
  progress: block,
487
505
  object: key, bucket: name, creation_time: Time.now,
488
506
  file: File.expand_path(file), cpt_file: cpt_file
489
507
  ).run
490
508
  end
491
509
 
510
+ # 列出此Bucket中正在进行的multipart上传请求,不包括已经完成或者
511
+ # 被取消的。
512
+ # @param [Hash] opts 可选项
513
+ # @option opts [String] :key_marker object key的标记,根据有没有
514
+ # 设置:id_marker,:key_marker的含义不同:
515
+ # 1. 如果未设置:id_marker,则只返回object key在:key_marker之后
516
+ # (字典序,不包含marker)的upload请求
517
+ # 2. 如果设置了:id_marker,则返回object key在:key_marker之后
518
+ # (字典序,不包含marker)的upload请求*和*Object
519
+ # key与:key_marker相等,*且*upload id在:id_marker之后(字母
520
+ # 表顺序排序,不包含marker)的upload请求
521
+ # @option opts [String] :id_marker upload id的标记,如
522
+ # 果:key_marker没有设置,则此参数会被忽略;否则与:key_marker一起
523
+ # 决定返回的结果(见上)
524
+ # @option opts [String] :prefix 如果指定,则只返回object key中符
525
+ # 合指定前缀的upload请求
526
+ # @return [Enumerator<Multipart::Transaction>] 其中每一个元素表
527
+ # 示一个upload请求
528
+ # @example
529
+ # key_marker = 1, id_marker = null
530
+ # # return <2, 0>, <2, 1>, <3, 0> ...
531
+ # key_marker = 1, id_marker = 5
532
+ # # return <1, 6>, <1, 7>, <2, 0>, <3, 0> ...
533
+ def list_uploads(opts = {})
534
+ Iterator::Uploads.new(
535
+ @protocol, name, opts.merge(encoding: KeyEncoding::URL)).to_enum
536
+ end
537
+
538
+ # 取消一个multipart上传请求,一般用于清除Bucket下因断点上传而产
539
+ # 生的文件碎片。成功取消后属于这个上传请求的分片都会被清除。
540
+ # @param [String] upload_id 上传请求的id,可通过{#list_uploads}
541
+ # 获得
542
+ # @param [String] key Object的名字
543
+ def abort_upload(upload_id, key)
544
+ @protocol.abort_multipart_upload(name, key, upload_id)
545
+ end
546
+
492
547
  # 获取Bucket的URL
493
548
  # @return [String] Bucket的URL
494
549
  def bucket_url
@@ -46,6 +46,8 @@ module Aliyun
46
46
  # 列出当前所有的bucket
47
47
  # @param opts [Hash] 查询选项
48
48
  # @option opts [String] :prefix 如果设置,则只返回以它为前缀的bucket
49
+ # @option opts [String] :marker 如果设置,则只返回名字在它之后
50
+ # (字典序,不包含marker)的bucket
49
51
  # @return [Enumerator<Bucket>] Bucket的迭代器
50
52
  def list_buckets(opts = {})
51
53
  if @config.cname
@@ -9,16 +9,22 @@ module Aliyun
9
9
  class Download < Transaction
10
10
  PART_SIZE = 10 * 1024 * 1024
11
11
  READ_SIZE = 16 * 1024
12
+ NUM_THREAD = 10
12
13
 
13
14
  def initialize(protocol, opts)
14
15
  args = opts.dup
15
16
  @protocol = protocol
16
17
  @progress = args.delete(:progress)
17
18
  @file = args.delete(:file)
18
- @checkpoint_file = args.delete(:cpt_file)
19
+ @cpt_file = args.delete(:cpt_file)
20
+ super(args)
21
+
19
22
  @object_meta = {}
23
+ @num_threads = options[:threads] || NUM_THREAD
24
+ @all_mutex = Mutex.new
20
25
  @parts = []
21
- super(args)
26
+ @todo_mutex = Mutex.new
27
+ @todo_parts = []
22
28
  end
23
29
 
24
30
  # Run the download transaction, which includes 3 stages:
@@ -27,8 +33,9 @@ module Aliyun
27
33
  # * 2. download each unfinished part
28
34
  # * 3. combine the downloaded parts into the final file
29
35
  def run
30
- logger.info("Begin download, file: #{@file}, checkpoint file: "\
31
- "#{@checkpoint_file}")
36
+ logger.info("Begin download, file: #{@file}, "\
37
+ "checkpoint file: #{@cpt_file}, "\
38
+ "threads: #{@num_threads}")
32
39
 
33
40
  # Rebuild transaction states from checkpoint file
34
41
  # Or initiate new transaction states
@@ -38,7 +45,17 @@ module Aliyun
38
45
  divide_parts if @parts.empty?
39
46
 
40
47
  # Download each part(object range)
41
- @parts.reject { |p| p[:done]}.each { |p| download_part(p) }
48
+ @todo_parts = @parts.reject { |p| p[:done] }
49
+
50
+ (1..@num_threads).map {
51
+ Thread.new {
52
+ loop {
53
+ p = sync_get_todo_part
54
+ break unless p
55
+ download_part(p)
56
+ }
57
+ }
58
+ }.map(&:join)
42
59
 
43
60
  # Combine the parts into the final file
44
61
  commit
@@ -62,25 +79,26 @@ module Aliyun
62
79
  # :md5 => 'states_md5'
63
80
  # }
64
81
  def checkpoint
65
- logger.debug("Begin make checkpoint, "\
66
- "disable_cpt: #{options[:disable_cpt]}")
82
+ logger.debug("Begin make checkpoint, disable_cpt: "\
83
+ "#{options[:disable_cpt] == true}")
67
84
 
68
85
  ensure_object_not_changed
69
86
 
87
+ parts = sync_get_all_parts
70
88
  states = {
71
89
  :id => id,
72
90
  :file => @file,
73
91
  :object_meta => @object_meta,
74
- :parts => @parts
92
+ :parts => parts
75
93
  }
76
94
 
77
95
  # report progress
78
96
  if @progress
79
- done = @parts.count { |p| p[:done] }
80
- @progress.call(done.to_f / @parts.size) if done > 0
97
+ done = parts.count { |p| p[:done] }
98
+ @progress.call(done.to_f / parts.size) if done > 0
81
99
  end
82
100
 
83
- write_checkpoint(states, @checkpoint_file) unless options[:disable_cpt]
101
+ write_checkpoint(states, @cpt_file) unless options[:disable_cpt]
84
102
 
85
103
  logger.debug("Done make checkpoint, states: #{states}")
86
104
  end
@@ -91,31 +109,33 @@ module Aliyun
91
109
  def commit
92
110
  logger.info("Begin commit transaction, id: #{id}")
93
111
 
112
+ parts = sync_get_all_parts
94
113
  # concat all part files into the target file
95
114
  File.open(@file, 'w') do |w|
96
- @parts.sort{ |x, y| x[:number] <=> y[:number] }.each do |p|
97
- File.open(get_part_file(p[:number])) do |r|
115
+ parts.sort{ |x, y| x[:number] <=> y[:number] }.each do |p|
116
+ File.open(get_part_file(p)) do |r|
98
117
  w.write(r.read(READ_SIZE)) until r.eof?
99
118
  end
100
119
  end
101
120
  end
102
121
 
103
- File.delete(@checkpoint_file) unless options[:disable_cpt]
104
- @parts.each{ |p| File.delete(get_part_file(p[:number])) }
122
+ File.delete(@cpt_file) unless options[:disable_cpt]
123
+ parts.each{ |p| File.delete(get_part_file(p)) }
105
124
 
106
125
  logger.info("Done commit transaction, id: #{id}")
107
126
  end
108
127
 
109
128
  # Rebuild the states of the transaction from checkpoint file
110
129
  def rebuild
111
- logger.info("Begin rebuild transaction, "\
112
- "checkpoint: #{@checkpoint_file}")
130
+ logger.info("Begin rebuild transaction, checkpoint: #{@cpt_file}")
113
131
 
114
- if File.exists?(@checkpoint_file) and not options[:disable_cpt]
115
- states = load_checkpoint(@checkpoint_file)
132
+ if options[:disable_cpt] || !File.exists?(@cpt_file)
133
+ initiate
134
+ else
135
+ states = load_checkpoint(@cpt_file)
116
136
 
117
137
  states[:parts].select{ |p| p[:done] }.each do |p|
118
- part_file = get_part_file(p[:number])
138
+ part_file = get_part_file(p)
119
139
 
120
140
  unless File.exist?(part_file)
121
141
  fail PartMissingError, "The part file is missing: #{part_file}."
@@ -130,8 +150,6 @@ module Aliyun
130
150
  @id = states[:id]
131
151
  @object_meta = states[:object_meta]
132
152
  @parts = states[:parts]
133
- else
134
- initiate
135
153
  end
136
154
 
137
155
  logger.info("Done rebuild transaction, states: #{states}")
@@ -155,14 +173,13 @@ module Aliyun
155
173
  def download_part(p)
156
174
  logger.debug("Begin download part: #{p}")
157
175
 
158
- part_file = get_part_file(p[:number])
176
+ part_file = get_part_file(p)
159
177
  File.open(part_file, 'w') do |w|
160
178
  @protocol.get_object(
161
179
  bucket, object, :range => p[:range]) { |chunk| w.write(chunk) }
162
180
  end
163
181
 
164
- p[:done] = true
165
- p[:md5] = get_file_md5(part_file)
182
+ sync_update_part(p.merge(done: true, md5: get_file_md5(part_file)))
166
183
 
167
184
  checkpoint
168
185
 
@@ -191,6 +208,24 @@ module Aliyun
191
208
  logger.info("Done divide parts, parts: #{@parts}")
192
209
  end
193
210
 
211
+ def sync_get_todo_part
212
+ @todo_mutex.synchronize {
213
+ @todo_parts.shift
214
+ }
215
+ end
216
+
217
+ def sync_update_part(p)
218
+ @all_mutex.synchronize {
219
+ @parts[p[:number] - 1] = p
220
+ }
221
+ end
222
+
223
+ def sync_get_all_parts
224
+ @all_mutex.synchronize {
225
+ @parts.dup
226
+ }
227
+ end
228
+
194
229
  # Ensure file not changed during uploading
195
230
  def ensure_object_not_changed
196
231
  obj = @protocol.get_object_meta(bucket, object)
@@ -206,8 +241,8 @@ module Aliyun
206
241
  end
207
242
 
208
243
  # Get name for part file
209
- def get_part_file(number)
210
- "#{@file}.part.#{number}"
244
+ def get_part_file(p)
245
+ "#{@file}.part.#{p[:number]}"
211
246
  end
212
247
  end # Download
213
248