aliyun-sdk 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,124 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ $LOAD_PATH.unshift(File.expand_path("../../../../lib", __FILE__))
4
+ require 'yaml'
5
+ require 'aliyun/oss'
6
+
7
+ ##
8
+ # 一般来说用户在上传object和下载object时只需要指定文件名就可以满足需要:
9
+ # - 在上传的时候client会从指定的文件中读取数据上传到OSS
10
+ # - 在下载的时候client会把从OSS下载的数据写入到指定的文件中
11
+ #
12
+ # 在某些情况下用户可能会需要流式地上传和下载:
13
+ # - 用户要写入到object中的数据不能立即得到全部,而是从网络中流式获取,
14
+ # 然后再一段一段地写入到OSS中
15
+ # - 用户要写入到object的数据是经过运算得出,每次得到一部分,用户不希望
16
+ # 保留所有的数据然后一次性写入到OSS
17
+ # - 用户下载的object很大,用户不希望一次性把它们下载到内存中,而是希望
18
+ # 获取一部分就处理一部分;用户也不希望把它先下载到文件中,然后再从文
19
+ # 件中读出来处理,这样会让数据经历不必要的拷贝
20
+ #
21
+ # 当然,对于流式上传的需求,我们可以使用OSS的appendable object来满足。
22
+ # 但是即使是normal object,利用sdk的streaming功能,也可以实现流式上传
23
+ # 和下载。
24
+
25
+ # 初始化OSS client
26
+ Aliyun::OSS::Logging.set_log_level(Logger::DEBUG)
27
+ conf_file = '~/.oss.yml'
28
+ conf = YAML.load(File.read(File.expand_path(conf_file)))
29
+ bucket = Aliyun::OSS::Client.new(
30
+ :endpoint => conf['endpoint'],
31
+ :cname => conf['cname'],
32
+ :access_key_id => conf['id'],
33
+ :access_key_secret => conf['key']).get_bucket(conf['bucket'])
34
+
35
+ # 辅助打印函数
36
+ def demo(msg)
37
+ puts "######### #{msg} ########"
38
+ puts
39
+ yield
40
+ puts "-------------------------"
41
+ puts
42
+ end
43
+
44
+ # 例子1: 归并排序
45
+ # 有两个文件sort.1, sort.2,它们分别存了一些从小到大排列的整数,每个整
46
+ # 数1行,现在要将它们做归并排序的结果上传到OSS中,命名为sort.all
47
+
48
+ local_1, local_2 = 'sort.1', 'sort.2'
49
+ result_object = 'sort.all'
50
+
51
+ File.open(File.expand_path(local_1), 'w') do |f|
52
+ [1001, 2005, 2007, 2011, 2013, 2015].each do |i|
53
+ f.puts(i.to_s)
54
+ end
55
+ end
56
+
57
+ File.open(File.expand_path(local_2), 'w') do |f|
58
+ [2009, 2010, 2012, 2017, 2020, 9999].each do |i|
59
+ f.puts(i.to_s)
60
+ end
61
+ end
62
+
63
+ demo "Streaming upload" do
64
+ bucket.put_object(result_object) do |content|
65
+ f1 = File.open(File.expand_path(local_1))
66
+ f2 = File.open(File.expand_path(local_2))
67
+ v1, v2 = f1.readline, f2.readline
68
+
69
+ until f1.eof? or f2.eof?
70
+ if v1.to_i < v2.to_i
71
+ content << v1
72
+ v1 = f1.readline
73
+ else
74
+ content << v2
75
+ v2 = f2.readline
76
+ end
77
+ end
78
+
79
+ [v1, v2].sort.each{|i| content << i}
80
+ content << f1.readline until f1.eof?
81
+ content << f2.readline until f2.eof?
82
+ end
83
+
84
+ puts "Put object: #{result_object}"
85
+
86
+ # 将文件下载下来查看
87
+ bucket.get_object(result_object, :file => result_object)
88
+ puts "Get object: #{result_object}"
89
+ puts "Content: #{File.read(result_object)}"
90
+ end
91
+
92
+ # 例子2: 下载进度条
93
+ # 下载一个大文件(10M),在下载的过程中打印下载进度
94
+
95
+ large_file = 'large_file'
96
+
97
+ demo "Streaming download" do
98
+ puts "Begin put object: #{large_file}"
99
+ # 利用streaming上传
100
+ bucket.put_object(large_file) do |stream|
101
+ 10.times { stream << "x" * (1024 * 1024) }
102
+ end
103
+
104
+ # 查看object大小
105
+ object_size = bucket.get_object(large_file).size
106
+ puts "Put object: #{large_file}, size: #{object_size}"
107
+
108
+ # 流式下载文件,仅打印进度,不保存文件
109
+ def to_percentile(v)
110
+ "#{(v * 100.0).round(2)} %"
111
+ end
112
+
113
+ puts "Begin download: #{large_file}"
114
+ last_got, got = 0, 0
115
+ bucket.get_object(large_file) do |chunk|
116
+ got += chunk.size
117
+ # 仅在下载进度大于10%的时候打印
118
+ if (got - last_got).to_f / object_size > 0.1
119
+ puts "Progress: #{to_percentile(got.to_f / object_size)}"
120
+ last_got = got
121
+ end
122
+ end
123
+ puts "Get object: #{large_file}, size: #{object_size}"
124
+ end
@@ -138,6 +138,8 @@ module Aliyun
138
138
  # @param opts [Hash] 查询选项
139
139
  # @option opts [String] :prefix 返回的object的前缀,如果设置则只
140
140
  # 返回那些名字以它为前缀的object
141
+ # @option opts [String] :marker 如果设置,则只返回名字在它之后
142
+ # (字典序,不包含marker)的object
141
143
  # @option opts [String] :delimiter 用于获取公共前缀的分隔符,从
142
144
  # 前缀后面开始到第一个分隔符出现的位置之前的字符,作为公共前缀。
143
145
  # @example
@@ -151,8 +153,6 @@ module Aliyun
151
153
  # '/foo/bar/', '/foo/xxx/'。它们恰好就是目录'/foo/'下的所有子目
152
154
  # 录。用delimiter获取公共前缀的方法避免了查询当前bucket下的所有
153
155
  # object(可能数量巨大),是用于模拟目录结构的常用做法。
154
- # @option opts [String] :encoding 指定返回的响应中object名字的编
155
- # 码方法,目前只支持{OSS::KeyEncoding::URL}编码方式。
156
156
  # @return [Enumerator<Object>] 其中Object可能是{OSS::Object},也
157
157
  # 可能是{String},此时它是一个公共前缀
158
158
  # @example
@@ -165,7 +165,8 @@ module Aliyun
165
165
  # end
166
166
  # end
167
167
  def list_objects(opts = {})
168
- Iterator::Objects.new(@protocol, name, opts).to_enum
168
+ Iterator::Objects.new(
169
+ @protocol, name, opts.merge(encoding: KeyEncoding::URL)).to_enum
169
170
  end
170
171
 
171
172
  # 向Bucket中上传一个object
@@ -192,17 +193,20 @@ module Aliyun
192
193
  # 它读到的数据为nil停止。
193
194
  # @note 如果opts中指定了:file,则block会被忽略
194
195
  def put_object(key, opts = {}, &block)
195
- file = opts[:file]
196
- if file
197
- opts[:content_type] = get_content_type(file)
196
+ args = opts.dup
198
197
 
199
- @protocol.put_object(name, key, opts) do |sw|
198
+ file = args[:file]
199
+ args[:content_type] ||= get_content_type(file) if file
200
+ args[:content_type] ||= get_content_type(key)
201
+
202
+ if file
203
+ @protocol.put_object(name, key, args) do |sw|
200
204
  File.open(File.expand_path(file), 'rb') do |f|
201
205
  sw << f.read(Protocol::STREAM_CHUNK_SIZE) until f.eof?
202
206
  end
203
207
  end
204
208
  else
205
- @protocol.put_object(name, key, opts, &block)
209
+ @protocol.put_object(name, key, args, &block)
206
210
  end
207
211
  end
208
212
 
@@ -309,18 +313,20 @@ module Aliyun
309
313
  # @return [Integer] 返回下次append的位置
310
314
  # @yield [HTTP::StreamWriter] 同 {#put_object}
311
315
  def append_object(key, pos, opts = {}, &block)
312
- next_pos = -1
313
- file = opts[:file]
314
- if file
315
- opts[:content_type] = get_content_type(file)
316
+ args = opts.dup
316
317
 
317
- next_pos = @protocol.append_object(name, key, pos, opts) do |sw|
318
+ file = args[:file]
319
+ args[:content_type] ||= get_content_type(file) if file
320
+ args[:content_type] ||= get_content_type(key)
321
+
322
+ if file
323
+ next_pos = @protocol.append_object(name, key, pos, args) do |sw|
318
324
  File.open(File.expand_path(file), 'rb') do |f|
319
325
  sw << f.read(Protocol::STREAM_CHUNK_SIZE) until f.eof?
320
326
  end
321
327
  end
322
328
  else
323
- next_pos = @protocol.append_object(name, key, pos, opts, &block)
329
+ next_pos = @protocol.append_object(name, key, pos, args, &block)
324
330
  end
325
331
 
326
332
  next_pos
@@ -345,7 +351,10 @@ module Aliyun
345
351
  # * :etag [String] 目标文件的ETag
346
352
  # * :last_modified [Time] 目标文件的最后修改时间
347
353
  def copy_object(source, dest, opts = {})
348
- @protocol.copy_object(name, source, dest, opts)
354
+ args = opts.dup
355
+
356
+ args[:content_type] ||= get_content_type(dest)
357
+ @protocol.copy_object(name, source, dest, args)
349
358
  end
350
359
 
351
360
  # 删除一个object
@@ -358,13 +367,12 @@ module Aliyun
358
367
  # @param keys [Array<String>] Object的名字集合
359
368
  # @param opts [Hash] 删除object的选项(可选)
360
369
  # @option opts [Boolean] :quiet 指定是否允许Server返回成功删除的
361
- # object
362
- # @option opts [String] :encoding 指定Server返回的成功删除的
363
- # object的名字的编码方式,目前只支持{OSS::KeyEncoding::URL}
370
+ # object,默认为false,即返回删除结果
364
371
  # @return [Array<String>] 成功删除的object的名字,如果指定
365
372
  # 了:quiet参数,则返回[]
366
373
  def batch_delete_objects(keys, opts = {})
367
- @protocol.batch_delete_objects(name, keys, opts)
374
+ @protocol.batch_delete_objects(
375
+ name, keys, opts.merge(encoding: KeyEncoding::URL))
368
376
  end
369
377
 
370
378
  # 设置object的ACL
@@ -425,12 +433,17 @@ module Aliyun
425
433
  # puts "Progress: #{(p * 100).round(2)} %"
426
434
  # end
427
435
  def resumable_upload(key, file, opts = {}, &block)
428
- unless cpt_file = opts[:cpt_file]
436
+ args = opts.dup
437
+
438
+ args[:content_type] ||= get_content_type(file)
439
+ args[:content_type] ||= get_content_type(key)
440
+
441
+ unless cpt_file = args[:cpt_file]
429
442
  cpt_file = get_cpt_file(file)
430
443
  end
431
444
 
432
445
  Multipart::Upload.new(
433
- @protocol, options: opts,
446
+ @protocol, options: args,
434
447
  progress: block,
435
448
  object: key, bucket: name, creation_time: Time.now,
436
449
  file: File.expand_path(file), cpt_file: cpt_file
@@ -477,18 +490,60 @@ module Aliyun
477
490
  # puts "Progress: #{(p * 100).round(2)} %"
478
491
  # end
479
492
  def resumable_download(key, file, opts = {}, &block)
480
- unless cpt_file = opts[:cpt_file]
493
+ args = opts.dup
494
+
495
+ args[:content_type] ||= get_content_type(file)
496
+ args[:content_type] ||= get_content_type(key)
497
+
498
+ unless cpt_file = args[:cpt_file]
481
499
  cpt_file = get_cpt_file(file)
482
500
  end
483
501
 
484
502
  Multipart::Download.new(
485
- @protocol, options: opts,
503
+ @protocol, options: args,
486
504
  progress: block,
487
505
  object: key, bucket: name, creation_time: Time.now,
488
506
  file: File.expand_path(file), cpt_file: cpt_file
489
507
  ).run
490
508
  end
491
509
 
510
+ # 列出此Bucket中正在进行的multipart上传请求,不包括已经完成或者
511
+ # 被取消的。
512
+ # @param [Hash] opts 可选项
513
+ # @option opts [String] :key_marker object key的标记,根据有没有
514
+ # 设置:id_marker,:key_marker的含义不同:
515
+ # 1. 如果未设置:id_marker,则只返回object key在:key_marker之后
516
+ # (字典序,不包含marker)的upload请求
517
+ # 2. 如果设置了:id_marker,则返回object key在:key_marker之后
518
+ # (字典序,不包含marker)的upload请求*和*Object
519
+ # key与:key_marker相等,*且*upload id在:id_marker之后(字母
520
+ # 表顺序排序,不包含marker)的upload请求
521
+ # @option opts [String] :id_marker upload id的标记,如
522
+ # 果:key_marker没有设置,则此参数会被忽略;否则与:key_marker一起
523
+ # 决定返回的结果(见上)
524
+ # @option opts [String] :prefix 如果指定,则只返回object key中符
525
+ # 合指定前缀的upload请求
526
+ # @return [Enumerator<Multipart::Transaction>] 其中每一个元素表
527
+ # 示一个upload请求
528
+ # @example
529
+ # key_marker = 1, id_marker = null
530
+ # # return <2, 0>, <2, 1>, <3, 0> ...
531
+ # key_marker = 1, id_marker = 5
532
+ # # return <1, 6>, <1, 7>, <2, 0>, <3, 0> ...
533
+ def list_uploads(opts = {})
534
+ Iterator::Uploads.new(
535
+ @protocol, name, opts.merge(encoding: KeyEncoding::URL)).to_enum
536
+ end
537
+
538
+ # 取消一个multipart上传请求,一般用于清除Bucket下因断点上传而产
539
+ # 生的文件碎片。成功取消后属于这个上传请求的分片都会被清除。
540
+ # @param [String] upload_id 上传请求的id,可通过{#list_uploads}
541
+ # 获得
542
+ # @param [String] key Object的名字
543
+ def abort_upload(upload_id, key)
544
+ @protocol.abort_multipart_upload(name, key, upload_id)
545
+ end
546
+
492
547
  # 获取Bucket的URL
493
548
  # @return [String] Bucket的URL
494
549
  def bucket_url
@@ -46,6 +46,8 @@ module Aliyun
46
46
  # 列出当前所有的bucket
47
47
  # @param opts [Hash] 查询选项
48
48
  # @option opts [String] :prefix 如果设置,则只返回以它为前缀的bucket
49
+ # @option opts [String] :marker 如果设置,则只返回名字在它之后
50
+ # (字典序,不包含marker)的bucket
49
51
  # @return [Enumerator<Bucket>] Bucket的迭代器
50
52
  def list_buckets(opts = {})
51
53
  if @config.cname
@@ -9,16 +9,22 @@ module Aliyun
9
9
  class Download < Transaction
10
10
  PART_SIZE = 10 * 1024 * 1024
11
11
  READ_SIZE = 16 * 1024
12
+ NUM_THREAD = 10
12
13
 
13
14
  def initialize(protocol, opts)
14
15
  args = opts.dup
15
16
  @protocol = protocol
16
17
  @progress = args.delete(:progress)
17
18
  @file = args.delete(:file)
18
- @checkpoint_file = args.delete(:cpt_file)
19
+ @cpt_file = args.delete(:cpt_file)
20
+ super(args)
21
+
19
22
  @object_meta = {}
23
+ @num_threads = options[:threads] || NUM_THREAD
24
+ @all_mutex = Mutex.new
20
25
  @parts = []
21
- super(args)
26
+ @todo_mutex = Mutex.new
27
+ @todo_parts = []
22
28
  end
23
29
 
24
30
  # Run the download transaction, which includes 3 stages:
@@ -27,8 +33,9 @@ module Aliyun
27
33
  # * 2. download each unfinished part
28
34
  # * 3. combine the downloaded parts into the final file
29
35
  def run
30
- logger.info("Begin download, file: #{@file}, checkpoint file: "\
31
- "#{@checkpoint_file}")
36
+ logger.info("Begin download, file: #{@file}, "\
37
+ "checkpoint file: #{@cpt_file}, "\
38
+ "threads: #{@num_threads}")
32
39
 
33
40
  # Rebuild transaction states from checkpoint file
34
41
  # Or initiate new transaction states
@@ -38,7 +45,17 @@ module Aliyun
38
45
  divide_parts if @parts.empty?
39
46
 
40
47
  # Download each part(object range)
41
- @parts.reject { |p| p[:done]}.each { |p| download_part(p) }
48
+ @todo_parts = @parts.reject { |p| p[:done] }
49
+
50
+ (1..@num_threads).map {
51
+ Thread.new {
52
+ loop {
53
+ p = sync_get_todo_part
54
+ break unless p
55
+ download_part(p)
56
+ }
57
+ }
58
+ }.map(&:join)
42
59
 
43
60
  # Combine the parts into the final file
44
61
  commit
@@ -62,25 +79,26 @@ module Aliyun
62
79
  # :md5 => 'states_md5'
63
80
  # }
64
81
  def checkpoint
65
- logger.debug("Begin make checkpoint, "\
66
- "disable_cpt: #{options[:disable_cpt]}")
82
+ logger.debug("Begin make checkpoint, disable_cpt: "\
83
+ "#{options[:disable_cpt] == true}")
67
84
 
68
85
  ensure_object_not_changed
69
86
 
87
+ parts = sync_get_all_parts
70
88
  states = {
71
89
  :id => id,
72
90
  :file => @file,
73
91
  :object_meta => @object_meta,
74
- :parts => @parts
92
+ :parts => parts
75
93
  }
76
94
 
77
95
  # report progress
78
96
  if @progress
79
- done = @parts.count { |p| p[:done] }
80
- @progress.call(done.to_f / @parts.size) if done > 0
97
+ done = parts.count { |p| p[:done] }
98
+ @progress.call(done.to_f / parts.size) if done > 0
81
99
  end
82
100
 
83
- write_checkpoint(states, @checkpoint_file) unless options[:disable_cpt]
101
+ write_checkpoint(states, @cpt_file) unless options[:disable_cpt]
84
102
 
85
103
  logger.debug("Done make checkpoint, states: #{states}")
86
104
  end
@@ -91,31 +109,33 @@ module Aliyun
91
109
  def commit
92
110
  logger.info("Begin commit transaction, id: #{id}")
93
111
 
112
+ parts = sync_get_all_parts
94
113
  # concat all part files into the target file
95
114
  File.open(@file, 'w') do |w|
96
- @parts.sort{ |x, y| x[:number] <=> y[:number] }.each do |p|
97
- File.open(get_part_file(p[:number])) do |r|
115
+ parts.sort{ |x, y| x[:number] <=> y[:number] }.each do |p|
116
+ File.open(get_part_file(p)) do |r|
98
117
  w.write(r.read(READ_SIZE)) until r.eof?
99
118
  end
100
119
  end
101
120
  end
102
121
 
103
- File.delete(@checkpoint_file) unless options[:disable_cpt]
104
- @parts.each{ |p| File.delete(get_part_file(p[:number])) }
122
+ File.delete(@cpt_file) unless options[:disable_cpt]
123
+ parts.each{ |p| File.delete(get_part_file(p)) }
105
124
 
106
125
  logger.info("Done commit transaction, id: #{id}")
107
126
  end
108
127
 
109
128
  # Rebuild the states of the transaction from checkpoint file
110
129
  def rebuild
111
- logger.info("Begin rebuild transaction, "\
112
- "checkpoint: #{@checkpoint_file}")
130
+ logger.info("Begin rebuild transaction, checkpoint: #{@cpt_file}")
113
131
 
114
- if File.exists?(@checkpoint_file) and not options[:disable_cpt]
115
- states = load_checkpoint(@checkpoint_file)
132
+ if options[:disable_cpt] || !File.exists?(@cpt_file)
133
+ initiate
134
+ else
135
+ states = load_checkpoint(@cpt_file)
116
136
 
117
137
  states[:parts].select{ |p| p[:done] }.each do |p|
118
- part_file = get_part_file(p[:number])
138
+ part_file = get_part_file(p)
119
139
 
120
140
  unless File.exist?(part_file)
121
141
  fail PartMissingError, "The part file is missing: #{part_file}."
@@ -130,8 +150,6 @@ module Aliyun
130
150
  @id = states[:id]
131
151
  @object_meta = states[:object_meta]
132
152
  @parts = states[:parts]
133
- else
134
- initiate
135
153
  end
136
154
 
137
155
  logger.info("Done rebuild transaction, states: #{states}")
@@ -155,14 +173,13 @@ module Aliyun
155
173
  def download_part(p)
156
174
  logger.debug("Begin download part: #{p}")
157
175
 
158
- part_file = get_part_file(p[:number])
176
+ part_file = get_part_file(p)
159
177
  File.open(part_file, 'w') do |w|
160
178
  @protocol.get_object(
161
179
  bucket, object, :range => p[:range]) { |chunk| w.write(chunk) }
162
180
  end
163
181
 
164
- p[:done] = true
165
- p[:md5] = get_file_md5(part_file)
182
+ sync_update_part(p.merge(done: true, md5: get_file_md5(part_file)))
166
183
 
167
184
  checkpoint
168
185
 
@@ -191,6 +208,24 @@ module Aliyun
191
208
  logger.info("Done divide parts, parts: #{@parts}")
192
209
  end
193
210
 
211
+ def sync_get_todo_part
212
+ @todo_mutex.synchronize {
213
+ @todo_parts.shift
214
+ }
215
+ end
216
+
217
+ def sync_update_part(p)
218
+ @all_mutex.synchronize {
219
+ @parts[p[:number] - 1] = p
220
+ }
221
+ end
222
+
223
+ def sync_get_all_parts
224
+ @all_mutex.synchronize {
225
+ @parts.dup
226
+ }
227
+ end
228
+
194
229
  # Ensure file not changed during uploading
195
230
  def ensure_object_not_changed
196
231
  obj = @protocol.get_object_meta(bucket, object)
@@ -206,8 +241,8 @@ module Aliyun
206
241
  end
207
242
 
208
243
  # Get name for part file
209
- def get_part_file(number)
210
- "#{@file}.part.#{number}"
244
+ def get_part_file(p)
245
+ "#{@file}.part.#{p[:number]}"
211
246
  end
212
247
  end # Download
213
248