aliyun-sdk 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,124 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ $LOAD_PATH.unshift(File.expand_path("../../../../lib", __FILE__))
4
+ require 'yaml'
5
+ require 'aliyun/oss'
6
+
7
+ ##
8
+ # 一般来说用户在上传object和下载object时只需要指定文件名就可以满足需要:
9
+ # - 在上传的时候client会从指定的文件中读取数据上传到OSS
10
+ # - 在下载的时候client会把从OSS下载的数据写入到指定的文件中
11
+ #
12
+ # 在某些情况下用户可能会需要流式地上传和下载:
13
+ # - 用户要写入到object中的数据不能立即得到全部,而是从网络中流式获取,
14
+ # 然后再一段一段地写入到OSS中
15
+ # - 用户要写入到object的数据是经过运算得出,每次得到一部分,用户不希望
16
+ # 保留所有的数据然后一次性写入到OSS
17
+ # - 用户下载的object很大,用户不希望一次性把它们下载到内存中,而是希望
18
+ # 获取一部分就处理一部分;用户也不希望把它先下载到文件中,然后再从文
19
+ # 件中读出来处理,这样会让数据经历不必要的拷贝
20
+ #
21
+ # 当然,对于流式上传的需求,我们可以使用OSS的appendable object来满足。
22
+ # 但是即使是normal object,利用sdk的streaming功能,也可以实现流式上传
23
+ # 和下载。
24
+
25
+ # 初始化OSS client
26
+ Aliyun::OSS::Logging.set_log_level(Logger::DEBUG)
27
+ conf_file = '~/.oss.yml'
28
+ conf = YAML.load(File.read(File.expand_path(conf_file)))
29
+ bucket = Aliyun::OSS::Client.new(
30
+ :endpoint => conf['endpoint'],
31
+ :cname => conf['cname'],
32
+ :access_key_id => conf['id'],
33
+ :access_key_secret => conf['key']).get_bucket(conf['bucket'])
34
+
35
+ # 辅助打印函数
36
+ def demo(msg)
37
+ puts "######### #{msg} ########"
38
+ puts
39
+ yield
40
+ puts "-------------------------"
41
+ puts
42
+ end
43
+
44
+ # 例子1: 归并排序
45
+ # 有两个文件sort.1, sort.2,它们分别存了一些从小到大排列的整数,每个整
46
+ # 数1行,现在要将它们做归并排序的结果上传到OSS中,命名为sort.all
47
+
48
+ local_1, local_2 = 'sort.1', 'sort.2'
49
+ result_object = 'sort.all'
50
+
51
+ File.open(File.expand_path(local_1), 'w') do |f|
52
+ [1001, 2005, 2007, 2011, 2013, 2015].each do |i|
53
+ f.puts(i.to_s)
54
+ end
55
+ end
56
+
57
+ File.open(File.expand_path(local_2), 'w') do |f|
58
+ [2009, 2010, 2012, 2017, 2020, 9999].each do |i|
59
+ f.puts(i.to_s)
60
+ end
61
+ end
62
+
63
+ demo "Streaming upload" do
64
+ bucket.put_object(result_object) do |content|
65
+ f1 = File.open(File.expand_path(local_1))
66
+ f2 = File.open(File.expand_path(local_2))
67
+ v1, v2 = f1.readline, f2.readline
68
+
69
+ until f1.eof? or f2.eof?
70
+ if v1.to_i < v2.to_i
71
+ content << v1
72
+ v1 = f1.readline
73
+ else
74
+ content << v2
75
+ v2 = f2.readline
76
+ end
77
+ end
78
+
79
+ [v1, v2].sort.each{|i| content << i}
80
+ content << f1.readline until f1.eof?
81
+ content << f2.readline until f2.eof?
82
+ end
83
+
84
+ puts "Put object: #{result_object}"
85
+
86
+ # 将文件下载下来查看
87
+ bucket.get_object(result_object, :file => result_object)
88
+ puts "Get object: #{result_object}"
89
+ puts "Content: #{File.read(result_object)}"
90
+ end
91
+
92
+ # 例子2: 下载进度条
93
+ # 下载一个大文件(10M),在下载的过程中打印下载进度
94
+
95
+ large_file = 'large_file'
96
+
97
+ demo "Streaming download" do
98
+ puts "Begin put object: #{large_file}"
99
+ # 利用streaming上传
100
+ bucket.put_object(large_file) do |stream|
101
+ 10.times { stream << "x" * (1024 * 1024) }
102
+ end
103
+
104
+ # 查看object大小
105
+ object_size = bucket.get_object(large_file).size
106
+ puts "Put object: #{large_file}, size: #{object_size}"
107
+
108
+ # 流式下载文件,仅打印进度,不保存文件
109
+ def to_percentile(v)
110
+ "#{(v * 100.0).round(2)} %"
111
+ end
112
+
113
+ puts "Begin download: #{large_file}"
114
+ last_got, got = 0, 0
115
+ bucket.get_object(large_file) do |chunk|
116
+ got += chunk.size
117
+ # 仅在下载进度大于10%的时候打印
118
+ if (got - last_got).to_f / object_size > 0.1
119
+ puts "Progress: #{to_percentile(got.to_f / object_size)}"
120
+ last_got = got
121
+ end
122
+ end
123
+ puts "Get object: #{large_file}, size: #{object_size}"
124
+ end
@@ -138,6 +138,8 @@ module Aliyun
138
138
  # @param opts [Hash] 查询选项
139
139
  # @option opts [String] :prefix 返回的object的前缀,如果设置则只
140
140
  # 返回那些名字以它为前缀的object
141
+ # @option opts [String] :marker 如果设置,则只返回名字在它之后
142
+ # (字典序,不包含marker)的object
141
143
  # @option opts [String] :delimiter 用于获取公共前缀的分隔符,从
142
144
  # 前缀后面开始到第一个分隔符出现的位置之前的字符,作为公共前缀。
143
145
  # @example
@@ -151,8 +153,6 @@ module Aliyun
151
153
  # '/foo/bar/', '/foo/xxx/'。它们恰好就是目录'/foo/'下的所有子目
152
154
  # 录。用delimiter获取公共前缀的方法避免了查询当前bucket下的所有
153
155
  # object(可能数量巨大),是用于模拟目录结构的常用做法。
154
- # @option opts [String] :encoding 指定返回的响应中object名字的编
155
- # 码方法,目前只支持{OSS::KeyEncoding::URL}编码方式。
156
156
  # @return [Enumerator<Object>] 其中Object可能是{OSS::Object},也
157
157
  # 可能是{String},此时它是一个公共前缀
158
158
  # @example
@@ -165,7 +165,8 @@ module Aliyun
165
165
  # end
166
166
  # end
167
167
  def list_objects(opts = {})
168
- Iterator::Objects.new(@protocol, name, opts).to_enum
168
+ Iterator::Objects.new(
169
+ @protocol, name, opts.merge(encoding: KeyEncoding::URL)).to_enum
169
170
  end
170
171
 
171
172
  # 向Bucket中上传一个object
@@ -192,17 +193,20 @@ module Aliyun
192
193
  # 它读到的数据为nil停止。
193
194
  # @note 如果opts中指定了:file,则block会被忽略
194
195
  def put_object(key, opts = {}, &block)
195
- file = opts[:file]
196
- if file
197
- opts[:content_type] = get_content_type(file)
196
+ args = opts.dup
198
197
 
199
- @protocol.put_object(name, key, opts) do |sw|
198
+ file = args[:file]
199
+ args[:content_type] ||= get_content_type(file) if file
200
+ args[:content_type] ||= get_content_type(key)
201
+
202
+ if file
203
+ @protocol.put_object(name, key, args) do |sw|
200
204
  File.open(File.expand_path(file), 'rb') do |f|
201
205
  sw << f.read(Protocol::STREAM_CHUNK_SIZE) until f.eof?
202
206
  end
203
207
  end
204
208
  else
205
- @protocol.put_object(name, key, opts, &block)
209
+ @protocol.put_object(name, key, args, &block)
206
210
  end
207
211
  end
208
212
 
@@ -309,18 +313,20 @@ module Aliyun
309
313
  # @return [Integer] 返回下次append的位置
310
314
  # @yield [HTTP::StreamWriter] 同 {#put_object}
311
315
  def append_object(key, pos, opts = {}, &block)
312
- next_pos = -1
313
- file = opts[:file]
314
- if file
315
- opts[:content_type] = get_content_type(file)
316
+ args = opts.dup
316
317
 
317
- next_pos = @protocol.append_object(name, key, pos, opts) do |sw|
318
+ file = args[:file]
319
+ args[:content_type] ||= get_content_type(file) if file
320
+ args[:content_type] ||= get_content_type(key)
321
+
322
+ if file
323
+ next_pos = @protocol.append_object(name, key, pos, args) do |sw|
318
324
  File.open(File.expand_path(file), 'rb') do |f|
319
325
  sw << f.read(Protocol::STREAM_CHUNK_SIZE) until f.eof?
320
326
  end
321
327
  end
322
328
  else
323
- next_pos = @protocol.append_object(name, key, pos, opts, &block)
329
+ next_pos = @protocol.append_object(name, key, pos, args, &block)
324
330
  end
325
331
 
326
332
  next_pos
@@ -345,7 +351,10 @@ module Aliyun
345
351
  # * :etag [String] 目标文件的ETag
346
352
  # * :last_modified [Time] 目标文件的最后修改时间
347
353
  def copy_object(source, dest, opts = {})
348
- @protocol.copy_object(name, source, dest, opts)
354
+ args = opts.dup
355
+
356
+ args[:content_type] ||= get_content_type(dest)
357
+ @protocol.copy_object(name, source, dest, args)
349
358
  end
350
359
 
351
360
  # 删除一个object
@@ -358,13 +367,12 @@ module Aliyun
358
367
  # @param keys [Array<String>] Object的名字集合
359
368
  # @param opts [Hash] 删除object的选项(可选)
360
369
  # @option opts [Boolean] :quiet 指定是否允许Server返回成功删除的
361
- # object
362
- # @option opts [String] :encoding 指定Server返回的成功删除的
363
- # object的名字的编码方式,目前只支持{OSS::KeyEncoding::URL}
370
+ # object,默认为false,即返回删除结果
364
371
  # @return [Array<String>] 成功删除的object的名字,如果指定
365
372
  # 了:quiet参数,则返回[]
366
373
  def batch_delete_objects(keys, opts = {})
367
- @protocol.batch_delete_objects(name, keys, opts)
374
+ @protocol.batch_delete_objects(
375
+ name, keys, opts.merge(encoding: KeyEncoding::URL))
368
376
  end
369
377
 
370
378
  # 设置object的ACL
@@ -425,12 +433,17 @@ module Aliyun
425
433
  # puts "Progress: #{(p * 100).round(2)} %"
426
434
  # end
427
435
  def resumable_upload(key, file, opts = {}, &block)
428
- unless cpt_file = opts[:cpt_file]
436
+ args = opts.dup
437
+
438
+ args[:content_type] ||= get_content_type(file)
439
+ args[:content_type] ||= get_content_type(key)
440
+
441
+ unless cpt_file = args[:cpt_file]
429
442
  cpt_file = get_cpt_file(file)
430
443
  end
431
444
 
432
445
  Multipart::Upload.new(
433
- @protocol, options: opts,
446
+ @protocol, options: args,
434
447
  progress: block,
435
448
  object: key, bucket: name, creation_time: Time.now,
436
449
  file: File.expand_path(file), cpt_file: cpt_file
@@ -477,18 +490,60 @@ module Aliyun
477
490
  # puts "Progress: #{(p * 100).round(2)} %"
478
491
  # end
479
492
  def resumable_download(key, file, opts = {}, &block)
480
- unless cpt_file = opts[:cpt_file]
493
+ args = opts.dup
494
+
495
+ args[:content_type] ||= get_content_type(file)
496
+ args[:content_type] ||= get_content_type(key)
497
+
498
+ unless cpt_file = args[:cpt_file]
481
499
  cpt_file = get_cpt_file(file)
482
500
  end
483
501
 
484
502
  Multipart::Download.new(
485
- @protocol, options: opts,
503
+ @protocol, options: args,
486
504
  progress: block,
487
505
  object: key, bucket: name, creation_time: Time.now,
488
506
  file: File.expand_path(file), cpt_file: cpt_file
489
507
  ).run
490
508
  end
491
509
 
510
+ # 列出此Bucket中正在进行的multipart上传请求,不包括已经完成或者
511
+ # 被取消的。
512
+ # @param [Hash] opts 可选项
513
+ # @option opts [String] :key_marker object key的标记,根据有没有
514
+ # 设置:id_marker,:key_marker的含义不同:
515
+ # 1. 如果未设置:id_marker,则只返回object key在:key_marker之后
516
+ # (字典序,不包含marker)的upload请求
517
+ # 2. 如果设置了:id_marker,则返回object key在:key_marker之后
518
+ # (字典序,不包含marker)的upload请求*和*Object
519
+ # key与:key_marker相等,*且*upload id在:id_marker之后(字母
520
+ # 表顺序排序,不包含marker)的upload请求
521
+ # @option opts [String] :id_marker upload id的标记,如
522
+ # 果:key_marker没有设置,则此参数会被忽略;否则与:key_marker一起
523
+ # 决定返回的结果(见上)
524
+ # @option opts [String] :prefix 如果指定,则只返回object key中符
525
+ # 合指定前缀的upload请求
526
+ # @return [Enumerator<Multipart::Transaction>] 其中每一个元素表
527
+ # 示一个upload请求
528
+ # @example
529
+ # key_marker = 1, id_marker = null
530
+ # # return <2, 0>, <2, 1>, <3, 0> ...
531
+ # key_marker = 1, id_marker = 5
532
+ # # return <1, 6>, <1, 7>, <2, 0>, <3, 0> ...
533
+ def list_uploads(opts = {})
534
+ Iterator::Uploads.new(
535
+ @protocol, name, opts.merge(encoding: KeyEncoding::URL)).to_enum
536
+ end
537
+
538
+ # 取消一个multipart上传请求,一般用于清除Bucket下因断点上传而产
539
+ # 生的文件碎片。成功取消后属于这个上传请求的分片都会被清除。
540
+ # @param [String] upload_id 上传请求的id,可通过{#list_uploads}
541
+ # 获得
542
+ # @param [String] key Object的名字
543
+ def abort_upload(upload_id, key)
544
+ @protocol.abort_multipart_upload(name, key, upload_id)
545
+ end
546
+
492
547
  # 获取Bucket的URL
493
548
  # @return [String] Bucket的URL
494
549
  def bucket_url
@@ -46,6 +46,8 @@ module Aliyun
46
46
  # 列出当前所有的bucket
47
47
  # @param opts [Hash] 查询选项
48
48
  # @option opts [String] :prefix 如果设置,则只返回以它为前缀的bucket
49
+ # @option opts [String] :marker 如果设置,则只返回名字在它之后
50
+ # (字典序,不包含marker)的bucket
49
51
  # @return [Enumerator<Bucket>] Bucket的迭代器
50
52
  def list_buckets(opts = {})
51
53
  if @config.cname
@@ -9,16 +9,22 @@ module Aliyun
9
9
  class Download < Transaction
10
10
  PART_SIZE = 10 * 1024 * 1024
11
11
  READ_SIZE = 16 * 1024
12
+ NUM_THREAD = 10
12
13
 
13
14
  def initialize(protocol, opts)
14
15
  args = opts.dup
15
16
  @protocol = protocol
16
17
  @progress = args.delete(:progress)
17
18
  @file = args.delete(:file)
18
- @checkpoint_file = args.delete(:cpt_file)
19
+ @cpt_file = args.delete(:cpt_file)
20
+ super(args)
21
+
19
22
  @object_meta = {}
23
+ @num_threads = options[:threads] || NUM_THREAD
24
+ @all_mutex = Mutex.new
20
25
  @parts = []
21
- super(args)
26
+ @todo_mutex = Mutex.new
27
+ @todo_parts = []
22
28
  end
23
29
 
24
30
  # Run the download transaction, which includes 3 stages:
@@ -27,8 +33,9 @@ module Aliyun
27
33
  # * 2. download each unfinished part
28
34
  # * 3. combine the downloaded parts into the final file
29
35
  def run
30
- logger.info("Begin download, file: #{@file}, checkpoint file: "\
31
- "#{@checkpoint_file}")
36
+ logger.info("Begin download, file: #{@file}, "\
37
+ "checkpoint file: #{@cpt_file}, "\
38
+ "threads: #{@num_threads}")
32
39
 
33
40
  # Rebuild transaction states from checkpoint file
34
41
  # Or initiate new transaction states
@@ -38,7 +45,17 @@ module Aliyun
38
45
  divide_parts if @parts.empty?
39
46
 
40
47
  # Download each part(object range)
41
- @parts.reject { |p| p[:done]}.each { |p| download_part(p) }
48
+ @todo_parts = @parts.reject { |p| p[:done] }
49
+
50
+ (1..@num_threads).map {
51
+ Thread.new {
52
+ loop {
53
+ p = sync_get_todo_part
54
+ break unless p
55
+ download_part(p)
56
+ }
57
+ }
58
+ }.map(&:join)
42
59
 
43
60
  # Combine the parts into the final file
44
61
  commit
@@ -62,25 +79,26 @@ module Aliyun
62
79
  # :md5 => 'states_md5'
63
80
  # }
64
81
  def checkpoint
65
- logger.debug("Begin make checkpoint, "\
66
- "disable_cpt: #{options[:disable_cpt]}")
82
+ logger.debug("Begin make checkpoint, disable_cpt: "\
83
+ "#{options[:disable_cpt] == true}")
67
84
 
68
85
  ensure_object_not_changed
69
86
 
87
+ parts = sync_get_all_parts
70
88
  states = {
71
89
  :id => id,
72
90
  :file => @file,
73
91
  :object_meta => @object_meta,
74
- :parts => @parts
92
+ :parts => parts
75
93
  }
76
94
 
77
95
  # report progress
78
96
  if @progress
79
- done = @parts.count { |p| p[:done] }
80
- @progress.call(done.to_f / @parts.size) if done > 0
97
+ done = parts.count { |p| p[:done] }
98
+ @progress.call(done.to_f / parts.size) if done > 0
81
99
  end
82
100
 
83
- write_checkpoint(states, @checkpoint_file) unless options[:disable_cpt]
101
+ write_checkpoint(states, @cpt_file) unless options[:disable_cpt]
84
102
 
85
103
  logger.debug("Done make checkpoint, states: #{states}")
86
104
  end
@@ -91,31 +109,33 @@ module Aliyun
91
109
  def commit
92
110
  logger.info("Begin commit transaction, id: #{id}")
93
111
 
112
+ parts = sync_get_all_parts
94
113
  # concat all part files into the target file
95
114
  File.open(@file, 'w') do |w|
96
- @parts.sort{ |x, y| x[:number] <=> y[:number] }.each do |p|
97
- File.open(get_part_file(p[:number])) do |r|
115
+ parts.sort{ |x, y| x[:number] <=> y[:number] }.each do |p|
116
+ File.open(get_part_file(p)) do |r|
98
117
  w.write(r.read(READ_SIZE)) until r.eof?
99
118
  end
100
119
  end
101
120
  end
102
121
 
103
- File.delete(@checkpoint_file) unless options[:disable_cpt]
104
- @parts.each{ |p| File.delete(get_part_file(p[:number])) }
122
+ File.delete(@cpt_file) unless options[:disable_cpt]
123
+ parts.each{ |p| File.delete(get_part_file(p)) }
105
124
 
106
125
  logger.info("Done commit transaction, id: #{id}")
107
126
  end
108
127
 
109
128
  # Rebuild the states of the transaction from checkpoint file
110
129
  def rebuild
111
- logger.info("Begin rebuild transaction, "\
112
- "checkpoint: #{@checkpoint_file}")
130
+ logger.info("Begin rebuild transaction, checkpoint: #{@cpt_file}")
113
131
 
114
- if File.exists?(@checkpoint_file) and not options[:disable_cpt]
115
- states = load_checkpoint(@checkpoint_file)
132
+ if options[:disable_cpt] || !File.exists?(@cpt_file)
133
+ initiate
134
+ else
135
+ states = load_checkpoint(@cpt_file)
116
136
 
117
137
  states[:parts].select{ |p| p[:done] }.each do |p|
118
- part_file = get_part_file(p[:number])
138
+ part_file = get_part_file(p)
119
139
 
120
140
  unless File.exist?(part_file)
121
141
  fail PartMissingError, "The part file is missing: #{part_file}."
@@ -130,8 +150,6 @@ module Aliyun
130
150
  @id = states[:id]
131
151
  @object_meta = states[:object_meta]
132
152
  @parts = states[:parts]
133
- else
134
- initiate
135
153
  end
136
154
 
137
155
  logger.info("Done rebuild transaction, states: #{states}")
@@ -155,14 +173,13 @@ module Aliyun
155
173
  def download_part(p)
156
174
  logger.debug("Begin download part: #{p}")
157
175
 
158
- part_file = get_part_file(p[:number])
176
+ part_file = get_part_file(p)
159
177
  File.open(part_file, 'w') do |w|
160
178
  @protocol.get_object(
161
179
  bucket, object, :range => p[:range]) { |chunk| w.write(chunk) }
162
180
  end
163
181
 
164
- p[:done] = true
165
- p[:md5] = get_file_md5(part_file)
182
+ sync_update_part(p.merge(done: true, md5: get_file_md5(part_file)))
166
183
 
167
184
  checkpoint
168
185
 
@@ -191,6 +208,24 @@ module Aliyun
191
208
  logger.info("Done divide parts, parts: #{@parts}")
192
209
  end
193
210
 
211
+ def sync_get_todo_part
212
+ @todo_mutex.synchronize {
213
+ @todo_parts.shift
214
+ }
215
+ end
216
+
217
+ def sync_update_part(p)
218
+ @all_mutex.synchronize {
219
+ @parts[p[:number] - 1] = p
220
+ }
221
+ end
222
+
223
+ def sync_get_all_parts
224
+ @all_mutex.synchronize {
225
+ @parts.dup
226
+ }
227
+ end
228
+
194
229
  # Ensure file not changed during uploading
195
230
  def ensure_object_not_changed
196
231
  obj = @protocol.get_object_meta(bucket, object)
@@ -206,8 +241,8 @@ module Aliyun
206
241
  end
207
242
 
208
243
  # Get name for part file
209
- def get_part_file(number)
210
- "#{@file}.part.#{number}"
244
+ def get_part_file(p)
245
+ "#{@file}.part.#{p[:number]}"
211
246
  end
212
247
  end # Download
213
248