iij-dag-client 1.0.1
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/Gemfile +13 -0
- data/LICENSE.txt +174 -0
- data/Rakefile +43 -0
- data/config/settings.yml +11 -0
- data/iij-dag-client.gemspec +31 -0
- data/lib/dag.rb +33 -0
- data/lib/dag/client.rb +36 -0
- data/lib/dag/client/api.rb +295 -0
- data/lib/dag/client/api/cluster.rb +111 -0
- data/lib/dag/client/api/database.rb +58 -0
- data/lib/dag/client/api/job.rb +116 -0
- data/lib/dag/client/api/list_params.rb +36 -0
- data/lib/dag/client/api/rest_parameter.rb +149 -0
- data/lib/dag/client/api/storage.rb +354 -0
- data/lib/dag/client/api/storage_result.rb +52 -0
- data/lib/dag/client/api/table.rb +131 -0
- data/lib/dag/client/cluster.rb +26 -0
- data/lib/dag/client/cluster_validation.rb +59 -0
- data/lib/dag/client/database.rb +79 -0
- data/lib/dag/client/exception.rb +43 -0
- data/lib/dag/client/job.rb +56 -0
- data/lib/dag/client/job_validation.rb +22 -0
- data/lib/dag/client/model.rb +9 -0
- data/lib/dag/client/model/bucket.rb +20 -0
- data/lib/dag/client/model/bucket_collection.rb +34 -0
- data/lib/dag/client/model/cluster.rb +100 -0
- data/lib/dag/client/model/cluster_collection.rb +76 -0
- data/lib/dag/client/model/database.rb +34 -0
- data/lib/dag/client/model/database_collection.rb +51 -0
- data/lib/dag/client/model/job.rb +125 -0
- data/lib/dag/client/model/job_collection.rb +114 -0
- data/lib/dag/client/model/object.rb +56 -0
- data/lib/dag/client/model/object_collection.rb +64 -0
- data/lib/dag/client/model/table.rb +55 -0
- data/lib/dag/client/model/table_collection.rb +60 -0
- data/lib/dag/client/storage.rb +41 -0
- data/lib/dag/client/table.rb +16 -0
- data/lib/dag/client/version.rb +5 -0
- data/lib/dag/settings.rb +9 -0
- metadata +210 -0
--- /dev/null
+++ b/data/lib/dag/client/api/storage.rb
@@ -0,0 +1,354 @@
+require 'zlib'
+require 'mime-types'
+require 'singleton'
+
+module Dag
+  class Client::API
+    module Storage
+      def buckets
+        xml_doc = execute_storage(RestParameter.new(:get, '/'))
+        Dag::Client::API::BucketsResult.new(xml_doc)
+      end
+
+      def objects(bucket, prefix: nil, max: nil, marker: nil, delimiter: nil)
+        resource = '/'
+        query_params = {}
+        if prefix
+          query_params.merge!('prefix' => prefix)
+        end
+
+        if max
+          query_params.merge!('max-keys' => max)
+        end
+
+        if marker
+          query_params.merge!('marker' => marker)
+        end
+
+        if delimiter
+          query_params.merge!('delimiter' => delimiter)
+        end
+
+        xml_doc = execute_storage(RestParameter.new(:get, resource, bucket: bucket, query_params: query_params))
+        Dag::Client::API::ObjectsResult.new(xml_doc)
+      end
+
+      def create_bucket(bucket)
+        resource = '/'
+        execute_storage(RestParameter.new(:put, resource, bucket: bucket, content_type: 'application/json'))
+      end
+
+      def create_object(bucket, object_name, options = {}, &block)
+        resource = "/#{object_name}"
+
+        type = MIME::Types.type_for(object_name).first
+        content_type = type ? type.to_s : 'application/octet-stream'
+        options = options.merge(bucket: bucket, content_type: content_type)
+        execute_storage(RestParameter.new(:put, resource, options), &block)
+      end
+
+      def create_multipart_object(bucket, object_name, options = {}, &block)
+        mu = MultipartUpload.new(bucket, object_name, options) do
+          self
+        end
+
+        # Initiate Multipart Upload
+        upload_id = mu.initiate_multipart_upload
+
+        begin
+          # Upload Part
+          upload_objects = mu.upload_part(upload_id, &block)
+
+          # Complete Multipart Upload
+          mu.complete_multipart_upload(upload_id, upload_objects)
+
+        rescue => e
+          # Abort Multipart Upload
+          mu.abort_multipart_upload(upload_id)
+
+          raise e
+        end
+      end
+
+      def get_object(bucket, object, range = nil)
+        resource = "/#{object}"
+        headers = {}
+        if range
+          bt = "bytes=#{range.first}-"
+          bt += "#{range.last}" if range.last != -1
+          headers[:Range] = bt
+        end
+        execute_storage(RestParameter.new(:get, resource, bucket: bucket, raw_data: true, headers: headers))
+      end
+
+      def delete_bucket(bucket)
+        resource = '/'
+        execute_storage(RestParameter.new(:delete, resource, bucket: bucket))
+      end
+
+      def delete_object(bucket, object)
+        resource = "/#{object}"
+        execute_storage(RestParameter.new(:delete, resource, bucket: bucket, content_type: 'application/json'))
+      end
+
+      def import(db_name, tbl_name, file_paths, options = {})
+        _import = Import.new(db_name, tbl_name, file_paths, options) do
+          self
+        end
+
+        # calc label suffix => Fixnum
+        suffix = _import.calc_label_suffix
+
+        # import execute
+        upload_objects = _import.execute(suffix)
+
+        STDERR.puts "finished upload #{upload_objects.size} objects."
+        STDERR.puts
+        STDERR.puts 'upload_objects:'
+        upload_objects.each do |o|
+          STDERR.puts o
+        end
+      end
+
+      private
+
+      class Import
+        def initialize(db_name, tbl_name, file_paths, options = {}, &block)
+          @db_name = db_name
+          @tbl_name = tbl_name
+          @file_paths = file_paths
+          @jobs = options.delete(:jobs) || 1
+          @label = options.delete(:label) || 'label'
+          @splitsz = options.delete(:splitsz) || 100 * 1024 ** 2 # 100MB
+          @api = block[]
+
+          import_parameter = ImportParameter.instance
+          import_parameter.db_name = db_name
+          import_parameter.tbl_name = tbl_name
+          import_parameter.label = @label
+
+          if %w(_ .).include? @label[0]
+            raise Dag::Client::ParameterInvalid.new("label should not start with '_' or '.'")
+          end
+
+          STDERR.puts "Initialize...\njobs: #{@jobs}, splitsz: #{@splitsz}"
+        end
+
+        def calc_label_suffix
+          prefix = ImportParameter.instance.storage_prefix
+          objects = @api.objects(@db_name, prefix: prefix).objects
+
+          return 0 if objects.blank?
+
+          objects.map { |o| o.scan(/#{@label}_(\d+)/) }.flatten.map(&:to_i).sort.reverse.first + 1
+        end
+
+        def execute(suffix)
+          file_paths = @file_paths.is_a?(String) ? [@file_paths] : @file_paths
+
+          upload_objects = []
+          file_paths.each do |file_path|
+            file_index = if file_path.end_with?('.gz')
+              import_gz_file(file_path, suffix, upload_objects)
+            elsif file_path == "-"
+              import_stream($stdin, suffix, upload_objects)
+            else
+              import_text_file(file_path, suffix, upload_objects)
+            end
+
+            suffix += file_index
+          end
+
+          return upload_objects
+        end
+
+        def import_gz_file(file_path, suffix, upload_objects)
+          import_stream(Zlib::GzipReader.open(file_path), suffix, upload_objects)
+        rescue Zlib::Error
+          # if not gzip
+          import_text_file(file_path, suffix, upload_objects)
+        end
+
+        def import_text_file(file_path, suffix, upload_objects)
+          import_stream(File.open(file_path), suffix, upload_objects)
+        end
+
+        def import_stream(ifp, suffix, upload_objects)
+          q = SizedQueue.new(@jobs)
+          th = Array.new(@jobs) {
+            Thread.new {
+              while data = q.pop
+                break unless data
+                STDERR.puts "> starting upload part #{data[2]}, #{data[1].length}"
+                execute_storage_detail(data[1], suffix + data[0])
+                STDERR.puts "< finished upload part #{data[2]}, #{data[1].length}"
+                upload_objects << ImportParameter.instance.object_label(suffix + data[0])
+              end
+              q.push nil
+            }
+          }
+
+          begin
+            file_index = 0
+            import_index = ImportParameter.instance.index
+            while true
+              buffer = ifp.read(@splitsz)
+              break unless buffer
+              buffer.force_encoding("ASCII-8BIT")
+              nline = ifp.gets
+              if nline
+                nline.force_encoding("ASCII-8BIT")
+                buffer.concat(nline)
+              end
+              q.push [file_index, buffer, import_index]
+              file_index += 1
+              import_index += 1
+            end
+            q.push nil
+          end
+
+          th.map(&:join)
+          ifp.close
+
+          file_index
+        end
+
+        def execute_storage_detail(data, suffix)
+          str = StringIO.new
+          gz = Zlib::GzipWriter.new(str)
+          gz.write data
+          gz.close
+
+          options = {
+            content_type: 'application/x-gzip',
+            bucket: @db_name,
+            import: true
+          }
+
+          resource = ImportParameter.instance.url(suffix)
+          @api.execute_storage(RestParameter.new(:put, resource, options)) do
+            str.string
+          end
+        end
+
+        class ImportParameter
+          include Singleton
+
+          attr_accessor :db_name, :tbl_name, :label, :index
+
+          def initialize
+            @index = 1
+          end
+
+          def url(suffix)
+            "/#{@tbl_name}/#{@label}_#{suffix}.gz"
+          end
+
+          def object_label(suffix)
+            "/#{@db_name}/#{@tbl_name}/#{@label}_#{suffix}.gz"
+          end
+
+          def file_label(suffix)
+            "#{@label}_#{suffix}"
+          end
+
+          def storage_prefix
+            "#{@tbl_name}/#{@label}"
+          end
+        end
+      end
+
+      class MultipartUpload
+        def initialize(bucket, object, options = {}, &block)
+          type = MIME::Types.type_for(object).first
+          content_type = type ? type.to_s : 'application/octet-stream'
+          options = options.merge(bucket: bucket, content_type: content_type)
+
+          @bucket = bucket
+          @object = object
+          @splitsz = options.delete(:splitsz) || 100 * 1024 ** 2 # 100MB
+          @jobs = options.delete(:jobs) || 1
+          @options = options
+          @api = block[]
+        end
+
+        def initiate_multipart_upload
+          STDERR.puts "Initiate multipart upload...\njobs:#{@jobs}, splitsz:#{@splitsz}"
+          resource = "/#{@object}?uploads"
+          response = @api.execute_storage(RestParameter.new(:post, resource, @options))
+          upload_id = response.elements['InitiateMultipartUploadResult/UploadId'].text
+          return upload_id
+        end
+
+        def upload_part(upload_id, &block)
+          upload_objects = {}
+          split_stream(upload_id, upload_objects, &block)
+          return Hash[upload_objects.sort]
+        end
+
+        def complete_multipart_upload(upload_id, upload_objects)
+          resource = "/#{@object}?uploadId=#{upload_id}"
+
+          payload = '<CompleteMultipartUpload>'
+          upload_objects.each do |part, etag|
+            payload += "<Part><PartNumber>#{part}</PartNumber><ETag>#{etag}</ETag></Part>"
+          end
+          payload += '</CompleteMultipartUpload>'
+
+          @api.execute_storage(RestParameter.new(:post, resource, @options)) do
+            payload
+          end
+
+          puts "complete multipart upload."
+        end
+
+        def abort_multipart_upload(upload_id)
+          resource = "/#{@object}?uploadId=#{upload_id}"
+          @api.execute_storage(RestParameter.new(:delete, resource, @options))
+        end
+
+        private
+
+        def split_stream(upload_id, upload_objects, &block)
+          limit = 5 * 1024 ** 2 # 5MB
+          raise "split size is invalid. below lower limit of #{limit} byte" if @splitsz < limit
+
+          ifp = block[]
+
+          q = SizedQueue.new(@jobs)
+          th = Array.new(@jobs) {
+            Thread.new {
+              while data = q.pop
+                break unless data
+                puts "> starting upload part #{data[0]}, #{data[1].length}"
+                resource = "/#{@object}?partNumber=#{data[0]}&uploadId=#{upload_id}"
+                response = @api.execute_storage(RestParameter.new(:put, resource, @options)) do
+                  data[1]
+                end
+                puts "< finished upload part #{data[0]}, #{data[1].length}"
+                upload_objects[data[0]] = response.headers['ETag'].first
+              end
+              q.push nil
+            }
+          }
+
+          begin
+            file_index = 1
+            while true
+              buffer = ifp.read(@splitsz)
+              break unless buffer
+              buffer.force_encoding("ASCII-8BIT")
+
+              q.push [file_index, buffer]
+              file_index += 1
+            end
+            q.push nil
+          end
+
+          th.map(&:join)
+          puts "finished upload #{file_index-1} part objects."
+        end
+      end
+    end
+  end
+end
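
Read as a whole, the Storage module is an S3-style object API (buckets, objects, create_object, get_object, delete_bucket, delete_object) plus two threaded helpers: MultipartUpload splits an input stream into parts of at least 5 MB and uploads them concurrently, while Import gzips fixed-size chunks and uploads them as <table>/<label>_<suffix>.gz objects into the database's bucket. A minimal usage sketch follows; it is not part of the package and assumes a `client` object that mixes in Dag::Client::API::Storage (the actual client construction lives in data/lib/dag/client.rb, whose contents are not shown in this diff).

# Hypothetical setup: `build_dag_client` stands in for whatever
# data/lib/dag/client.rb provides to construct an API client.
client = build_dag_client

# List bucket names (BucketsResult#buckets is defined in storage_result.rb).
client.buckets.buckets.each { |name| puts name }

# List up to 10 object keys under a prefix.
result = client.objects('mydb', prefix: 'mytable/label', max: 10)
puts result.objects

# Upload a single object; the block supplies the request body.
client.create_object('mydb', 'report.csv') { File.read('report.csv') }

# Threaded gzip import into mydb/mytable: 4 workers, 16 MB chunks,
# producing objects named mytable/label_<suffix>.gz.
client.import('mydb', 'mytable', ['data1.tsv', 'data2.tsv.gz'],
              jobs: 4, label: 'label', splitsz: 16 * 1024 ** 2)
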
--- /dev/null
+++ b/data/lib/dag/client/api/storage_result.rb
@@ -0,0 +1,52 @@
+module Dag
+  class Client::API
+    class StorageResult
+      def initialize(xml_doc)
+        @xml_doc = xml_doc
+      end
+    end
+
+    class BucketsResult < StorageResult
+      def buckets
+        REXML::XPath.match(@xml_doc, "/ListAllMyBucketsResult/Buckets/Bucket/Name").map { |b| b.text }
+      end
+
+      def owner_id
+        REXML::XPath.match(@xml_doc, "/ListAllMyBucketsResult/Owner/ID").map { |b| b.text }.first
+      end
+
+      def owner_display_name
+        REXML::XPath.match(@xml_doc, "/ListAllMyBucketsResult/Owner/DisplayName").map { |b| b.text }.first
+      end
+    end
+
+    class ObjectsResult < StorageResult
+      def objects
+        REXML::XPath.match(@xml_doc, "/ListBucketResult/Contents/Key").map { |b| b.text }
+      end
+
+      def full_objects
+        REXML::XPath.match(@xml_doc, "/ListBucketResult/Contents").map { |m|
+          XmlSimple.xml_in(m.to_s)
+        }
+      end
+
+      def truncated?
+        REXML::XPath.match(@xml_doc, "/ListBucketResult/IsTruncated").map { |b| b.text }.first == 'true'
+      end
+
+      def marker
+        REXML::XPath.match(@xml_doc, "/ListBucketResult/Marker").map { |b| b.text }.first
+      end
+
+      def next_marker
+        REXML::XPath.match(@xml_doc, "/ListBucketResult/NextMarker").map { |b| b.text }.first
+      end
+
+      def max
+        REXML::XPath.match(@xml_doc, "/ListBucketResult/MaxKeys").map { |b| b.text }.first.to_i
+      end
+    end
+  end
+end
+
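
These result classes are thin REXML views over the storage XML. A standalone sketch of what ObjectsResult extracts from a ListBucketResult document (the sample payload is illustrative, not captured from the API):

require 'rexml/document'

xml = <<~XML
  <ListBucketResult>
    <IsTruncated>true</IsTruncated>
    <MaxKeys>2</MaxKeys>
    <NextMarker>mytable/label_1.gz</NextMarker>
    <Contents><Key>mytable/label_0.gz</Key></Contents>
    <Contents><Key>mytable/label_1.gz</Key></Contents>
  </ListBucketResult>
XML

doc = REXML::Document.new(xml)

# Same XPath as ObjectsResult#objects:
keys = REXML::XPath.match(doc, '/ListBucketResult/Contents/Key').map { |k| k.text }
# => ["mytable/label_0.gz", "mytable/label_1.gz"]

# Same XPath as ObjectsResult#truncated?; together with next_marker this is
# what a caller would use to page through objects(bucket, marker: ...).
truncated = REXML::XPath.match(doc, '/ListBucketResult/IsTruncated').map { |b| b.text }.first == 'true'
# => true
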
--- /dev/null
+++ b/data/lib/dag/client/api/table.rb
@@ -0,0 +1,131 @@
+require 'active_support/core_ext/object/to_query'
+
+module Dag
+  class Client::API
+    module Table
+      def table_info_list(cluster_name, database_name, options = {})
+        resource = %Q(/v1/#{cluster_name}/#{database_name})
+        execute(RestParameter.new(:get, resource, cano_resource: 'table', query_params: list_params(options)))
+      end
+
+      def table(cluster_name, database_name, tbl_name, params: {})
+        resource = %Q(/v1/#{cluster_name}/#{database_name}/#{tbl_name})
+        begin
+          execute(RestParameter.new(:get, resource, cano_resource: 'table'))
+        rescue Dag::Client::APIFailure => e
+          raise e if e.api_code != "TableNotFound"
+          nil
+        end
+      end
+
+      def create_table(cluster_name, db_name, params: {})
+        tbl_name = params[:table]
+        raise Dag::Client::ParameterInvalid.new('table name is blank') if tbl_name.blank?
+
+        if tbl_name !~ /\A[a-z0-9_]+\Z/
+          raise Dag::Client::ParameterInvalid.new("tbl_name is invalid: #{tbl_name}")
+        end
+
+        if tbl_name.length > 128
+          raise Dag::Client::ParameterInvalid.new("tbl_name is too long: #{tbl_name}")
+        end
+
+        format = params[:format]
+        if format && !['csv', 'tsv', 'json', 'json_agent'].include?(format)
+          raise Dag::Client::ParameterInvalid.new("format is invalid: #{format}")
+        end
+
+        comment = params[:comment]
+        if comment.present? && comment !~ /\A[[:ascii:]]+\Z/
+          raise Dag::Client::ParameterInvalid.new("comment is not ascii")
+        end
+
+        if comment && comment.length > 100
+          raise Dag::Client::ParameterInvalid.new("comment is too long")
+        end
+
+        resource = %Q(/v1/#{cluster_name}/#{db_name}/#{tbl_name})
+        parameters = {}
+        if format
+          parameters.merge!('format' => format)
+        end
+        schema = params[:schema]
+        if schema
+          parameters.merge!('schema' => params[:schema])
+        end
+
+        if comment
+          parameters.merge!('comment' => comment)
+        end
+
+        # Table Check
+        if params[:create_api] && response = table(cluster_name, db_name, tbl_name)
+          if response['tableName'] == tbl_name
+            raise Dag::Client::TableAlreadyExists.new('Table already exists')
+          end
+        end
+
+        execute(RestParameter.new(:put, resource, cano_resource: 'table', content_type: 'application/json', parameters: parameters))
+      end
+
+      def split_table(cluster_name, database_name, tbl_name, params)
+        raise Dag::Client::ParameterInvalid.new('params is blank') if params.blank?
+
+        input_object_keys = params[:input_object_keys]
+        unless input_object_keys.instance_of?(Array)
+          raise Dag::Client::ParameterInvalid.new('input_object_keys is not array')
+        end
+        raise Dag::Client::ParameterInvalid.new('input_object_keys is blank') if input_object_keys.blank?
+
+        input_object_keys.each do |input_object_key|
+          unless input_object_key.start_with?('dag://')
+            raise Dag::Client::ParameterInvalid.new("input_object_key should start with 'dag://'")
+          end
+        end
+
+        input_format = params[:input_format]
+        raise Dag::Client::ParameterInvalid.new('input_format is blank') if input_format.blank?
+        unless ['csv', 'tsv', 'json'].include?(input_format)
+          raise Dag::Client::ParameterInvalid.new("input_format is invalid:#{input_format}")
+        end
+
+        parameters = {
+          'inputObjectKeys' => params[:input_object_keys],
+          'inputFormat' => input_format,
+          'outputDatabase' => database_name,
+          'outputTable' => tbl_name,
+          'clusterName' => cluster_name
+        }
+
+        label = params[:label]
+        if label.present?
+          parameters.merge!('label' => label)
+        end
+
+        schema = params[:schema]
+        if schema.present?
+          parameters.merge!('schema' => schema)
+        end
+
+        execute(RestParameter.new(:post, "/v1/", cano_resource: 'split', content_type: 'application/json', parameters: parameters))
+      end
+
+      def delete_table(cluster_name, database_name, tbl_name)
+        execute(RestParameter.new(:delete, "/v1/#{cluster_name}/#{database_name}/#{tbl_name}", content_type: 'application/json', cano_resource: 'table'))
+      end
+
+      private
+
+      def default_schema(format)
+        case format
+        when 'csv', 'tsv'
+          'v array<string>'
+        when 'json'
+          'v map<string, string>'
+        when 'json_agent'
+          'time int, v map<string, string>'
+        end
+      end
+    end
+  end
+end
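
create_table validates its parameters before issuing the PUT: the table name must be present, match /\A[a-z0-9_]+\Z/, and be at most 128 characters; format, when given, must be one of csv, tsv, json, or json_agent; a comment must be ASCII and at most 100 characters. A standalone restatement of the name rules (illustrative; the gem itself raises Dag::Client::ParameterInvalid instead of returning false):

# Mirrors the tbl_name checks in create_table above.
def table_name_valid?(tbl_name)
  return false if tbl_name.nil? || tbl_name.empty? # 'table name is blank'
  return false unless tbl_name =~ /\A[a-z0-9_]+\Z/ # lowercase letters, digits, underscore
  return false if tbl_name.length > 128            # 'tbl_name is too long'
  true
end

table_name_valid?('access_log_2014') # => true
table_name_valid?('AccessLog')       # => false (uppercase not allowed)
table_name_valid?('a' * 129)         # => false (over the 128-character limit)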