aliyun-odps 0.1.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +31 -0
  4. data/Gemfile +3 -0
  5. data/README.md +55 -12
  6. data/Rakefile +15 -5
  7. data/aliyun-odps.gemspec +22 -11
  8. data/bin/console +10 -3
  9. data/lib/aliyun/odps.rb +69 -2
  10. data/lib/aliyun/odps/authorization.rb +90 -0
  11. data/lib/aliyun/odps/client.rb +40 -0
  12. data/lib/aliyun/odps/configuration.rb +16 -0
  13. data/lib/aliyun/odps/error.rb +97 -0
  14. data/lib/aliyun/odps/http.rb +138 -0
  15. data/lib/aliyun/odps/list.rb +40 -0
  16. data/lib/aliyun/odps/model/function.rb +16 -0
  17. data/lib/aliyun/odps/model/functions.rb +113 -0
  18. data/lib/aliyun/odps/model/instance.rb +130 -0
  19. data/lib/aliyun/odps/model/instance_task.rb +30 -0
  20. data/lib/aliyun/odps/model/instances.rb +119 -0
  21. data/lib/aliyun/odps/model/projects.rb +73 -0
  22. data/lib/aliyun/odps/model/resource.rb +26 -0
  23. data/lib/aliyun/odps/model/resources.rb +144 -0
  24. data/lib/aliyun/odps/model/table.rb +37 -0
  25. data/lib/aliyun/odps/model/table_column.rb +13 -0
  26. data/lib/aliyun/odps/model/table_partition.rb +9 -0
  27. data/lib/aliyun/odps/model/table_partitions.rb +90 -0
  28. data/lib/aliyun/odps/model/table_schema.rb +13 -0
  29. data/lib/aliyun/odps/model/tables.rb +125 -0
  30. data/lib/aliyun/odps/model/task_result.rb +9 -0
  31. data/lib/aliyun/odps/modelable.rb +16 -0
  32. data/lib/aliyun/odps/project.rb +47 -0
  33. data/lib/aliyun/odps/service_object.rb +27 -0
  34. data/lib/aliyun/odps/struct.rb +126 -0
  35. data/lib/aliyun/odps/tunnel/download_session.rb +98 -0
  36. data/lib/aliyun/odps/tunnel/router.rb +15 -0
  37. data/lib/aliyun/odps/tunnel/snappy_reader.rb +19 -0
  38. data/lib/aliyun/odps/tunnel/snappy_writer.rb +45 -0
  39. data/lib/aliyun/odps/tunnel/table_tunnels.rb +81 -0
  40. data/lib/aliyun/odps/tunnel/upload_block.rb +9 -0
  41. data/lib/aliyun/odps/tunnel/upload_session.rb +132 -0
  42. data/lib/aliyun/odps/utils.rb +102 -0
  43. data/lib/aliyun/odps/version.rb +1 -1
  44. data/requirements.png +0 -0
  45. data/wiki/error.md +188 -0
  46. data/wiki/functions.md +39 -0
  47. data/wiki/get_start.md +34 -0
  48. data/wiki/installation.md +15 -0
  49. data/wiki/instances.md +32 -0
  50. data/wiki/projects.md +51 -0
  51. data/wiki/resources.md +62 -0
  52. data/wiki/ssl.md +7 -0
  53. data/wiki/tables.md +75 -0
  54. data/wiki/tunnels.md +80 -0
  55. metadata +195 -13
  56. data/requirements.mindnode/QuickLook/Preview.jpg +0 -0
  57. data/requirements.mindnode/contents.xml +0 -10711
  58. data/requirements.mindnode/viewState.plist +0 -0
@@ -0,0 +1,98 @@
1
+ require 'odps_protobuf'
2
+ require 'aliyun/odps/tunnel/snappy_reader'
3
+
4
+ module Aliyun
5
+ module Odps
6
# A tunnel download session for a single table (optionally one partition).
#
# Holds the session attributes together with the project and the client
# used to fetch the rows that belong to the session.
class DownloadSession < Struct::Base
  property :project, Project, required: true
  property :client, Client, required: true

  property :download_id, String, required: true
  property :table_name, String, required: true
  property :partition_spec, String
  property :record_count, Integer
  property :status, String
  property :owner, String
  property :initiated, DateTime
  property :schema, Hash

  # Fetch a range of rows through this session and decode them.
  #
  # @see http://repo.aliyun.com/api-doc/Tunnel/get_table_download_id/index.html Get Download Block ID
  #
  # @param start [Integer, String] first row number; interpolated into the rowrange query value
  # @param count [Integer, String] row count; interpolated into the rowrange query value
  # @param columns [Array<String>] names of the columns to download
  # @param encoding [String] requested compression: raw, deflate or snappy (case-insensitive)
  #
  # @raise [ValueNotSupportedError] when encoding is none of the accepted values
  # @raise [RecordNotMatchSchemaError] when decoding the response fails for any reason
  #
  # @return whatever OdpsProtobuf::Deserializer#deserialize returns for the response body
  def download(start, count, columns, encoding = 'raw')
    response = client.get(
      "/projects/#{project.name}/tables/#{table_name}",
      query: build_download_query(start, count, columns),
      headers: build_download_headers(encoding)
    )
    protobufed2records(response.parsed_response, response.headers['content-encoding'], columns)
  end

  private

  # Query string parameters for a data request; the partition is only
  # sent when the session has a partition spec.
  def build_download_query(start, count, columns)
    params = {
      data: true,
      downloadid: download_id,
      columns: columns.join(','),
      rowrange: "(#{start},#{count})"
    }
    params[:partition] = partition_spec if partition_spec
    params
  end

  # Uncompress the payload and deserialize it against the selected columns.
  # Every StandardError raised on the way is reported as a schema mismatch.
  def protobufed2records(data, encoding, columns)
    payload = uncompress_data(data, encoding)
    deserializer = OdpsProtobuf::Deserializer.new
    selected_schema = build_schema_with(columns)
    deserializer.deserialize(payload, selected_schema)
  rescue
    # selected_schema is nil here when the failure happened before it was built.
    raise RecordNotMatchSchemaError.new(columns, selected_schema)
  end

  # Only snappy-framed bodies are uncompressed here; every other
  # content encoding (including 'deflate') is returned untouched.
  def uncompress_data(data, encoding)
    return SnappyReader.uncompress(data) if 'x-snappy-framed' == encoding

    data
  end

  # Request headers: the tunnel version plus the Accept-Encoding choice.
  def build_download_headers(encoding)
    { 'x-odps-tunnel-version' => TableTunnels::TUNNEL_VERSION }.tap do |headers|
      set_accept_encoding(headers, encoding)
    end
  end

  # Translate the user-facing encoding name into an Accept-Encoding header.
  def set_accept_encoding(headers, encoding)
    requested = encoding.to_s.downcase
    if requested == 'deflate'
      headers['Accept-Encoding'] = 'deflate'
    elsif requested == 'snappy'
      headers['Accept-Encoding'] = 'x-snappy-framed'
    elsif requested == 'raw'
      headers.delete('Accept-Encoding')
    else
      fail ValueNotSupportedError.new(:encoding, TableTunnels::SUPPORTED_ENCODING)
    end
  end

  # Restrict the session schema to the requested column names,
  # keeping the order the columns have in the session schema.
  def build_schema_with(columns)
    wanted = schema['columns'].select { |column| columns.include?(column['name']) }
    { 'columns' => wanted }
  end
end
97
+ end
98
+ end
@@ -0,0 +1,15 @@
1
+ module Aliyun
2
+ module Odps
3
# Looks up the tunnel service endpoint of a project.
class TunnelRouter
  # Ask the service which tunnel host serves the given project.
  #
  # @param client [#get] client used to issue the lookup request
  # @param project_name [String] name of the project
  #
  # @return [String, nil] "<protocol>://<host>" built from the configured
  #   protocol and the response body, or nil when the request raises RequestError
  def self.get_tunnel_endpoint(client, project_name)
    response = client.get(
      "/projects/#{project_name}/tunnel",
      query: { service: true, curr_project: project_name }
    )
    tunnel_host = response.parsed_response
    protocol = Aliyun::Odps.config.protocol
    "#{protocol}://#{tunnel_host}"
  rescue RequestError
    nil
  end
end
14
+ end
15
+ end
@@ -0,0 +1,19 @@
1
+ require 'zlib'
2
+ require 'odps_protobuf'
3
+
4
+ module Aliyun
5
# Decodes payloads in the Snappy framing format
# (the "x-snappy-framed" content encoding).
class SnappyReader
  # Every chunk starts with 1 type byte followed by a 3-byte
  # little-endian payload length.
  CHUNK_HEADER_SIZE = 4
  # Data chunks carry a 4-byte masked CRC-32C in front of their data.
  CHECKSUM_SIZE = 4
  COMPRESSED_CHUNK = 0x00
  UNCOMPRESSED_CHUNK = 0x01
  # Chunk types from here upwards (reserved skippable, padding and the
  # stream identifier) carry no data and may be skipped.
  FIRST_SKIPPABLE_CHUNK = 0x80

  # Uncompress a snappy-framed stream.
  #
  # Walks the stream chunk by chunk instead of assuming a fixed 18-byte
  # prefix followed by one compressed chunk, so streams made of several
  # chunks or of uncompressed chunks are decoded as well. The argument is
  # left untouched. Chunk checksums are not verified.
  #
  # @param data [String] the framed stream
  #
  # @raise [RuntimeError] when the snappy gem is not installed
  # @raise [ArgumentError] when the stream is truncated or contains a
  #   reserved unskippable chunk
  #
  # @return [String] the concatenated chunk contents, binary encoded
  def self.uncompress(data)
    load_snappy

    stream = data.dup.force_encoding(Encoding::BINARY)
    output = ''.dup.force_encoding(Encoding::BINARY)
    offset = 0

    while offset < stream.bytesize
      type, payload = read_chunk(stream, offset)
      offset += CHUNK_HEADER_SIZE + payload.bytesize
      output << decode_chunk(type, payload)
    end

    output
  end

  # Read the chunk that starts at offset; returns [type, payload].
  def self.read_chunk(stream, offset)
    header = stream.byteslice(offset, CHUNK_HEADER_SIZE)
    fail ArgumentError, 'truncated snappy chunk header' if header.bytesize < CHUNK_HEADER_SIZE

    word = header.unpack('V').first
    length = word >> 8
    payload = stream.byteslice(offset + CHUNK_HEADER_SIZE, length)
    fail ArgumentError, 'truncated snappy chunk' if payload.bytesize < length

    [word & 0xff, payload]
  end
  private_class_method :read_chunk

  # Turn one chunk into the bytes it contributes to the output.
  def self.decode_chunk(type, payload)
    return '' if type >= FIRST_SKIPPABLE_CHUNK

    unless [COMPRESSED_CHUNK, UNCOMPRESSED_CHUNK].include?(type)
      fail ArgumentError, "unsupported snappy chunk type #{type}"
    end
    fail ArgumentError, 'snappy data chunk without checksum' if payload.bytesize < CHECKSUM_SIZE

    body = payload.byteslice(CHECKSUM_SIZE, payload.bytesize - CHECKSUM_SIZE)
    body = Snappy.inflate(body) if type == COMPRESSED_CHUNK
    body.dup.force_encoding(Encoding::BINARY)
  end
  private_class_method :decode_chunk

  # Load the optional snappy gem on first use.
  def self.load_snappy
    return if defined?(::Snappy)

    require 'snappy'
  rescue LoadError
    raise 'Install snappy to support x-snappy-framed encoding: https://github.com/miyucy/snappy'
  end
end
19
+ end
@@ -0,0 +1,45 @@
1
+ require 'snappy'
2
+ require 'odps_protobuf'
3
+
4
+ module Aliyun
5
# Encodes data in the Snappy framing format
# (the "x-snappy-framed" content encoding).
class SnappyWriter
  # Largest amount of uncompressed data put into one chunk, in bytes.
  CHUNK_MAX = 65_536
  # A chunk is stored compressed only when compression saves at least
  # this fraction of its size.
  COMPRESSION_THRESHOLD = 0.125
  COMPRESSED_CHUNK = 0x00
  UNCOMPRESSED_CHUNK = 0x01
  STREAM_IDENTIFIER = 'sNaPpY'
  IDENTIFIER_CHUNK = 0xff

  # Compress data into a snappy-framed stream.
  #
  # The stream starts with the stream identifier chunk and is followed by
  # one data chunk per CHUNK_MAX bytes of input. Empty input yields the
  # identifier chunk only. Chunk headers are written little-endian
  # regardless of the host byte order.
  #
  # @param data [String] the bytes to compress; not modified
  #
  # @raise [RuntimeError] when the snappy gem is not installed
  #
  # @return [String] the framed stream
  def self.compress(data)
    load_snappy

    # Work on binary copies so offsets are byte offsets and neither the
    # argument nor the STREAM_IDENTIFIER constant is mutated.
    bytes = data.dup.force_encoding(Encoding::BINARY)
    identifier = STREAM_IDENTIFIER.dup.force_encoding(Encoding::BINARY)

    out = [[IDENTIFIER_CHUNK + (identifier.bytesize << 8)].pack('V'), identifier]
    (0...bytes.bytesize).step(CHUNK_MAX) do |offset|
      out.concat(frame_chunk(bytes.byteslice(offset, CHUNK_MAX)))
    end
    out.join
  end

  # Build [header, payload] for one chunk of uncompressed bytes.
  def self.frame_chunk(chunk)
    # The checksum always covers the uncompressed bytes.
    crc = masked_crc32c(chunk)
    compressed = Snappy.deflate(chunk).dup.force_encoding(Encoding::BINARY)

    if compressed.bytesize <= (1 - COMPRESSION_THRESHOLD) * chunk.bytesize
      chunk_type = COMPRESSED_CHUNK
      payload = compressed
    else
      chunk_type = UNCOMPRESSED_CHUNK
      payload = chunk
    end

    # Header word: type in the low byte, payload length (checksum
    # included) in the upper three bytes; 'V' is 32-bit little-endian.
    [[chunk_type + ((payload.bytesize + 4) << 8), crc].pack('VV'), payload]
  end
  private_class_method :frame_chunk

  # Masked CRC of data: the checksum from OdpsProtobuf::CrcCalculator
  # rotated right by 15 bits plus a constant, truncated to 32 bits.
  def self.masked_crc32c(data)
    crc = OdpsProtobuf::CrcCalculator.calculate(StringIO.new(data))
    (((crc >> 15) | (crc << 17)) + 0xa282ead8) & 0xffffffff
  end

  # Load the optional snappy gem on first use.
  def self.load_snappy
    return if defined?(::Snappy)

    require 'snappy'
  rescue LoadError
    raise 'Install snappy to support x-snappy-framed encoding: https://github.com/miyucy/snappy'
  end
end
45
+ end
@@ -0,0 +1,81 @@
1
+ module Aliyun
2
+ module Odps
3
# Entry point for creating tunnel upload and download sessions
# for the tables of a project.
class TableTunnels < ServiceObject
  TUNNEL_VERSION = '4'
  SUPPORTED_ENCODING = %w(raw deflate snappy)

  # Build a client that talks to the tunnel endpoint.
  #
  # The endpoint is looked up through TunnelRouter and falls back to the
  # configured tunnel_endpoint. A new client (and a new lookup) is made
  # on every call.
  #
  # @raise [TunnelEndpointMissingError] when neither source yields an endpoint
  #
  # @return [Aliyun::Odps::Client]
  def client
    config = Aliyun::Odps.config.dup
    config.endpoint = TunnelRouter.get_tunnel_endpoint(project.client, project.name) || Aliyun::Odps.config.tunnel_endpoint
    fail TunnelEndpointMissingError if config.endpoint.nil?
    Aliyun::Odps::Client.new(config)
  end

  # Init Download Session
  #
  # @see http://repo.aliyun.com/api-doc/Tunnel/post_create_download_session/index.html Post Download Session
  #
  # @param table_name [String] specify table name
  # @param partition [Hash, nil] specify partition spec, format: { 'key1': 'value1', 'key2': 'value2' }
  #
  # @return [DownloadSession]
  def init_download_session(table_name, partition = {})
    result, partition_spec = create_session(:downloads, table_name, partition)
    build_download_session(result, table_name, partition_spec)
  end

  # Init Upload Session
  #
  # @see http://repo.aliyun.com/api-doc/Tunnel/post_create_upload_session/index.html Post Upload Session
  #
  # @param table_name [String] specify table name
  # @param partition [Hash, nil] specify partition spec, format: { 'key1': 'value1', 'key2': 'value2' }
  #
  # @return [UploadSession]
  def init_upload_session(table_name, partition = {})
    result, partition_spec = create_session(:uploads, table_name, partition)
    build_upload_session(result, table_name, partition_spec)
  end

  private

  # POST the session creation request shared by uploads and downloads.
  #
  # @param action [Symbol] query flag naming the session kind (:uploads or :downloads)
  #
  # @return [Array] the parsed response and the partition spec that was
  #   sent (nil when no partition was given)
  def create_session(action, table_name, partition)
    path = "/projects/#{project.name}/tables/#{table_name}"
    query = { action => true }

    partition_spec = nil
    unless partition.nil? || partition.empty?
      partition_spec = generate_partition_spec(partition)
      query[:partition] = partition_spec
    end

    result = client.post(path, query: query).parsed_response
    # The body may arrive as an unparsed JSON string.
    result = JSON.parse(result) if result.is_a?(String)

    # The spec is returned explicitly: the query hash is keyed by
    # symbols, so reading it back with a String key always gave nil.
    [result, partition_spec]
  end

  # Render { 'k1' => 'v1', 'k2' => 'v2' } as "k1=v1,k2=v2".
  def generate_partition_spec(partition)
    partition.map { |k, v| "#{k}=#{v}" }.join(',')
  end

  def build_upload_session(result, table_name, partition_spec)
    UploadSession.new(
      result.merge(
        project: project,
        client: client,
        table_name: table_name,
        partition_spec: partition_spec
      ))
  end

  def build_download_session(result, table_name, partition_spec)
    DownloadSession.new(
      result.merge(
        project: project,
        client: client,
        table_name: table_name,
        partition_spec: partition_spec
      ))
  end
end
80
+ end
81
+ end
@@ -0,0 +1,9 @@
1
+ module Aliyun
2
+ module Odps
3
# One block that has been uploaded within a tunnel upload session.
# Only the block id is mandatory.
class UploadBlock < Struct::Base
  property(:block_id, :String, required: true)
  property(:create_time, :String)
  property(:date, :String)
end
8
+ end
9
+ end
@@ -0,0 +1,132 @@
1
+ require 'odps_protobuf'
2
+ require 'aliyun/odps/tunnel/snappy_writer'
3
+
4
+ module Aliyun
5
+ module Odps
6
# A tunnel upload session for a single table (optionally one partition).
#
# Holds the session attributes together with the project and the client
# used to upload blocks, refresh the session and commit it.
class UploadSession < Struct::Base
  property :project, :Project, required: true
  property :client, :Client, required: true

  property :upload_id, :String, required: true
  property :table_name, :String, required: true
  property :partition_spec, :String
  property :status, :String
  property :owner, :String
  property :initiated, :DateTime
  property :schema, :Hash
  property :blocks, :Array, init_with: ->(value) { value.map { |v| UploadBlock.new(v) } }

  alias_method :uploaded_block_list=, :blocks=

  # Upload data with block id
  #
  # @see http://repo.aliyun.com/api-doc/Tunnel/put_create_upload_id/index.html Put Upload Block ID
  #
  # @param block_id [String] specify block_id for this upload, range in 0~19999; a new block replaces an old one with the same block id
  # @param record_values [Array<Array>] specify the data, one array per record, ordered to match the schema
  # @param encoding [String, Symbol] specify the data compression format, supported values: raw, deflate, snappy (case-insensitive)
  #
  # @raise [ValueNotSupportedError] when encoding is none of the supported values
  # @raise [RecordNotMatchSchemaError] when building the request body fails
  #
  # @return [true]
  def upload(block_id, record_values, encoding = 'raw')
    path = "/projects/#{project.name}/tables/#{table_name}"

    query = { blockid: block_id, uploadid: upload_id }
    query[:partition] = partition_spec if partition_spec

    # Headers first: this is where an unsupported encoding is rejected.
    headers = build_upload_headers(encoding)
    body = generate_upload_body(record_values, encoding)

    !!client.put(path, query: query, headers: headers, body: body)
  end

  # reload this upload session
  #
  # @see http://repo.aliyun.com/api-doc/Tunnel/get_upload_session_status/index.html Get Upload Session Status
  #
  # @return the result of update_attrs with the attributes returned by the service
  def reload
    path = "/projects/#{project.name}/tables/#{table_name}"

    query = { uploadid: upload_id }
    query[:partition] = partition_spec if partition_spec

    resp = client.get(path, query: query)

    result = resp.parsed_response
    # The body may arrive as an unparsed JSON string.
    attrs = result.is_a?(String) ? JSON.parse(result) : result

    update_attrs(attrs)
  end

  # List uploaded blocks (refreshes the session first)
  #
  # @return [Array<UploadBlock>]
  def list_blocks
    reload
    blocks
  end

  # Complete the upload session
  #
  # @see http://repo.aliyun.com/api-doc/Tunnel/post_commit_upload_session/index.html Post commit Upload Session
  #
  # @return [true]
  def complete
    path = "/projects/#{project.name}/tables/#{table_name}"

    query = { uploadid: upload_id }
    query[:partition] = partition_spec if partition_spec

    headers = { 'x-odps-tunnel-version' => TableTunnels::TUNNEL_VERSION }

    !!client.post(path, query: query, headers: headers)
  end

  private

  # Serialize the records against the session schema and compress them.
  # Every StandardError raised on the way is reported as a schema mismatch.
  def generate_upload_body(record_values, encoding)
    serializer = OdpsProtobuf::Serializer.new
    data = serializer.serialize(record_values, schema)
    compress_data(data, encoding)
  rescue
    raise RecordNotMatchSchemaError.new(record_values, schema)
  end

  # Compress data for the given encoding.
  #
  # The encoding is normalised exactly like set_content_encoding does, so
  # the body always matches the Content-Encoding header that was sent
  # (previously 'Deflate' or :snappy produced the header but a nil body).
  def compress_data(data, encoding)
    case encoding.to_s.downcase
    when 'raw'
      data
    when 'deflate'
      require 'zlib'
      Zlib::Deflate.deflate(data)
    when 'snappy'
      SnappyWriter.compress(data)
    else
      fail ValueNotSupportedError.new(:encoding, TableTunnels::SUPPORTED_ENCODING)
    end
  end

  # Translate the user-facing encoding name into a Content-Encoding header.
  def set_content_encoding(headers, encoding)
    case encoding.to_s.downcase
    when 'deflate'
      headers['Content-Encoding'] = 'deflate'
    when 'snappy'
      headers['Content-Encoding'] = 'x-snappy-framed'
    when 'raw'
      headers.delete('Content-Encoding')
    else
      fail ValueNotSupportedError.new(:encoding, TableTunnels::SUPPORTED_ENCODING)
    end
  end

  # Request headers for a block upload.
  def build_upload_headers(encoding)
    headers = {
      'x-odps-tunnel-version' => TableTunnels::TUNNEL_VERSION,
      'Transfer-Encoding' => 'chunked'
    }
    set_content_encoding(headers, encoding)
    headers
  end
end
131
+ end
132
+ end
@@ -0,0 +1,102 @@
1
+ require 'base64'
2
+ require 'openssl'
3
+ require 'digest'
4
+ require 'gyoku'
5
+
6
+ module Aliyun
7
+ module Odps
8
# Stateless helper methods shared across the library.
class Utils
  class << self
    # Calculate content length
    #
    # @param content [#size, IO]
    #
    # @return [Integer, nil] nil when the size cannot be determined
    def content_size(content)
      if content.respond_to?(:size)
        content.size
      elsif content.is_a?(IO)
        content.stat.size
      end
    end

    # HexDigest body with MD5
    #
    # @return [String]
    def md5_hexdigest(body)
      Digest::MD5.hexdigest(body)
    end

    # @example
    #   # { 'a' => 1, 'c' => 3 }
    #   Utils.hash_slice({ 'a' => 1, 'b' => 2, 'c' => 3 }, 'a', 'c')
    #
    # @return [Hash] the selected keys that are present in hash
    def hash_slice(hash, *selected_keys)
      selected_keys.each_with_object({}) do |key, picked|
        picked[key] = hash[key] if hash.key?(key)
      end
    end

    # Convert File or Bin data to bin data
    #
    # Objects backed by a path (such as File) are read in full from that
    # path in binary mode; other readable objects (such as StringIO) are
    # read directly instead of raising TypeError; anything else is
    # returned as given.
    #
    # @return [String] bin data
    def to_data(file_or_bin)
      return file_or_bin unless file_or_bin.respond_to?(:read)
      return File.binread(file_or_bin) if file_or_bin.respond_to?(:to_path)

      file_or_bin.read
    end

    def to_xml(hash, options = {}) # nodoc
      %(<?xml version="1.0" encoding="UTF-8"?>#{Gyoku.xml(hash, options)})
    end

    # Dig values in deep hash
    #
    # @example
    #   dig_value({ 'a' => { 'b' => { 'c' => 3 } } }, 'a', 'b', 'c') # => 3
    #
    # @return the value found, or nil as soon as a key is missing or an
    #   intermediate value is not a Hash
    def dig_value(hash, *keys)
      keys.reduce(hash) do |node, key|
        return nil unless node.is_a?(Hash) && node.key?(key)

        node[key]
      end
    end

    # @see {http://apidock.com/rails/String/underscore String#underscore}
    def underscore(str)
      str.to_s
         .gsub(/::/, '/')
         .gsub(/([A-Z\d]+)([A-Z][a-z])/, '\1_\2')
         .gsub(/([a-z\d])([A-Z])/, '\1_\2')
         .tr('-', '_')
         .downcase
    end

    # Copy from {https://github.com/rails/rails/blob/14254d82a90b8aa4bd81f7eeebe33885bf83c378/activesupport/lib/active_support/core_ext/array/wrap.rb#L36 ActiveSupport::Array#wrap}
    def wrap(object)
      if object.nil?
        []
      elsif object.respond_to?(:to_ary)
        object.to_ary || [object]
      else
        [object]
      end
    end

    # Replace every key of hash with its String form, in place.
    def stringify_keys!(hash)
      # Iterates over a snapshot of the keys, so the hash may be modified.
      hash.keys.each do |key|
        hash[key.to_s] = hash.delete(key)
      end
    end

    # Build an identifier: flag + current time (YYYYmmddHHMMSS) + 6 random hex digits.
    def generate_uuid(flag)
      # Loaded here because nothing else in this file requires it.
      require 'securerandom'
      "#{flag}#{Time.now.strftime('%Y%m%d%H%M%S')}#{SecureRandom.hex(3)}"
    end
  end
end
101
+ end
102
+ end