aliyun-odps 0.1.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +31 -0
  4. data/Gemfile +3 -0
  5. data/README.md +55 -12
  6. data/Rakefile +15 -5
  7. data/aliyun-odps.gemspec +22 -11
  8. data/bin/console +10 -3
  9. data/lib/aliyun/odps.rb +69 -2
  10. data/lib/aliyun/odps/authorization.rb +90 -0
  11. data/lib/aliyun/odps/client.rb +40 -0
  12. data/lib/aliyun/odps/configuration.rb +16 -0
  13. data/lib/aliyun/odps/error.rb +97 -0
  14. data/lib/aliyun/odps/http.rb +138 -0
  15. data/lib/aliyun/odps/list.rb +40 -0
  16. data/lib/aliyun/odps/model/function.rb +16 -0
  17. data/lib/aliyun/odps/model/functions.rb +113 -0
  18. data/lib/aliyun/odps/model/instance.rb +130 -0
  19. data/lib/aliyun/odps/model/instance_task.rb +30 -0
  20. data/lib/aliyun/odps/model/instances.rb +119 -0
  21. data/lib/aliyun/odps/model/projects.rb +73 -0
  22. data/lib/aliyun/odps/model/resource.rb +26 -0
  23. data/lib/aliyun/odps/model/resources.rb +144 -0
  24. data/lib/aliyun/odps/model/table.rb +37 -0
  25. data/lib/aliyun/odps/model/table_column.rb +13 -0
  26. data/lib/aliyun/odps/model/table_partition.rb +9 -0
  27. data/lib/aliyun/odps/model/table_partitions.rb +90 -0
  28. data/lib/aliyun/odps/model/table_schema.rb +13 -0
  29. data/lib/aliyun/odps/model/tables.rb +125 -0
  30. data/lib/aliyun/odps/model/task_result.rb +9 -0
  31. data/lib/aliyun/odps/modelable.rb +16 -0
  32. data/lib/aliyun/odps/project.rb +47 -0
  33. data/lib/aliyun/odps/service_object.rb +27 -0
  34. data/lib/aliyun/odps/struct.rb +126 -0
  35. data/lib/aliyun/odps/tunnel/download_session.rb +98 -0
  36. data/lib/aliyun/odps/tunnel/router.rb +15 -0
  37. data/lib/aliyun/odps/tunnel/snappy_reader.rb +19 -0
  38. data/lib/aliyun/odps/tunnel/snappy_writer.rb +45 -0
  39. data/lib/aliyun/odps/tunnel/table_tunnels.rb +81 -0
  40. data/lib/aliyun/odps/tunnel/upload_block.rb +9 -0
  41. data/lib/aliyun/odps/tunnel/upload_session.rb +132 -0
  42. data/lib/aliyun/odps/utils.rb +102 -0
  43. data/lib/aliyun/odps/version.rb +1 -1
  44. data/requirements.png +0 -0
  45. data/wiki/error.md +188 -0
  46. data/wiki/functions.md +39 -0
  47. data/wiki/get_start.md +34 -0
  48. data/wiki/installation.md +15 -0
  49. data/wiki/instances.md +32 -0
  50. data/wiki/projects.md +51 -0
  51. data/wiki/resources.md +62 -0
  52. data/wiki/ssl.md +7 -0
  53. data/wiki/tables.md +75 -0
  54. data/wiki/tunnels.md +80 -0
  55. metadata +195 -13
  56. data/requirements.mindnode/QuickLook/Preview.jpg +0 -0
  57. data/requirements.mindnode/contents.xml +0 -10711
  58. data/requirements.mindnode/viewState.plist +0 -0
@@ -0,0 +1,98 @@
1
+ require 'odps_protobuf'
2
+ require 'aliyun/odps/tunnel/snappy_reader'
3
+
4
module Aliyun
  module Odps
    # A tunnel download session for one table (optionally one partition).
    #
    # Holds the identifiers needed to fetch rows through the tunnel client
    # and the table schema used to deserialize the protobuf payload.
    class DownloadSession < Struct::Base
      property :project, Project, required: true
      property :client, Client, required: true

      property :download_id, String, required: true
      property :table_name, String, required: true
      property :partition_spec, String
      property :record_count, Integer
      property :status, String
      property :owner, String
      property :initiated, DateTime
      property :schema, Hash

      # Download data
      #
      # @see http://repo.aliyun.com/api-doc/Tunnel/get_table_download_id/index.html Get Download Block ID
      #
      # @param start [String] specify start download row number
      # @param count [String] specify download row count
      # @param columns [Array] specify columns need download in array
      # @param encoding [String] specify the data compression format, supported value: raw, deflate, snappy
      #
      # @raise [ValueNotSupportedError] when encoding is not one of the supported values
      # @raise [RecordNotMatchSchemaError] when the payload cannot be deserialized with the selected columns
      #
      # @return the value produced by OdpsProtobuf::Deserializer#deserialize for the response body
      def download(start, count, columns, encoding = 'raw')
        path = "/projects/#{project.name}/tables/#{table_name}"

        query = build_download_query(start, count, columns)
        headers = build_download_headers(encoding)

        resp = client.get(path, query: query, headers: headers)
        protobufed2records(resp.parsed_response, resp.headers['content-encoding'], columns)
      end

      private

      # Build the query string parameters for a data download request.
      def build_download_query(start, count, columns)
        query = {
          data: true,
          downloadid: download_id,
          columns: columns.join(','),
          rowrange: "(#{start},#{count})"
        }
        query[:partition] = partition_spec if partition_spec
        query
      end

      # Decompress the response body and deserialize it into records.
      #
      # Decompression runs outside the rescue on purpose: a failure there
      # (for example the snappy gem is not installed) is not a schema
      # mismatch and must surface with its own error.
      def protobufed2records(data, encoding, columns)
        data = uncompress_data(data, encoding)

        selected_schema = nil
        begin
          selected_schema = build_schema_with(columns)
          OdpsProtobuf::Deserializer.new.deserialize(data, selected_schema)
        rescue
          raise RecordNotMatchSchemaError.new(columns, selected_schema)
        end
      end

      # Undo the response content-encoding.
      #
      # Only 'x-snappy-framed' is decoded here. Every other value, including
      # 'deflate', is returned unchanged.
      # NOTE(review): presumably the HTTP layer already inflates 'deflate'
      # bodies before they reach this method - confirm.
      def uncompress_data(data, encoding)
        case encoding
        when 'x-snappy-framed'
          SnappyReader.uncompress(data)
        else
          data
        end
      end

      # Build the request headers for a data download request.
      def build_download_headers(encoding)
        headers = { 'x-odps-tunnel-version' => TableTunnels::TUNNEL_VERSION }
        set_accept_encoding(headers, encoding)
        headers
      end

      # Translate the user-facing encoding name into an Accept-Encoding
      # header; fails for names outside TableTunnels::SUPPORTED_ENCODING.
      def set_accept_encoding(headers, encoding)
        case encoding.to_s.downcase
        when 'deflate'
          headers['Accept-Encoding'] = 'deflate'
        when 'snappy'
          headers['Accept-Encoding'] = 'x-snappy-framed'
        when 'raw'
          headers.delete('Accept-Encoding')
        else
          fail ValueNotSupportedError.new(:encoding, TableTunnels::SUPPORTED_ENCODING)
        end
      end

      # Restrict the session schema to the requested column names.
      def build_schema_with(columns)
        {
          'columns' => schema['columns'].select { |column| columns.include?(column['name']) }
        }
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Aliyun
  module Odps
    # Resolves the tunnel service endpoint of a project.
    class TunnelRouter
      class << self
        # Ask the service which tunnel host serves the given project.
        #
        # @param client [#get] client used to issue the lookup request
        # @param project_name [String] name of the project to look up
        #
        # @return [String, nil] "<protocol>://<host>" built from the configured
        #   protocol and the response body, or nil when the request fails
        #   with RequestError
        def get_tunnel_endpoint(client, project_name)
          begin
            lookup_path = "/projects/#{project_name}/tunnel"
            lookup_query = { service: true, curr_project: project_name }
            response = client.get(lookup_path, query: lookup_query)
            tunnel_host = response.parsed_response
            "#{Aliyun::Odps.config.protocol}://#{tunnel_host}"
          rescue RequestError
            # Deliberate best effort: the caller decides what to do without
            # a routed endpoint.
            nil
          end
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'zlib'
2
+ require 'odps_protobuf'
3
+
4
module Aliyun
  # Decoder for response bodies sent with the 'x-snappy-framed' encoding.
  class SnappyReader
    # Every chunk starts with a 1-byte type and a 3-byte little-endian length.
    CHUNK_HEADER_SIZE = 4
    # Data chunks carry a 4-byte masked CRC-32C before their payload.
    CHECKSUM_SIZE = 4
    COMPRESSED_CHUNK = 0x00
    UNCOMPRESSED_CHUNK = 0x01
    # Chunk types in this range are reserved and must not be skipped.
    UNSKIPPABLE_CHUNKS = (0x02..0x7f)

    # Decode a snappy framed stream.
    #
    # Walks every chunk of the stream instead of assuming a fixed 18-byte
    # prefix followed by one compressed chunk, so multi-chunk streams and
    # uncompressed chunks decode correctly. The argument is not modified.
    # Chunk checksums are not verified.
    #
    # @param data [String] the framed stream
    #
    # @raise [RuntimeError] when the snappy gem is missing, the stream is
    #   truncated, or an unskippable reserved chunk type is met
    #
    # @return [String] the concatenated, decompressed payload (binary)
    def self.uncompress(data)
      load_snappy

      stream = data.dup.force_encoding(Encoding::BINARY)
      output = String.new
      offset = 0

      while offset < stream.bytesize
        type, length = read_chunk_header(stream, offset)
        offset += CHUNK_HEADER_SIZE

        body = stream.byteslice(offset, length)
        fail 'Truncated x-snappy-framed stream' if body.nil? || body.bytesize < length
        offset += length

        output << decode_chunk(type, body)
      end

      output
    end

    # Require the optional snappy gem, with an actionable message on failure.
    def self.load_snappy
      require 'snappy'
    rescue LoadError
      raise 'Install snappy to support x-snappy-framed encoding: https://github.com/miyucy/snappy'
    end

    # Read the chunk type and body length found at offset.
    def self.read_chunk_header(stream, offset)
      header = stream.byteslice(offset, CHUNK_HEADER_SIZE)
      fail 'Truncated x-snappy-framed stream' if header.bytesize < CHUNK_HEADER_SIZE

      length = header.getbyte(1) | (header.getbyte(2) << 8) | (header.getbyte(3) << 16)
      [header.getbyte(0), length]
    end
    private_class_method :read_chunk_header

    # Turn one chunk body into the bytes it contributes to the output.
    def self.decode_chunk(type, body)
      case type
      when COMPRESSED_CHUNK
        Snappy.inflate(chunk_payload(body)).dup.force_encoding(Encoding::BINARY)
      when UNCOMPRESSED_CHUNK
        chunk_payload(body)
      when UNSKIPPABLE_CHUNKS
        fail format('Unsupported x-snappy-framed chunk type: 0x%02x', type)
      else
        # Stream identifier, padding and reserved skippable chunks carry no data.
        ''
      end
    end
    private_class_method :decode_chunk

    # Strip the leading checksum from a data chunk body.
    def self.chunk_payload(body)
      fail 'Truncated x-snappy-framed stream' if body.bytesize < CHECKSUM_SIZE
      body.byteslice(CHECKSUM_SIZE, body.bytesize - CHECKSUM_SIZE)
    end
    private_class_method :chunk_payload
  end
end
@@ -0,0 +1,45 @@
1
+ require 'snappy'
2
+ require 'odps_protobuf'
3
+
4
module Aliyun
  # Encoder producing request bodies in the 'x-snappy-framed' encoding.
  class SnappyWriter
    # Maximum number of input bytes placed in one chunk.
    CHUNK_MAX = 65_536
    # A chunk is stored compressed only if that saves at least this fraction.
    COMPRESSION_THRESHOLD = 0.125
    COMPRESSED_CHUNK = 0x00
    UNCOMPRESSED_CHUNK = 0x01
    STREAM_IDENTIFIER = 'sNaPpY'
    IDENTIFIER_CHUNK = 0xff

    # Encode data as a snappy framed stream.
    #
    # The stream starts with the identifier chunk, followed by one data chunk
    # per CHUNK_MAX bytes of input. Empty input yields the identifier chunk
    # only. The argument and the STREAM_IDENTIFIER constant are not modified.
    #
    # @param data [String] the bytes to encode
    #
    # @raise [RuntimeError] when the snappy gem is not installed
    #
    # @return [String] the framed stream
    def self.compress(data)
      load_snappy

      # Work on bytes so chunk sizes and offsets never depend on the
      # character encoding of the input.
      payload = data.encoding == Encoding::BINARY ? data : data.dup.force_encoding(Encoding::BINARY)

      # 'L<' is Ruby's little-endian 32-bit directive: chunk type in the low
      # byte, 24-bit body length above it.
      out = [
        [IDENTIFIER_CHUNK + (STREAM_IDENTIFIER.length << 8)].pack('L<'),
        STREAM_IDENTIFIER.dup.force_encoding(Encoding::BINARY)
      ]

      # Exclusive range: no trailing empty chunk when the input size is zero
      # or an exact multiple of CHUNK_MAX.
      (0...payload.bytesize).step(CHUNK_MAX) do |offset|
        chunk = payload.byteslice(offset, CHUNK_MAX)
        # The checksum always covers the uncompressed bytes.
        crc = masked_crc32c(chunk)
        compressed = Snappy.deflate(chunk).dup.force_encoding(Encoding::BINARY)

        if compressed.bytesize <= (1 - COMPRESSION_THRESHOLD) * chunk.bytesize
          chunk = compressed
          chunk_type = COMPRESSED_CHUNK
        else
          chunk_type = UNCOMPRESSED_CHUNK
        end

        # The declared length includes the 4 checksum bytes.
        out << [chunk_type + ((chunk.bytesize + 4) << 8), crc].pack('L<L<')
        out << chunk
      end

      out.join('')
    end

    # Checksum of data as computed by OdpsProtobuf::CrcCalculator, then
    # rotated and offset ("masked") and truncated to 32 bits.
    #
    # @return [Integer]
    def self.masked_crc32c(data)
      crc = OdpsProtobuf::CrcCalculator.calculate(StringIO.new(data))
      (((crc >> 15) | (crc << 17)) + 0xa282ead8) & 0xffffffff
    end

    # Require the optional snappy gem, with an actionable message on failure.
    def self.load_snappy
      require 'snappy'
    rescue LoadError
      raise 'Install snappy to support x-snappy-framed encoding: https://github.com/miyucy/snappy'
    end
  end
end
@@ -0,0 +1,81 @@
1
module Aliyun
  module Odps
    # Entry point for creating tunnel upload and download sessions.
    class TableTunnels < ServiceObject
      TUNNEL_VERSION = '4'
      SUPPORTED_ENCODING = %w(raw deflate snappy)

      # Build a client bound to the tunnel endpoint.
      #
      # The endpoint comes from TunnelRouter, falling back to the configured
      # tunnel_endpoint. A new client is built on every call.
      #
      # @raise [TunnelEndpointMissingError] when neither source yields an endpoint
      #
      # @return [Aliyun::Odps::Client]
      def client
        config = Aliyun::Odps.config.dup
        config.endpoint = TunnelRouter.get_tunnel_endpoint(project.client, project.name) || Aliyun::Odps.config.tunnel_endpoint
        fail TunnelEndpointMissingError if config.endpoint.nil?
        Aliyun::Odps::Client.new(config)
      end

      # Init Download Session
      #
      # @see http://repo.aliyun.com/api-doc/Tunnel/post_create_download_session/index.html Post Download Session
      #
      # @param table_name [String] specify table name
      # @param partition [Hash] specify partition spec, format: { 'key1': 'value1', 'key2': 'value2' }
      #
      # @return [DownloadSession]
      def init_download_session(table_name, partition = {})
        query = session_query(:downloads, partition)
        result = create_session(table_name, query)

        # The query hash uses Symbol keys, so the spec is read back with
        # :partition (a String key would always yield nil).
        build_download_session(result, table_name, query[:partition])
      end

      # Init Upload Session
      #
      # @see http://repo.aliyun.com/api-doc/Tunnel/post_create_upload_session/index.html Post Upload Session
      #
      # @param table_name [String] specify table name
      # @param partition [Hash] specify partition spec, format: { 'key1': 'value1', 'key2': 'value2' }
      #
      # @return [UploadSession]
      def init_upload_session(table_name, partition = {})
        query = session_query(:uploads, partition)
        result = create_session(table_name, query)

        # See init_download_session: the key must be the Symbol :partition.
        build_upload_session(result, table_name, query[:partition])
      end

      private

      # Build the query for a session creation request; the partition entry
      # is added only when a non-empty partition hash is given.
      def session_query(kind, partition)
        query = { kind => true }
        if partition && !partition.empty?
          query[:partition] = generate_partition_spec(partition)
        end
        query
      end

      # POST the session creation request and return the decoded response,
      # parsing it as JSON when the client hands back a raw String.
      def create_session(table_name, query)
        path = "/projects/#{project.name}/tables/#{table_name}"
        result = client.post(path, query: query).parsed_response
        result.is_a?(String) ? JSON.parse(result) : result
      end

      # Render { 'k1' => 'v1', 'k2' => 'v2' } as "k1=v1,k2=v2".
      def generate_partition_spec(partition)
        partition.map { |k, v| "#{k}=#{v}" }.join(',')
      end

      # Wrap the service response in an UploadSession bound to this project.
      def build_upload_session(result, table_name, partition_spec)
        UploadSession.new(
          result.merge(
            project: project,
            client: client,
            table_name: table_name,
            partition_spec: partition_spec
          ))
      end

      # Wrap the service response in a DownloadSession bound to this project.
      def build_download_session(result, table_name, partition_spec)
        DownloadSession.new(
          result.merge(
            project: project,
            client: client,
            table_name: table_name,
            partition_spec: partition_spec
          ))
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Aliyun
  module Odps
    # One uploaded block of an upload session, as listed by the service.
    class UploadBlock < Struct::Base
      # Identifier of the block; the only mandatory attribute.
      property(:block_id, :String, required: true)
      # NOTE(review): presumably the block creation timestamp reported by the
      # service - confirm against the tunnel API response.
      property(:create_time, :String)
      property(:date, :String)
    end
  end
end
@@ -0,0 +1,132 @@
1
+ require 'odps_protobuf'
2
+ require 'aliyun/odps/tunnel/snappy_writer'
3
+
4
module Aliyun
  module Odps
    # A tunnel upload session for one table (optionally one partition).
    #
    # Records are uploaded block by block, the session state can be
    # reloaded from the service, and the session is finished with #complete.
    class UploadSession < Struct::Base
      property :project, :Project, required: true
      property :client, :Client, required: true

      property :upload_id, :String, required: true
      property :table_name, :String, required: true
      property :partition_spec, :String
      property :status, :String
      property :owner, :String
      property :initiated, :DateTime
      property :schema, :Hash
      property :blocks, :Array, init_with: ->(value) do
        value.map { |v| UploadBlock.new(v) }
      end

      # Lets an 'uploaded_block_list' attribute populate blocks.
      alias_method :uploaded_block_list=, :blocks=

      # Upload data with block id
      #
      # @see http://repo.aliyun.com/api-doc/Tunnel/put_create_upload_id/index.html Put Upload Block ID
      #
      # @param block_id [String] specify block_id for this upload, range in 0~19999; a new block replaces an old one with the same block id
      # @param record_values [Array<Array>] specify the data, an array per record, ordered to match your schema
      # @param encoding [String] specify the data compression format, supported value: raw, deflate, snappy
      #
      # @raise [ValueNotSupportedError] when encoding is not one of the supported values
      # @raise [RecordNotMatchSchemaError] when the records cannot be serialized with the session schema
      #
      # @return [true]
      def upload(block_id, record_values, encoding = 'raw')
        path = "/projects/#{project.name}/tables/#{table_name}"

        query = { blockid: block_id, uploadid: upload_id }
        query[:partition] = partition_spec if partition_spec

        # Headers are built first so an unsupported encoding fails before
        # any serialization work is done.
        headers = build_upload_headers(encoding)
        body = generate_upload_body(record_values, encoding)

        !!client.put(path, query: query, headers: headers, body: body)
      end

      # reload this upload session
      #
      # @see http://repo.aliyun.com/api-doc/Tunnel/get_upload_session_status/index.html Get Upload Session Status
      #
      # @return the value returned by update_attrs
      def reload
        path = "/projects/#{project.name}/tables/#{table_name}"

        query = { uploadid: upload_id }
        query[:partition] = partition_spec if partition_spec

        resp = client.get(path, query: query)

        result = resp.parsed_response
        attrs = result.is_a?(String) ? JSON.parse(result) : result

        update_attrs(attrs)
      end

      # List uploaded blocks
      #
      # Reloads the session first, so this performs a request.
      #
      # @return [Array<UploadBlock>]
      def list_blocks
        reload
        blocks
      end

      # Complete the upload session
      #
      # @see http://repo.aliyun.com/api-doc/Tunnel/post_commit_upload_session/index.html Post commit Upload Session
      #
      # @return [true]
      def complete
        path = "/projects/#{project.name}/tables/#{table_name}"

        query = { uploadid: upload_id }
        query[:partition] = partition_spec if partition_spec

        headers = { 'x-odps-tunnel-version' => TableTunnels::TUNNEL_VERSION }

        !!client.post(path, query: query, headers: headers)
      end

      private

      # Serialize the records with the session schema, then compress them.
      #
      # Only serialization failures are reported as a schema mismatch.
      # Compression runs outside the rescue so its own errors (for example
      # a missing snappy gem) are not mislabelled.
      def generate_upload_body(record_values, encoding)
        data =
          begin
            OdpsProtobuf::Serializer.new.serialize(record_values, schema)
          rescue
            raise RecordNotMatchSchemaError.new(record_values, schema)
          end

        compress_data(data, encoding)
      end

      # Compress data for the given encoding name.
      #
      # The name is normalized the same way as in set_content_encoding, so
      # the body always matches the Content-Encoding header that was sent.
      def compress_data(data, encoding)
        case encoding.to_s.downcase
        when 'raw'
          data
        when 'deflate'
          require 'zlib'
          Zlib::Deflate.deflate(data)
        when 'snappy'
          SnappyWriter.compress(data)
        else
          fail ValueNotSupportedError.new(:encoding, TableTunnels::SUPPORTED_ENCODING)
        end
      end

      # Translate the user-facing encoding name into a Content-Encoding
      # header; fails for names outside TableTunnels::SUPPORTED_ENCODING.
      def set_content_encoding(headers, encoding)
        case encoding.to_s.downcase
        when 'deflate'
          headers['Content-Encoding'] = 'deflate'
        when 'snappy'
          headers['Content-Encoding'] = 'x-snappy-framed'
        when 'raw'
          headers.delete('Content-Encoding')
        else
          fail ValueNotSupportedError.new(:encoding, TableTunnels::SUPPORTED_ENCODING)
        end
      end

      # Build the request headers for a block upload.
      def build_upload_headers(encoding)
        headers = {
          'x-odps-tunnel-version' => TableTunnels::TUNNEL_VERSION,
          'Transfer-Encoding' => 'chunked'
        }
        set_content_encoding(headers, encoding)
        headers
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
+ require 'base64'
2
+ require 'openssl'
3
+ require 'digest'
4
+ require 'gyoku'
5
+
6
module Aliyun
  module Odps
    # Stateless helper methods shared across the library.
    class Utils
      class << self
        # Calculate content length
        #
        # @param content [#size, IO] the content to measure
        #
        # @return [Integer, nil] nil when content neither responds to size
        #   nor is an IO
        def content_size(content)
          if content.respond_to?(:size)
            content.size
          elsif content.is_a?(IO)
            content.stat.size
          end
        end

        # HexDigest body with MD5
        #
        # @return [String]
        def md5_hexdigest(body)
          Digest::MD5.hexdigest(body).strip
        end

        # Pick the given keys out of a hash; keys missing from the hash are
        # left out of the result.
        #
        # @example
        #   # { 'a' => 1, 'c' => 3 }
        #   Utils.hash_slice({ 'a' => 1, 'b' => 2, 'c' => 3 }, 'a', 'c')
        #
        # @return [Hash]
        def hash_slice(hash, *selected_keys)
          selected_keys.each_with_object({}) do |key, picked|
            picked[key] = hash[key] if hash.key?(key)
          end
        end

        # Convert File or Bin data to bin data
        #
        # Objects without a read method are returned unchanged. Readable
        # objects backed by a path are read in full through that path, as
        # before. Readable objects without a path (for example StringIO)
        # are read through their own read method, which IO.binread cannot do.
        #
        # @return [Bin data]
        def to_data(file_or_bin)
          return file_or_bin unless file_or_bin.respond_to?(:read)

          if file_or_bin.respond_to?(:to_path)
            IO.binread(file_or_bin)
          else
            file_or_bin.read.to_s.dup.force_encoding(Encoding::BINARY)
          end
        end

        def to_xml(hash, options = {}) # nodoc
          %(<?xml version="1.0" encoding="UTF-8"?>#{Gyoku.xml(hash, options)})
        end

        # Dig values in deep hash
        #
        # @example
        #   dig_value({ 'a' => { 'b' => { 'c' => 3 } } }, 'a', 'b', 'c')  # => 3
        #
        # @return the value at the end of the key path, or nil as soon as a
        #   level is not a Hash or lacks the key
        def dig_value(hash, *keys)
          keys.reduce(hash) do |node, key|
            return nil unless node.is_a?(Hash) && node.key?(key)
            node[key]
          end
        end

        # @see {http://apidock.com/rails/String/underscore String#underscore}
        def underscore(str)
          word = str.to_s.dup
          word.gsub!(/::/, '/')
          word.gsub!(/([A-Z\d]+)([A-Z][a-z])/, '\1_\2')
          word.gsub!(/([a-z\d])([A-Z])/, '\1_\2')
          word.tr!('-', '_')
          word.downcase!
          word
        end

        # Copy from {https://github.com/rails/rails/blob/14254d82a90b8aa4bd81f7eeebe33885bf83c378/activesupport/lib/active_support/core_ext/array/wrap.rb#L36 ActiveSupport::Array#wrap}
        def wrap(object)
          if object.nil?
            []
          elsif object.respond_to?(:to_ary)
            object.to_ary || [object]
          else
            [object]
          end
        end

        # Replace every key of hash with its String form, in place.
        #
        # Iterates over a snapshot of the keys, so the hash can be modified
        # while looping. The return value is that snapshot (the result of
        # each), not the hash.
        def stringify_keys!(hash)
          hash.keys.each do |key|
            hash[key.to_s] = hash.delete(key)
          end
        end

        # Build an identifier from flag, the current time (YYYYMMDDHHMMSS)
        # and 6 random hex characters.
        #
        # @return [String]
        def generate_uuid(flag)
          # SecureRandom is not required at the top of this file, so load it
          # here rather than rely on another file having done so.
          require 'securerandom'
          "#{flag}#{Time.now.strftime('%Y%m%d%H%M%S')}#{SecureRandom.hex(3)}"
        end
      end
    end
  end
end