fluentd-plugin-aliyun-odps 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
+ #
+ #Licensed to the Apache Software Foundation (ASF) under one
+ #or more contributor license agreements. See the NOTICE file
+ #distributed with this work for additional information
+ #regarding copyright ownership. The ASF licenses this file
+ #to you under the Apache License, Version 2.0 (the
+ #"License"); you may not use this file except in compliance
+ #with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ #Unless required by applicable law or agreed to in writing,
+ #software distributed under the License is distributed on an
+ #"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ #KIND, either express or implied. See the License for the
+ #specific language governing permissions and limitations
+ #under the License.
+ #
+ require 'stringio'
+ require 'protobuf'
+ require_relative '../digest/crc32c'
+ require_relative '../odps/odps_table'
+
+ module OdpsDatahub
+   #TODO
+   class Deserializer
+
+     def deserialize
+     end
+
+   end
+ end
@@ -0,0 +1,141 @@
+ #
+ #Licensed to the Apache Software Foundation (ASF) under one
+ #or more contributor license agreements. See the NOTICE file
+ #distributed with this work for additional information
+ #regarding copyright ownership. The ASF licenses this file
+ #to you under the Apache License, Version 2.0 (the
+ #"License"); you may not use this file except in compliance
+ #with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ #Unless required by applicable law or agreed to in writing,
+ #software distributed under the License is distributed on an
+ #"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ #KIND, either express or implied. See the License for the
+ #specific language governing permissions and limitations
+ #under the License.
+ #
+ require 'stringio'
+ require 'protobuf'
+ require_relative '../exceptions'
+ require_relative '../digest/crc32c'
+ require_relative '../odps/odps_table'
+
+ module OdpsDatahub
+
+   $TUNNEL_META_COUNT = 33554430    # magic num 2^25-2
+   $TUNNEL_META_CHECKSUM = 33554431 # magic num 2^25-1
+   $TUNNEL_END_RECORD = 33553408    # magic num 2^25-1024
+
+   class Serializer
+     def encodeBool(value)
+       [value ? 1 : 0].pack('C')
+     end
+
+     def encodeDouble(value)
+       [value].pack('E')
+     end
+
+     def encodeSInt64(value)
+       if value >= 0
+         ::Protobuf::Field::VarintField.encode(value << 1)
+       else
+         ::Protobuf::Field::VarintField.encode(~(value << 1))
+       end
+     end
+
+     def encodeUInt32(value)
+       return [value].pack('C') if value < 128
+       bytes = []
+       until value == 0
+         bytes << (0x80 | (value & 0x7f))
+         value >>= 7
+       end
+       bytes[-1] &= 0x7f
+       bytes.pack('C*')
+     end
+
+     def encodeDataTime(value)
+       self.encodeSInt64(value)
+     end
+
+     def encodeString(value)
+       value_to_encode = value.dup
+       value_to_encode.encode!(::Protobuf::Field::StringField::ENCODING, :invalid => :replace, :undef => :replace, :replace => "")
+       value_to_encode.force_encoding(::Protobuf::Field::BytesField::BYTES_ENCODING)
+       string_bytes = ::Protobuf::Field::VarintField.encode(value_to_encode.size)
+       string_bytes << value_to_encode
+     end
+
+     def encodeFixed64(value)
+       # we don't use 'Q' for pack/unpack. 'Q' is machine-dependent.
+       [value & 0xffff_ffff, value >> 32].pack('VV')
+     end
+
+     def encodeFixed32(value)
+       [value].pack('V')
+     end
+
+     def encodeFixedString(value)
+       [value].pack('V')
+     end
+
+     def writeTag(idx, type, stream)
+       key = (idx << 3) | type
+       stream << ::Protobuf::Field::VarintField.encode(key)
+     end
+
+     def serialize(upStream, recordList)
+       crc32cPack = ::Digest::CRC32c.new
+       if recordList.is_a? Array
+         recordList.each { |record|
+           crc32cRecord = ::Digest::CRC32c.new
+           schema = OdpsTableSchema.new
+           schema = record.getTableSchema
+           schema.mCols.each { |col|
+             cellValue = record.getValue(col.mIdx)
+             if cellValue == nil
+               next
+             end
+             crc32cRecord.update(encodeFixed32(col.mIdx + 1))
+             case col.mType
+             when $ODPS_BIGINT
+               crc32cRecord.update(encodeFixed64(cellValue))
+               writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
+               upStream.write(encodeSInt64(cellValue))
+             when $ODPS_DOUBLE
+               crc32cRecord.update(encodeDouble(cellValue))
+               writeTag(col.mIdx + 1, ::Protobuf::WireType::FIXED64, upStream)
+               upStream.write(encodeDouble(cellValue))
+             when $ODPS_BOOLEAN
+               crc32cRecord.update(encodeBool(cellValue))
+               writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
+               upStream.write(encodeBool(cellValue))
+             when $ODPS_DATETIME
+               crc32cRecord.update(encodeFixed64(cellValue))
+               writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
+               upStream.write(encodeDataTime(cellValue))
+             when $ODPS_STRING
+               crc32cRecord.update(cellValue)
+               writeTag(col.mIdx + 1, ::Protobuf::WireType::LENGTH_DELIMITED, upStream)
+               upStream.write(encodeString(cellValue))
+             else
+               raise OdpsDatahubException.new($INVALID_ARGUMENT, "invalid mType")
+             end
+           }
+           recordCrc = crc32cRecord.checksum.to_i
+           writeTag($TUNNEL_END_RECORD, ::Protobuf::WireType::VARINT, upStream)
+           upStream.write(encodeUInt32(recordCrc))
+           crc32cPack.update(encodeFixed32(recordCrc))
+         }
+         writeTag($TUNNEL_META_COUNT, ::Protobuf::WireType::VARINT, upStream)
+         upStream.write(encodeSInt64(recordList.size))
+         writeTag($TUNNEL_META_CHECKSUM, ::Protobuf::WireType::VARINT, upStream)
+         upStream.write(encodeUInt32(crc32cPack.checksum))
+       else
+         raise OdpsDatahubException.new($INVALID_ARGUMENT, "param must be an array")
+       end
+     end
+   end
+ end
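The serializer above emits each record in the ODPS tunnel wire format: every non-null cell becomes a protobuf field (zigzag varint for BIGINT/DATETIME, fixed64 for DOUBLE, length-delimited for STRING), followed by a per-record CRC32C and, at the end of the pack, a record count and a pack-level checksum. The snippet below is a small, self-contained sketch of the two hand-rolled encodings it relies on, the zigzag mapping of encodeSInt64 and the base-128 varint of encodeUInt32; it uses plain Ruby only and does not depend on the protobuf gem.

    # Zigzag mapping used by encodeSInt64: small magnitudes, positive or
    # negative, become small unsigned values before varint encoding.
    def zigzag(value)
      value >= 0 ? (value << 1) : ~(value << 1)
    end

    # Base-128 varint as built by encodeUInt32: 7 data bits per byte,
    # continuation bit set on every byte except the last.
    def varint_bytes(value)
      return [value].pack('C') if value < 128
      bytes = []
      until value == 0
        bytes << (0x80 | (value & 0x7f))
        value >>= 7
      end
      bytes[-1] &= 0x7f
      bytes.pack('C*')
    end

    p zigzag(1)                       # => 2
    p zigzag(-1)                      # => 1
    p varint_bytes(300).unpack('C*')  # => [172, 2]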
@@ -0,0 +1,111 @@
+ #
+ #Licensed to the Apache Software Foundation (ASF) under one
+ #or more contributor license agreements. See the NOTICE file
+ #distributed with this work for additional information
+ #regarding copyright ownership. The ASF licenses this file
+ #to you under the Apache License, Version 2.0 (the
+ #"License"); you may not use this file except in compliance
+ #with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ #Unless required by applicable law or agreed to in writing,
+ #software distributed under the License is distributed on an
+ #"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ #KIND, either express or implied. See the License for the
+ #specific language governing permissions and limitations
+ #under the License.
+ #
+ require 'json'
+ require_relative 'exceptions'
+ require_relative 'stream_writer'
+ require_relative 'stream_reader'
+ require_relative 'http/http_connection'
+ require_relative 'conf/config'
+ require_relative 'odps/odps_table'
+
+ module OdpsDatahub
+   class StreamClient
+     attr_reader :mProject, :mTable, :mOdpsConfig, :mOdpsTableSchema, :mOdpsTable
+     def initialize(odpsConfig, project, table)
+       @mOdpsConfig = odpsConfig
+       @mProject = project
+       @mTable = table
+       @mShards = Array.new
+       if @mProject == nil or @mProject == ""
+         @mProject = @mOdpsConfig.defaultProjectName
+       end
+       @mOdpsTable = OdpsTable.new(@mOdpsConfig, @mProject, @mTable)
+       header = Hash.new
+       param = Hash.new
+       param[$PARAM_QUERY] = "meta"
+       conn = HttpConnection.new(@mOdpsConfig, header, param, getResource, "GET")
+       res = conn.getResponse
+       jsonTableMeta = JSON.parse(res.body)
+       if res.code != "200"
+         raise OdpsDatahubException.new(jsonTableMeta["Code"], "initialize failed because " + jsonTableMeta["Message"])
+       end
+       @mOdpsTableSchema = OdpsTableSchema.new(jsonTableMeta["Schema"])
+     end
+
+     #get partitions and return an array like: [{"time"=>"2016", "place"=>"china2"}, {"time"=>"2015", "place"=>"china"}]
+     def getPartitionList
+       @mOdpsTable.getPartitionList
+     end
+
+     #ptStr ex: 'dt=20150805,hh=08,mm=24'
+     #adds the partition if it does not exist
+     def addPartition(ptStr)
+       @mOdpsTable.addPartition(ptStr)
+     end
+
+     def getOdpsTableSchema
+       return @mOdpsTableSchema
+     end
+
+     def createStreamWriter(shardId = nil)
+       StreamWriter.new(@mOdpsConfig, @mProject, @mTable, getResource, shardId)
+     end
+
+     def createStreamArrayWriter(shardId = nil)
+       StreamWriter.new(@mOdpsConfig, @mProject, @mTable, getResource, shardId, @mOdpsTableSchema)
+     end
+
+     #return json like [{"ShardId": "0", "State": "loaded"}, {"ShardId": "1", "State": "loaded"}]
+     def getShardStatus
+       header = Hash.new
+       param = Hash.new
+       param[$PARAM_CURR_PROJECT] = @mProject
+       param[$PARAM_SHARD_STATUS] = ""
+
+       conn = HttpConnection.new(@mOdpsConfig, header, param, getResource + "/shards", "GET")
+       res = conn.getResponse
+       json_obj = JSON.parse(res.body)
+       if res.code != "200"
+         raise OdpsDatahubException.new(json_obj["Code"], "getShardStatus failed because " + json_obj["Message"])
+       end
+       return json_obj["ShardStatus"]
+     end
+
+     def loadShard(idx)
+       if idx < 0
+         raise OdpsDatahubException.new($INVALID_ARGUMENT, "loadShard num invalid")
+       end
+       header = Hash.new
+       param = Hash.new
+       param[$PARAM_CURR_PROJECT] = @mProject
+       param[$PARAM_SHARD_NUMBER] = idx
+       conn = HttpConnection.new(@mOdpsConfig, header, param, getResource + "/shards", "POST")
+       res = conn.getResponse
+       if res.code != "200"
+         json_obj = JSON.parse(res.body)
+         raise OdpsDatahubException.new(json_obj["Code"], "loadShard failed because " + json_obj["Message"])
+       end
+     end
+
+     protected
+     def getResource
+       return "/projects/" + @mProject + "/tables/" + @mTable
+     end
+   end
+ end
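StreamClient resolves the table schema at construction time (via the "meta" query), exposes shard management (getShardStatus, loadShard), and hands out StreamWriter instances. The sketch below only illustrates the shape of the shard-status payload documented in the comment above getShardStatus and how a caller might wait for shards to become writable; the literal status array is a made-up example, not live output.

    # Hypothetical response shape, matching the comment above getShardStatus:
    # each entry carries a ShardId and its load State.
    shard_status = [
      { "ShardId" => "0", "State" => "loaded" },
      { "ShardId" => "1", "State" => "unloaded" },
    ]

    # A caller would typically wait until every shard reports "loaded"
    # before creating a writer for it.
    ready   = shard_status.all? { |shard| shard["State"] == "loaded" }
    pending = shard_status.reject { |shard| shard["State"] == "loaded" }
                          .map { |shard| shard["ShardId"] }

    puts ready ? "all shards loaded" : "waiting on shards: #{pending.join(', ')}"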
@@ -0,0 +1,53 @@
+ #
+ #Licensed to the Apache Software Foundation (ASF) under one
+ #or more contributor license agreements. See the NOTICE file
+ #distributed with this work for additional information
+ #regarding copyright ownership. The ASF licenses this file
+ #to you under the Apache License, Version 2.0 (the
+ #"License"); you may not use this file except in compliance
+ #with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ #Unless required by applicable law or agreed to in writing,
+ #software distributed under the License is distributed on an
+ #"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ #KIND, either express or implied. See the License for the
+ #specific language governing permissions and limitations
+ #under the License.
+ #
+ require 'net/http'
+ require 'stringio'
+ require 'digest/md5'
+ require 'zlib'
+ require_relative 'http/http_connection'
+ require_relative 'serialize/deserializer'
+ require_relative 'conf/config'
+ require_relative 'odps/xstream_pack.pb'
+ require_relative 'odps/odps_table'
+
+ module OdpsDatahub
+   class StreamReader
+     attr_reader :mProject, :mTable, :mPath, :mShardId, :mPackId, :mReadMode, :mSchema, :mPackStream
+     def initialize(project, table, shardId, path, schema, packId = PackType.FIRST_PACK_ID)
+       @mProject = project
+       @mTable = table
+       @mPath = path
+       @mShardId = shardId
+       @mSchema = schema
+       @mPackId = packId
+       @mReadMode = ReadMode.SEEK_BEGIN
+     end
+     #TODO
+     def read #return a pack stream of this pack
+       if @mPackStream != nil
+         @mPackStream = getPack
+       end
+
+     end
+     #TODO
+     def getPack #get cur pack stream
+
+     end
+   end
+ end
@@ -0,0 +1,152 @@
+ #
+ #Licensed to the Apache Software Foundation (ASF) under one
+ #or more contributor license agreements. See the NOTICE file
+ #distributed with this work for additional information
+ #regarding copyright ownership. The ASF licenses this file
+ #to you under the Apache License, Version 2.0 (the
+ #"License"); you may not use this file except in compliance
+ #with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ #Unless required by applicable law or agreed to in writing,
+ #software distributed under the License is distributed on an
+ #"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ #KIND, either express or implied. See the License for the
+ #specific language governing permissions and limitations
+ #under the License.
+ #
+ require 'net/http'
+ require 'stringio'
+ require 'digest/md5'
+ require 'zlib'
+ require_relative 'exceptions'
+ require_relative 'http/http_connection'
+ require_relative 'serialize/serializer'
+ require_relative 'conf/config'
+ require_relative 'odps/xstream_pack.pb'
+ require_relative 'odps/odps_table'
+
+ module OdpsDatahub
+   class StreamWriter
+     attr_reader :mRecordList, :mProject, :mTable, :mPath, :mShardId, :mUpStream, :mOdpsConfig
+     def initialize(odpsConfig, project, table, path, shardId = nil, odpsSchema = nil)
+       @mOdpsConfig = odpsConfig
+       @mProject = project
+       @mTable = table
+       @mPath = path
+       @mShardId = shardId
+       @mSchema = odpsSchema
+       reload
+     end
+
+     def reload
+       @mUpStream = ::StringIO.new
+       @mRecordList = Array.new
+       @mUpStream.set_encoding(::Protobuf::Field::BytesField::BYTES_ENCODING)
+     end
+
+     def write(recordList, partition = "")
+       if recordList.is_a? Array
+         recordList.each { |value|
+           #handle a list of OdpsTableRecord
+           if value.is_a? OdpsTableRecord
+             @mRecordList.push(value)
+           #handle a list of plain arrays
+           elsif value.is_a? Array and @mSchema != nil and value.size == @mSchema.getColumnCount
+             record = convert2Record(value)
+             @mRecordList.push(record)
+           else
+             raise OdpsDatahubException.new($INVALID_ARGUMENT, "write an error type")
+           end
+         }
+       else
+         raise OdpsDatahubException.new($INVALID_ARGUMENT, "write param must be an array")
+       end
+
+       serializer = Serializer.new
+       serializer.serialize(@mUpStream, @mRecordList)
+
+       if @mUpStream.length == 0
+         raise OdpsDatahubException.new($INVALID_ARGUMENT, "mRecordList is empty")
+       end
+       header = Hash.new
+       param = Hash.new
+       param[$PARAM_CURR_PROJECT] = @mProject
+       #TODO partition format
+       param[$PARAM_PARTITION] = partition
+       param[$PARAM_RECORD_COUNT] = @mRecordList.size.to_s
+       header[$CONTENT_ENCODING] = "deflate"
+       header[$CONTENT_TYPE] = "application/octet-stream"
+ =begin version 4
+       pack = OdpsDatahub::XStreamPack.new
+       pack.pack_data = Zlib::Deflate.deflate(@mUpStream.string)
+       pack.pack_meta = ""
+       upstream = ::StringIO.new
+       pack.serialize_to(upstream)
+       header[$CONTENT_MD5] = Digest::MD5.hexdigest(upstream.string)
+       header[$CONTENT_LENGTH] = upstream.length.to_s
+
+       conn = HttpConnection.new(@mOdpsConfig, header, param, @mPath + "/shards/" + @mShardId.to_s, "PUT", upstream)
+ =end
+       #version 3
+       upStream = Zlib::Deflate.deflate(@mUpStream.string)
+       header[$CONTENT_MD5] = Digest::MD5.hexdigest(upStream)
+       header[$CONTENT_LENGTH] = upStream.length.to_s
+       #MAX_LENGTH 2048KB
+       if upStream.length > $MAX_PACK_SIZE
+         raise OdpsDatahubException.new($PACK_SIZE_EXCEED, "pack size:" + upStream.length.to_s)
+       end
+       if @mShardId != nil
+         conn = HttpConnection.new(@mOdpsConfig, header, param, @mPath + "/shards/" + @mShardId.to_s, "PUT", upStream)
+       else
+         conn = HttpConnection.new(@mOdpsConfig, header, param, @mPath + "/shards", "PUT", upStream)
+       end
+
+       reload
+       res = conn.getResponse
+       json_obj = JSON.parse(res.body)
+       if res.code != "200"
+         raise OdpsDatahubException.new(json_obj["Code"], "write failed because " + json_obj["Message"])
+       end
+     end
+
+     private
+     def convert2Record(value)
+       if not value.is_a? Array
+         raise OdpsDatahubException.new($INVALID_ARGUMENT, "param for convert2Record must be an array")
+       end
+
+       if value.count != @mSchema.getColumnCount
+         raise OdpsDatahubException.new($SCHEMA_NOT_MATCH, "column counts are not equal between value and schema")
+       end
+
+       record = OdpsTableRecord.new(@mSchema)
+       i = 0
+       while i < value.count do
+         type = @mSchema.getColumnType(i)
+         if value[i] == nil
+           record.setNullValue(i)
+           i += 1
+           next
+         end
+         case type
+         when $ODPS_BIGINT
+           record.setBigInt(i, value[i])
+         when $ODPS_BOOLEAN
+           record.setBoolean(i, value[i])
+         when $ODPS_DATETIME
+           record.setDateTime(i, value[i])
+         when $ODPS_DOUBLE
+           record.setDouble(i, value[i])
+         when $ODPS_STRING
+           record.setString(i, value[i])
+         else
+           raise OdpsDatahubException.new($INVALID_ARGUMENT, "unsupported schema type")
+         end
+         i += 1
+       end
+       return record
+     end
+   end
+ end
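Before issuing the PUT request, write() deflates the serialized pack and derives the Content-MD5 and Content-Length headers from the compressed bytes (the "version 3" branch above). Below is a minimal stand-alone sketch of that header preparation; the payload string is a placeholder for @mUpStream.string, and the 2048 KB limit is an assumption taken from the #MAX_LENGTH comment rather than the actual $MAX_PACK_SIZE constant, which is defined elsewhere in the gem.

    require 'zlib'
    require 'digest/md5'

    payload = "serialized pack bytes"            # placeholder for @mUpStream.string
    compressed = Zlib::Deflate.deflate(payload)  # body sent with Content-Encoding: deflate

    max_pack_size = 2048 * 1024                  # assumption: $MAX_PACK_SIZE is 2048 KB
    raise "pack size: #{compressed.length}" if compressed.length > max_pack_size

    headers = {
      "Content-Encoding" => "deflate",
      "Content-Type"     => "application/octet-stream",
      "Content-MD5"      => Digest::MD5.hexdigest(compressed),
      "Content-Length"   => compressed.length.to_s,
    }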