fluentd-plugin-aliyun-odps 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,32 @@
1
+ #
2
+ #Licensed to the Apache Software Foundation (ASF) under one
3
+ #or more contributor license agreements. See the NOTICE file
4
+ #distributed with this work for additional information
5
+ #regarding copyright ownership. The ASF licenses this file
6
+ #to you under the Apache License, Version 2.0 (the
7
+ #"License"); you may not use this file except in compliance
8
+ #with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ #Unless required by applicable law or agreed to in writing,
13
+ #software distributed under the License is distributed on an
14
+ #"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ #KIND, either express or implied. See the License for the
16
+ #specific language governing permissions and limitations
17
+ #under the License.
18
+ #
19
+ require 'stringio'
20
+ require 'protobuf'
21
+ require_relative '../digest/crc32c'
22
+ require_relative '../odps/odps_table'
23
+
24
+ module OdpsDatahub
25
+ #TODO
26
# Placeholder class: +deserialize+ has an empty body, so nothing is decoded yet.
class Deserializer
  # Takes no arguments and returns nil (empty method body).
  def deserialize; end
end
32
+ end
@@ -0,0 +1,141 @@
1
+ #
2
+ #Licensed to the Apache Software Foundation (ASF) under one
3
+ #or more contributor license agreements. See the NOTICE file
4
+ #distributed with this work for additional information
5
+ #regarding copyright ownership. The ASF licenses this file
6
+ #to you under the Apache License, Version 2.0 (the
7
+ #"License"); you may not use this file except in compliance
8
+ #with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ #Unless required by applicable law or agreed to in writing,
13
+ #software distributed under the License is distributed on an
14
+ #"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ #KIND, either express or implied. See the License for the
16
+ #specific language governing permissions and limitations
17
+ #under the License.
18
+ #
19
+ require 'stringio'
20
+ require 'protobuf'
21
+ require_relative '../exceptions'
22
+ require_relative '../digest/crc32c'
23
+ require_relative '../odps/odps_table'
24
+
25
+ module OdpsDatahub
26
+
27
+ $TUNNEL_META_COUNT = 33554430 # magic number 2^25-2; tag index written before the record count in Serializer#serialize
28
+ $TUNNEL_META_CHECKSUM = 33554431 # magic number 2^25-1; tag index written before the pack checksum in Serializer#serialize
29
+ $TUNNEL_END_RECORD = 33553408 # magic number 2^25-1024; tag index written before each record's checksum in Serializer#serialize
30
+
31
# Encodes table records into a byte stream: every non-nil cell is written as a
# protobuf-style tag followed by its encoded value, each record is closed with
# a CRC32C checksum, and the pack is closed with a record count and a checksum
# over the per-record checksums.
class Serializer
  # Encodes a boolean as a single byte: 1 for a truthy value, 0 for false/nil.
  def encodeBool(value)
    [value ? 1 : 0].pack('C')
  end

  # Encodes a float as 8 bytes, double precision, little-endian ('E').
  def encodeDouble(value)
    [value].pack('E')
  end

  # Zigzag-maps a signed integer (0, -1, 1, -2 -> 0, 1, 2, 3) and encodes the
  # result as a varint.
  def encodeSInt64(value)
    zigzag = value >= 0 ? value << 1 : ~(value << 1)
    ::Protobuf::Field::VarintField.encode(zigzag)
  end

  # Encodes an integer as a base-128 varint, least significant group first.
  # Values below 128 (including negative ones) are packed as one raw byte.
  def encodeUInt32(value)
    return [value].pack('C') if value < 128
    bytes = []
    until value == 0
      bytes << (0x80 | (value & 0x7f))
      value >>= 7
    end
    # The last group must not carry the continuation bit.
    bytes[-1] &= 0x7f
    bytes.pack('C*')
  end

  # Datetime values are encoded exactly like signed 64-bit integers.
  def encodeDataTime(value)
    self.encodeSInt64(value)
  end

  # Returns the varint byte length followed by the string bytes. The string is
  # copied, transcoded to the protobuf string encoding (invalid or undefined
  # characters are dropped) and then treated as raw bytes.
  def encodeString(value)
    value_to_encode = value.dup
    value_to_encode.encode!(::Protobuf::Field::StringField::ENCODING, :invalid => :replace, :undef => :replace, :replace => "")
    value_to_encode.force_encoding(::Protobuf::Field::BytesField::BYTES_ENCODING)
    string_bytes = ::Protobuf::Field::VarintField.encode(value_to_encode.size)
    string_bytes << value_to_encode
  end

  # Encodes an integer as 8 little-endian bytes (low 32 bits first).
  def encodeFixed64(value)
    # we don't use 'Q' for pack/unpack. 'Q' is machine-dependent.
    [value & 0xffff_ffff, value >> 32].pack('VV')
  end

  # Encodes an integer as 4 little-endian bytes.
  def encodeFixed32(value)
    [value].pack('V')
  end

  # Packs value as 4 little-endian bytes, identical to encodeFixed32.
  # NOTE(review): the name suggests a string payload -- confirm the intent.
  def encodeFixedString(value)
    [value].pack('V')
  end

  # Appends the varint-encoded key (idx << 3 | type) to stream.
  def writeTag(idx, type, stream)
    key = (idx << 3) | type
    stream << ::Protobuf::Field::VarintField.encode(key)
  end

  # Serializes recordList into upStream.
  #
  # upStream   - IO-like object responding to << and write.
  # recordList - Array of records responding to getTableSchema and getValue.
  #
  # Cells whose value is nil are skipped entirely (no tag, no checksum update).
  # Returns the result of the final upStream.write call.
  # Raises OdpsDatahubException when recordList is not an Array or a column
  # has a type other than the five handled below.
  def serialize(upStream, recordList)
    unless recordList.is_a?(Array)
      raise OdpsDatahubException.new($INVALID_ARGUMENT, "param must be a array")
    end

    crc32cPack = ::Digest::CRC32c.new
    recordList.each do |record|
      crc32cRecord = ::Digest::CRC32c.new
      # The schema comes straight from the record; no placeholder schema is
      # allocated first.
      record.getTableSchema.mCols.each do |col|
        cellValue = record.getValue(col.mIdx)
        next if cellValue.nil?

        # Field numbers on the wire are 1-based.
        fieldIdx = col.mIdx + 1
        crc32cRecord.update(encodeFixed32(fieldIdx))
        case col.mType
        when $ODPS_BIGINT
          crc32cRecord.update(encodeFixed64(cellValue))
          writeTag(fieldIdx, ::Protobuf::WireType::VARINT, upStream)
          upStream.write(encodeSInt64(cellValue))
        when $ODPS_DOUBLE
          crc32cRecord.update(encodeDouble(cellValue))
          writeTag(fieldIdx, ::Protobuf::WireType::FIXED64, upStream)
          upStream.write(encodeDouble(cellValue))
        when $ODPS_BOOLEAN
          crc32cRecord.update(encodeBool(cellValue))
          writeTag(fieldIdx, ::Protobuf::WireType::VARINT, upStream)
          upStream.write(encodeBool(cellValue))
        when $ODPS_DATETIME
          crc32cRecord.update(encodeFixed64(cellValue))
          writeTag(fieldIdx, ::Protobuf::WireType::VARINT, upStream)
          upStream.write(encodeDataTime(cellValue))
        when $ODPS_STRING
          # NOTE(review): the checksum covers the raw value while the stream
          # receives the transcoded bytes from encodeString; these differ for
          # strings that need transcoding -- confirm against the server.
          crc32cRecord.update(cellValue)
          writeTag(fieldIdx, ::Protobuf::WireType::LENGTH_DELIMITED, upStream)
          upStream.write(encodeString(cellValue))
        else
          raise OdpsDatahubException.new($INVALID_ARGUMENT, "invalid mType")
        end
      end

      # Close the record with its checksum and fold it into the pack checksum.
      recordCrc = crc32cRecord.checksum.to_i
      writeTag($TUNNEL_END_RECORD, ::Protobuf::WireType::VARINT, upStream)
      upStream.write(encodeUInt32(recordCrc))
      crc32cPack.update(encodeFixed32(recordCrc))
    end

    # Pack trailer: record count, then checksum over all record checksums.
    writeTag($TUNNEL_META_COUNT, ::Protobuf::WireType::VARINT, upStream)
    upStream.write(encodeSInt64(recordList.size))
    writeTag($TUNNEL_META_CHECKSUM, ::Protobuf::WireType::VARINT, upStream)
    upStream.write(encodeUInt32(crc32cPack.checksum))
  end
end
141
+ end
@@ -0,0 +1,111 @@
1
+ #
2
+ #Licensed to the Apache Software Foundation (ASF) under one
3
+ #or more contributor license agreements. See the NOTICE file
4
+ #distributed with this work for additional information
5
+ #regarding copyright ownership. The ASF licenses this file
6
+ #to you under the Apache License, Version 2.0 (the
7
+ #"License"); you may not use this file except in compliance
8
+ #with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ #Unless required by applicable law or agreed to in writing,
13
+ #software distributed under the License is distributed on an
14
+ #"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ #KIND, either express or implied. See the License for the
16
+ #specific language governing permissions and limitations
17
+ #under the License.
18
+ #
19
+ require 'json'
20
+ require_relative 'exceptions'
21
+ require_relative 'stream_writer'
22
+ require_relative 'stream_reader'
23
+ require_relative 'http/http_connection'
24
+ require_relative 'conf/config'
25
+ require_relative 'odps/odps_table'
26
+
27
+ module OdpsDatahub
28
# Client for one table of a project: fetches the table schema on construction
# and creates stream writers and issues shard requests for that table.
class StreamClient
  attr_reader :mProject, :mTable, :mOdpsConfig, :mOdpsTableSchema, :mOdpsTable

  # odpsConfig - configuration object; its defaultProjectName is used when
  #              project is nil or "".
  # project    - project name.
  # table      - table name.
  #
  # Issues a GET for the table meta and builds the schema from its "Schema"
  # entry. Raises OdpsDatahubException when the response code is not "200".
  def initialize(odpsConfig, project, table)
    @mOdpsConfig = odpsConfig
    @mProject = project
    @mTable = table
    @mShards = []
    @mProject = @mOdpsConfig.defaultProjectName if @mProject.nil? || @mProject == ""
    @mOdpsTable = OdpsTable.new(@mOdpsConfig, @mProject, @mTable)

    headers = {}
    query = {}
    query[$PARAM_QUERY] = "meta"
    response = HttpConnection.new(@mOdpsConfig, headers, query, getResource, "GET").getResponse
    tableMeta = JSON.parse(response.body)
    unless response.code == "200"
      raise OdpsDatahubException.new(tableMeta["Code"], "initialize failed because " + tableMeta["Message"])
    end
    @mOdpsTableSchema = OdpsTableSchema.new(tableMeta["Schema"])
  end

  # Gets the partitions; returns an array like
  # [{"time"=>"2016", "place"=>"china2"},{"time"=>"2015", "place"=>"china"}]
  def getPartitionList
    @mOdpsTable.getPartitionList
  end

  # ptStr example: 'dt=20150805,hh=08,mm=24'
  # Adds the partition if it does not exist.
  def addPartition(ptStr)
    @mOdpsTable.addPartition(ptStr)
  end

  # Returns the schema built in the constructor.
  def getOdpsTableSchema
    @mOdpsTableSchema
  end

  # Creates a writer without a schema, optionally bound to shardId.
  def createStreamWriter(shardId = nil)
    StreamWriter.new(@mOdpsConfig, @mProject, @mTable, getResource, shardId)
  end

  # Creates a writer that also receives the table schema, optionally bound
  # to shardId.
  def createStreamArrayWriter(shardId = nil)
    StreamWriter.new(@mOdpsConfig, @mProject, @mTable, getResource, shardId, @mOdpsTableSchema)
  end

  # Returns json like [{"ShardId": "0","State": "loaded"},{"ShardId": "1","State": "loaded"}]
  # Raises OdpsDatahubException when the response code is not "200".
  def getShardStatus
    headers = {}
    query = {}
    query[$PARAM_CURR_PROJECT] = @mProject
    query[$PARAM_SHARD_STATUS] = ""

    response = HttpConnection.new(@mOdpsConfig, headers, query, getResource + "/shards", "GET").getResponse
    payload = JSON.parse(response.body)
    unless response.code == "200"
      raise OdpsDatahubException.new(payload["Code"], "getShardStatus failed because " + payload["Message"])
    end
    payload["ShardStatus"]
  end

  # POSTs idx as the shard number parameter to the table's shards resource.
  # Raises OdpsDatahubException when idx is negative or the response code is
  # not "200"; returns nil otherwise.
  def loadShard(idx)
    raise OdpsDatahubException.new($INVALID_ARGUMENT, "loadShard num invalid") if idx < 0

    headers = {}
    query = {}
    query[$PARAM_CURR_PROJECT] = @mProject
    query[$PARAM_SHARD_NUMBER] = idx
    response = HttpConnection.new(@mOdpsConfig, headers, query, getResource + "/shards", "POST").getResponse
    return if response.code == "200"

    # The body is only parsed on failure.
    payload = JSON.parse(response.body)
    raise OdpsDatahubException.new(payload["Code"], "loadShard failed because " + payload["Message"])
  end

  protected

  # Resource path of this table.
  def getResource
    "/projects/" + @mProject + "/tables/" + @mTable
  end
end
111
+ end
@@ -0,0 +1,53 @@
1
+ #
2
+ #Licensed to the Apache Software Foundation (ASF) under one
3
+ #or more contributor license agreements. See the NOTICE file
4
+ #distributed with this work for additional information
5
+ #regarding copyright ownership. The ASF licenses this file
6
+ #to you under the Apache License, Version 2.0 (the
7
+ #"License"); you may not use this file except in compliance
8
+ #with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ #Unless required by applicable law or agreed to in writing,
13
+ #software distributed under the License is distributed on an
14
+ #"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ #KIND, either express or implied. See the License for the
16
+ #specific language governing permissions and limitations
17
+ #under the License.
18
+ #
19
+ require 'net/http'
20
+ require 'stringio'
21
+ require 'digest/md5'
22
+ require 'zlib'
23
+ require_relative 'http/http_connection'
24
+ require_relative 'serialize/deserializer'
25
+ require_relative 'conf/config'
26
+ require_relative 'odps/xstream_pack.pb'
27
+ require_relative 'odps/odps_table'
28
+
29
+ module OdpsDatahub
30
# Reader for the packs of one shard of a table. Fetching a pack is not
# implemented yet: getPack is still a stub.
class StreamReader
  attr_reader :mProject, :mTable, :mPath, :mShardId, :mPackId, :mReadMode, :mSchema, :mPackStream

  # Stores the given values; the read mode starts as ReadMode.SEEK_BEGIN and
  # packId defaults to PackType.FIRST_PACK_ID.
  def initialize(project, table, shardId, path, schema, packId = PackType.FIRST_PACK_ID)
    @mProject = project
    @mTable = table
    @mPath = path
    @mShardId = shardId
    @mSchema = schema
    @mPackId = packId
    @mReadMode = ReadMode.SEEK_BEGIN
  end

  # TODO
  # Returns the pack stream of the current pack, fetching it through getPack
  # the first time and caching it in @mPackStream afterwards.
  def read
    # Assign the instance variable explicitly: a bare `mPackStream = ...`
    # would only create a method-local variable and cache nothing.
    @mPackStream = getPack if @mPackStream.nil?
    @mPackStream
  end

  # TODO
  # Gets the current pack stream; not implemented, returns nil.
  def getPack
  end
end
53
+ end
@@ -0,0 +1,152 @@
1
+ #
2
+ #Licensed to the Apache Software Foundation (ASF) under one
3
+ #or more contributor license agreements. See the NOTICE file
4
+ #distributed with this work for additional information
5
+ #regarding copyright ownership. The ASF licenses this file
6
+ #to you under the Apache License, Version 2.0 (the
7
+ #"License"); you may not use this file except in compliance
8
+ #with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ #Unless required by applicable law or agreed to in writing,
13
+ #software distributed under the License is distributed on an
14
+ #"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ #KIND, either express or implied. See the License for the
16
+ #specific language governing permissions and limitations
17
+ #under the License.
18
+ #
19
+ require 'net/http'
20
+ require 'stringio'
21
+ require 'digest/md5'
22
+ require 'zlib'
23
+ require_relative 'exceptions'
24
+ require_relative 'http/http_connection'
25
+ require_relative 'serialize/serializer'
26
+ require_relative 'conf/config'
27
+ require_relative 'odps/xstream_pack.pb'
28
+ require_relative 'odps/odps_table'
29
+
30
+ module OdpsDatahub
31
# Buffers records, serializes them into one deflate-compressed pack and PUTs
# the pack to the table's shards resource.
class StreamWriter
  attr_reader :mRecordList, :mProject, :mTable, :mPath, :mShardId, :mUpStream, :mOdpsConfig

  # odpsConfig - configuration handed to HttpConnection.
  # project    - project name.
  # table      - table name.
  # path       - resource path of the table.
  # shardId    - optional shard to write to; nil targets "<path>/shards".
  # odpsSchema - optional schema; required for writing plain Array rows.
  def initialize(odpsConfig, project, table, path, shardId = nil, odpsSchema = nil)
    @mOdpsConfig = odpsConfig
    @mProject = project
    @mTable = table
    @mPath = path
    @mShardId = shardId
    @mSchema = odpsSchema
    reload
  end

  # Discards the buffered records and starts a fresh binary upload stream.
  def reload
    @mUpStream = ::StringIO.new
    @mRecordList = []
    @mUpStream.set_encoding(::Protobuf::Field::BytesField::BYTES_ENCODING)
  end

  # Serializes and uploads recordList as one pack.
  #
  # recordList - Array whose elements are OdpsTableRecord instances or, when
  #              the writer has a schema, Arrays with one value per column.
  # partition  - partition string passed through as a request parameter.
  #
  # Returns nil on success.
  # Raises OdpsDatahubException when recordList is not an Array, an element
  # has an unsupported type, there is nothing to send, the compressed pack
  # exceeds $MAX_PACK_SIZE, or the response code is not "200".
  def write(recordList, partition = "")
    unless recordList.is_a?(Array)
      raise OdpsDatahubException.new($INVALID_ARGUMENT, "write param must be a array")
    end

    begin
      recordList.each { |value| @mRecordList.push(toRecord(value)) }

      # Check the records themselves: the serialized stream always contains
      # the pack trailer, so its length can never signal "nothing to send".
      if @mRecordList.empty?
        raise OdpsDatahubException.new($INVALID_ARGUMENT, "mRecordList is empty")
      end

      Serializer.new.serialize(@mUpStream, @mRecordList)

      header = {}
      param = {}
      param[$PARAM_CURR_PROJECT] = @mProject
      # TODO partition format
      param[$PARAM_PARTITION] = partition
      param[$PARAM_RECORD_COUNT] = @mRecordList.size.to_s
      header[$CONTENT_ENCODING] = "deflate"
      header[$CONTENT_TYPE] = "application/octet-stream"

      # Version 4 wire format (disabled, kept for reference):
      #   pack = OdpsDatahub::XStreamPack.new
      #   pack.pack_data = Zlib::Deflate.deflate(@mUpStream.string)
      #   pack.pack_meta = ""
      #   upstream = ::StringIO.new
      #   pack.serialize_to(upstream)
      #   header[$CONTENT_MD5] = Digest::MD5.hexdigest(upstream.string)
      #   header[$CONTENT_LENGTH] = upstream.length.to_s
      #
      #   conn = HttpConnection.new(@mOdpsConfig, header, param, @mPath + "/shards/" + @mShardId.to_s, "PUT", upstream)

      # version 3
      upStream = Zlib::Deflate.deflate(@mUpStream.string)
      header[$CONTENT_MD5] = Digest::MD5.hexdigest(upStream)
      header[$CONTENT_LENGTH] = upStream.length.to_s
      # MAX_LENGTH 2048KB
      if upStream.length > $MAX_PACK_SIZE
        raise OdpsDatahubException.new($PACK_SIZE_EXCEED, "pack size:" + upStream.length.to_s)
      end

      target = @mShardId.nil? ? @mPath + "/shards" : @mPath + "/shards/" + @mShardId.to_s
      conn = HttpConnection.new(@mOdpsConfig, header, param, target, "PUT", upStream)
    ensure
      # Always drop the buffered state, including on failure; otherwise the
      # next write would re-send stale records and append to a stream that
      # already holds a serialized pack.
      reload
    end

    res = conn.getResponse
    return if res.code == "200"

    # Only error responses are parsed, so an empty success body is fine.
    json_obj = JSON.parse(res.body)
    raise OdpsDatahubException.new(json_obj["Code"], "write failed because " + json_obj["Message"])
  end

  private

  # Returns value itself when it already is an OdpsTableRecord, converts an
  # Array row matching the schema's column count, and rejects anything else.
  def toRecord(value)
    return value if value.is_a?(OdpsTableRecord)

    if value.is_a?(Array) && !@mSchema.nil? && value.size == @mSchema.getColumnCount
      return convert2Record(value)
    end

    raise OdpsDatahubException.new($INVALID_ARGUMENT, "write an error type")
  end

  # Builds an OdpsTableRecord from an Array holding one value per column,
  # using the setter that matches each column's schema type; nil cells are
  # stored through setNullValue.
  #
  # Raises OdpsDatahubException when value is not an Array, its size differs
  # from the schema's column count, or a column type is unsupported.
  def convert2Record(value)
    unless value.is_a?(Array)
      raise OdpsDatahubException.new($INVALID_ARGUMENT, "param for convert2Record must be a array")
    end

    if value.count != @mSchema.getColumnCount
      raise OdpsDatahubException.new($SCHEMA_NOT_MATCH, "column counts are not equal between value and schema")
    end

    record = OdpsTableRecord.new(@mSchema)
    value.each_with_index do |cell, i|
      if cell.nil?
        record.setNullValue(i)
        next
      end

      case @mSchema.getColumnType(i)
      when $ODPS_BIGINT
        record.setBigInt(i, cell)
      when $ODPS_BOOLEAN
        record.setBoolean(i, cell)
      when $ODPS_DATETIME
        record.setDateTime(i, cell)
      when $ODPS_DOUBLE
        record.setDouble(i, cell)
      when $ODPS_STRING
        record.setString(i, cell)
      else
        raise OdpsDatahubException.new($INVALID_ARGUMENT, "unsupported schema type")
      end
    end
    record
  end
end
152
+ end