fluent-plugin-aliyun-odps 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/CHANGELOG.md +0 -0
- data/Gemfile +3 -0
- data/License +13 -0
- data/README.md +120 -0
- data/Rakefile +14 -0
- data/VERSION +1 -0
- data/fluent-plugin-aliyun-odps.gemspec +27 -0
- data/lib/fluent/plugin/conf/config.rb +32 -0
- data/lib/fluent/plugin/digest/crc.rb +120 -0
- data/lib/fluent/plugin/digest/crc32.rb +125 -0
- data/lib/fluent/plugin/digest/crc32c.rb +105 -0
- data/lib/fluent/plugin/exceptions.rb +49 -0
- data/lib/fluent/plugin/http/http_connection.rb +130 -0
- data/lib/fluent/plugin/http/http_flag.rb +73 -0
- data/lib/fluent/plugin/odps/odps_table.rb +253 -0
- data/lib/fluent/plugin/odps/odps_table_schema.rb +64 -0
- data/lib/fluent/plugin/odps/xml_template.rb +57 -0
- data/lib/fluent/plugin/odps/xstream_pack.pb.rb +21 -0
- data/lib/fluent/plugin/odps/xstream_pack.proto +8 -0
- data/lib/fluent/plugin/out_odps.rb +373 -0
- data/lib/fluent/plugin/serialize/deserializer.rb +32 -0
- data/lib/fluent/plugin/serialize/serializer.rb +141 -0
- data/lib/fluent/plugin/stream_client.rb +111 -0
- data/lib/fluent/plugin/stream_reader.rb +53 -0
- data/lib/fluent/plugin/stream_writer.rb +152 -0
- data/odps_example.conf +30 -0
- metadata +174 -0
@@ -0,0 +1,105 @@
|
|
1
|
+
#
|
2
|
+
#Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
#or more contributor license agreements. See the NOTICE file
|
4
|
+
#distributed with this work for additional information
|
5
|
+
#regarding copyright ownership. The ASF licenses this file
|
6
|
+
#to you under the Apache License, Version 2.0 (the
|
7
|
+
#"License"); you may not use this file except in compliance
|
8
|
+
#with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
#Unless required by applicable law or agreed to in writing,
|
13
|
+
#software distributed under the License is distributed on an
|
14
|
+
#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
#KIND, either express or implied. See the License for the
|
16
|
+
#specific language governing permissions and limitations
|
17
|
+
#under the License.
|
18
|
+
#
|
19
|
+
require_relative 'crc32'
|
20
|
+
module Digest
  #
  # Implements the CRC32c algorithm.
  #
  # Only the lookup table and the per-byte update step are defined here; the
  # rest of the digest behaviour is inherited from CRC32.
  #
  class CRC32c < CRC32
    # Generated by `./pycrc.py --algorithm=table-driven --model=crc-32c --generate=c`
    # Frozen so the shared lookup table cannot be modified at runtime.
    TABLE = [
      0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
      0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
      0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
      0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
      0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
      0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
      0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
      0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
      0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
      0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
      0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
      0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
      0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
      0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
      0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
      0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
      0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
      0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
      0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
      0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
      0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
      0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
      0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
      0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
      0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
      0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
      0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
      0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
      0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
      0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
      0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
      0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
      0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
      0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
      0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
      0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
      0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
      0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
      0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
      0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
      0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
      0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
      0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
      0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
      0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
      0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
      0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
      0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
      0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
      0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
      0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
      0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
      0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
      0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
      0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
      0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
      0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
      0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
      0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
      0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
      0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
      0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
      0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
      0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351
    ].freeze

    #
    # Updates the CRC32c checksum.
    #
    # Each byte is folded into @crc by shifting the running value right by
    # eight bits and XOR-ing it with the TABLE entry selected by the low byte
    # of (@crc ^ byte).
    #
    # @param [String] data
    #   The data to update the checksum with.
    #
    # @return [CRC32c]
    #   self, so that calls can be chained.
    #
    def update(data)
      data.each_byte do |b|
        @crc = (((@crc >> 8) & 0x00ffffff) ^ TABLE[(@crc ^ b) & 0xff])
      end

      self
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#
|
2
|
+
#Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
#or more contributor license agreements. See the NOTICE file
|
4
|
+
#distributed with this work for additional information
|
5
|
+
#regarding copyright ownership. The ASF licenses this file
|
6
|
+
#to you under the Apache License, Version 2.0 (the
|
7
|
+
#"License"); you may not use this file except in compliance
|
8
|
+
#with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
#Unless required by applicable law or agreed to in writing,
|
13
|
+
#software distributed under the License is distributed on an
|
14
|
+
#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
#KIND, either express or implied. See the License for the
|
16
|
+
#specific language governing permissions and limitations
|
17
|
+
#under the License.
|
18
|
+
#
|
19
|
+
module OdpsDatahub
  # Error-code strings passed as the `code` argument of OdpsDatahubException.
  $INVALID_ARGUMENT = "InvalidArgument"
  $SCHEMA_NOT_MATCH = "SchemaNotMatch"
  $PACK_SIZE_EXCEED = "PackSizeExceed"
  $ACCESS_DENIED = "AccessDenied"

  # Error raised by the ODPS datahub client. Carries an error code, a message
  # and an optional request id.
  class OdpsDatahubException < StandardError
    attr_reader :mMessage, :mErrorCode, :mRequestId

    # @param code [Object] error code (one of the $... strings above, or any
    #   other value; it is only stored and printed)
    # @param msg [Object] description of the failure
    # @param reqId [Object] request id, "" when unknown
    def initialize(code, msg, reqId = "")
      super(msg)
      @mMessage = msg
      @mErrorCode = code
      @mRequestId = reqId
    end

    # @return [Object] the message given to the constructor
    def getMessage
      @mMessage
    end

    # @return [Object] the error code given to the constructor
    def getCode
      @mErrorCode
    end

    # @return [Object] the request id given to the constructor
    def getRequestId
      @mRequestId
    end

    # Interpolation (instead of String#+) keeps this from raising TypeError
    # when the code or request id is nil or not a String.
    #
    # @return [String] "ErrorCode: <code>, Message: <msg>, requestId:<id>"
    def to_s
      "ErrorCode: #{@mErrorCode}, Message: #{@mMessage}, requestId:#{@mRequestId}"
    end
  end
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
#
|
2
|
+
#Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
#or more contributor license agreements. See the NOTICE file
|
4
|
+
#distributed with this work for additional information
|
5
|
+
#regarding copyright ownership. The ASF licenses this file
|
6
|
+
#to you under the Apache License, Version 2.0 (the
|
7
|
+
#"License"); you may not use this file except in compliance
|
8
|
+
#with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
#Unless required by applicable law or agreed to in writing,
|
13
|
+
#software distributed under the License is distributed on an
|
14
|
+
#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
#KIND, either express or implied. See the License for the
|
16
|
+
#specific language governing permissions and limitations
|
17
|
+
#under the License.
|
18
|
+
#
|
19
|
+
require 'net/http'
|
20
|
+
require 'base64'
|
21
|
+
require 'openssl'
|
22
|
+
require_relative '../exceptions'
|
23
|
+
require_relative '../conf/config'
|
24
|
+
require_relative 'http_flag'
|
25
|
+
Net::HTTP.version_1_2
|
26
|
+
|
27
|
+
module OdpsDatahub
  # Builds, signs and sends one HTTP request against either the ODPS endpoint
  # or the datahub endpoint of the given configuration.
  #
  # The request is fully built (URI, headers, Authorization) in the
  # constructor; #getResponse performs the network call.
  class HttpConnection
    attr_reader :mHeader, :mParam, :mUri, :mReq, :mMethod, :mPath, :mStream, :mOdpsConfig

    # @param odpsConfig [Object] configuration; this class reads odpsEndpoint,
    #   datahubEndpoint, userAgent, accessId and accessKey from it
    # @param headers [Hash] request headers; this hash is modified in place
    # @param params [Hash] query parameters; a value of "" emits a bare key
    # @param path [String] request path appended to the endpoint
    # @param method [String] "GET", "POST" or "PUT"
    # @param stream [String, nil] request body for POST/PUT
    # @param isodpsurl [Boolean] true to target odpsEndpoint, false for
    #   datahubEndpoint (which also adds the tunnel version headers)
    # @raise [OdpsDatahubException] when method is not GET/POST/PUT
    def initialize(odpsConfig, headers, params, path, method, stream = nil, isodpsurl = false)
      @mOdpsConfig = odpsConfig
      @mHeader = headers
      @mParam = params
      @mPath = path
      @mMethod = method
      @mStream = stream
      @mIsOdpsUrl = isodpsurl
      buildRequest
    end

    # Computes @mUri, fills in the default/required headers, signs the
    # request and creates the Net::HTTP request object in @mReq.
    def buildRequest
      endpoint = @mIsOdpsUrl ? @mOdpsConfig.odpsEndpoint : @mOdpsConfig.datahubEndpoint
      # The URL keeps the parameters in the hash's own iteration order.
      @mUri = URI.parse(endpoint + @mPath + queryString(@mParam))

      # Content-MD5 and Content-Type each contribute a line to the signed
      # string, so they must exist even when empty.
      @mHeader[$CONTENT_MD5] = "" unless @mHeader.has_key?($CONTENT_MD5)
      @mHeader[$CONTENT_TYPE] = "" unless @mHeader.has_key?($CONTENT_TYPE)
      @mHeader[$DATE] = Time.now.utc.strftime("%a, %d %b %Y %H:%M:%S GMT")
      unless @mIsOdpsUrl
        @mHeader[$TUNNEL_STREAM_VERSION] = "1"
        @mHeader[$TUNNEL_VERSION] = "3"
      end
      @mHeader[$USER_AGENT] = $SDK_UA_STR + @mOdpsConfig.userAgent
      @mHeader[$AUTHORIZATION] = signAuthorization

      case @mMethod
      when "POST"
        @mReq = ::Net::HTTP::Post.new(mUri.to_s, @mHeader)
        @mReq.body = @mStream
      when "GET"
        @mReq = ::Net::HTTP::Get.new(mUri.to_s, @mHeader)
      when "PUT"
        @mReq = ::Net::HTTP::Put.new(mUri.to_s, @mHeader)
        @mReq.body = @mStream
      else
        raise OdpsDatahubException.new($INVALID_ARGUMENT, "invalid method")
      end
    end

    # Sends the prepared request and returns the response.
    #
    # TLS is enabled when the endpoint URI uses the https scheme; without
    # this an https endpoint would be spoken to in plain HTTP.
    #
    # @return [Net::HTTPResponse]
    def getResponse
      Net::HTTP.start(@mUri.host, @mUri.port, use_ssl: @mUri.scheme == 'https') do |http|
        http.request(@mReq)
      end
    end

    # Builds the Authorization header value.
    #
    # The signed string is: the method, then (in order of lower-cased header
    # name) the values of content-md5, content-type and date and the
    # "name:value" of every x-odps-* header, one per line, followed by the
    # path and the query parameters sorted by key. It is signed with
    # HMAC-SHA1 using the configured access key.
    #
    # @return [String] "ODPS <accessId>:<base64 signature>"
    def signAuthorization
      prefix = "x-odps-"
      stringToSign = @mMethod + "\n"

      loweredHeaders = {}
      @mHeader.each { |key, value| loweredHeaders[key.downcase] = value }

      loweredHeaders.sort.each do |key, value|
        if key.start_with?(prefix)
          stringToSign << key << ":" << value << "\n"
        elsif key == 'content-type' || key == 'content-md5' || key == 'date'
          stringToSign << value << "\n"
        end
      end

      stringToSign += @mPath + queryString(@mParam.sort)

      digest = OpenSSL::HMAC.digest('sha1', @mOdpsConfig.accessKey, stringToSign)
      # strict_encode64 emits no line feeds, so the header value is a single
      # clean line (encode64 appended a trailing "\n").
      "ODPS " + @mOdpsConfig.accessId + ":" + Base64.strict_encode64(digest)
    end

    private

    # Renders key/value pairs as "?k1=v1&k2&..." ("" when there are none).
    # A pair whose value is "" is rendered as the bare key.
    def queryString(pairs)
      parts = pairs.map do |key, value|
        value != "" ? "#{key}=#{value}" : key.to_s
      end
      parts.empty? ? "" : '?' + parts.join('&')
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
#
|
2
|
+
#Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
#or more contributor license agreements. See the NOTICE file
|
4
|
+
#distributed with this work for additional information
|
5
|
+
#regarding copyright ownership. The ASF licenses this file
|
6
|
+
#to you under the Apache License, Version 2.0 (the
|
7
|
+
#"License"); you may not use this file except in compliance
|
8
|
+
#with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
#Unless required by applicable law or agreed to in writing,
|
13
|
+
#software distributed under the License is distributed on an
|
14
|
+
#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
#KIND, either express or implied. See the License for the
|
16
|
+
#specific language governing permissions and limitations
|
17
|
+
#under the License.
|
18
|
+
#
|
19
|
+
module OdpsDatahub
  # User-Agent prefix; HttpConnection appends the configured userAgent to it.
  $SDK_UA_STR = "ODPS Ruby SDK v0.1"
  # 2 MiB.
  $MAX_PACK_SIZE = 2048*1024

  # HTTP header names, published as global variables.
  class HttpHeaders
    $AUTHORIZATION = "Authorization"
    $CACHE_CONTROL = "Cache-Control"
    $CONTENT_DISPOSITION = "Content-Disposition"
    $CONTENT_ENCODING = "Content-Encoding"
    $CONTENT_LENGTH = "Content-Length"
    $CONTENT_MD5 = "Content-MD5"
    $CONTENT_TYPE = "Content-Type"
    $DATE = "Date"
    $ETAG = "ETag"
    $EXPIRES = "Expires"
    $HOST = "Host"
    $LAST_MODIFIED = "Last-Modified"
    $RANGE = "Range"
    $LOCATION = "Location"
    $TRANSFER_ENCODING = "Transfer-Encoding"
    $CHUNKED = "chunked"
    $ACCEPT_ENCODING = "Accept-Encoding"
    $USER_AGENT = "User-Agent"
    $TUNNEL_VERSION = "x-odps-tunnel-version"
    $TUNNEL_STREAM_VERSION = "x-odps-tunnel-stream-version"
  end

  # Query-parameter names and iterate-mode values, published as global
  # variables.
  class HttpParam
    $PARAM_RECORD_COUNT = "recordcount"
    $PARAM_PACK_ID = "packid"
    $PARAM_PACK_NUM = "packnum"
    $PARAM_ITERATE_MODE = "iteratemode"
    $PARAM_ITER_MODE_AT_PACKID = "AT_PACKID"
    $PARAM_ITER_MODE_AFTER_PACKID = "AFTER_PACKID"
    $PARAM_ITER_MODE_FIRST_PACK = "FIRST_PACK"
    $PARAM_ITER_MODE_LAST_PACK = "LAST_PACK"
    $PARAM_SHARD_NUMBER = "shardnumber"
    $PARAM_SHARD_STATUS = "shardstatus"
    $PARAM_PARTITION = "partition"
    $PARAM_PARTITIONS = "partitions"
    $PARAM_SEEK_TIME = "timestamp"
    $PARAM_CURR_PROJECT = "curr_project"
    $PARAM_TYPE = "type"
    $PARAM_QUERY = "query"
    $PARAM_EXPECT_MARKER = "expectmarker"
    $PARAM_MARKER = "marker"
  end

  # Pack id constants.
  #
  # The value is exposed as a constant because a class variable has no
  # reader and so cannot be reached from other classes. The class variable
  # is kept for any code that still reads it.
  class PackType
    FIRST_PACK_ID = "00000000000000000000000000000000".freeze
    @@FIRST_PACK_ID = FIRST_PACK_ID.dup
  end

  # Seek modes. Exposed as constants for the same reason as PackType; the
  # class variables are kept unchanged.
  class ReadMode
    SEEK_BEGIN = 1
    SEEK_END = 2
    SEEK_CUR = 3
    SEEK_NEXT = 4

    @@SEEK_BEGIN = SEEK_BEGIN
    @@SEEK_END = SEEK_END
    @@SEEK_CUR = SEEK_CUR
    @@SEEK_NEXT = SEEK_NEXT
  end
end
|
@@ -0,0 +1,253 @@
|
|
1
|
+
#
|
2
|
+
#Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
#or more contributor license agreements. See the NOTICE file
|
4
|
+
#distributed with this work for additional information
|
5
|
+
#regarding copyright ownership. The ASF licenses this file
|
6
|
+
#to you under the Apache License, Version 2.0 (the
|
7
|
+
#"License"); you may not use this file except in compliance
|
8
|
+
#with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
#Unless required by applicable law or agreed to in writing,
|
13
|
+
#software distributed under the License is distributed on an
|
14
|
+
#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
#KIND, either express or implied. See the License for the
|
16
|
+
#specific language governing permissions and limitations
|
17
|
+
#under the License.
|
18
|
+
#
|
19
|
+
require 'rexml/document'
|
20
|
+
require_relative 'xml_template'
|
21
|
+
require_relative 'odps_table_schema'
|
22
|
+
require_relative '../http/http_connection'
|
23
|
+
|
24
|
+
module OdpsDatahub
|
25
|
+
# Limits checked by the OdpsTableRecord setters.
$STRING_MAX_LENTH = 8 * 1024 * 1024
$DATETIME_MAX_TICKS = 253402271999000
$DATETIME_MIN_TICKS = -62135798400000
$STRING_CHARSET = "UTF-8"

# One row of values for a table schema. Values are stored by column index
# and validated by the typed setters before being stored.
class OdpsTableRecord
  attr_reader :mValues, :mSchema

  # @param schema [Object] table schema; must respond to getColumnCount
  def initialize(schema)
    @mSchema = schema
    @mValues = Array.new(@mSchema.getColumnCount)
  end

  # @return [Integer] number of columns in the schema
  def getColumnsCount
    @mSchema.getColumnCount
  end

  # @return [Object] the schema given to the constructor
  def getTableSchema
    @mSchema
  end

  # @param idx [Integer] zero-based column index
  # @return [Object, nil] the stored value (nil when unset)
  # @raise [RuntimeError] when idx is outside the schema's column range
  def getValue(idx)
    checkIndex(idx)
    @mValues.at(idx)
  end

  # Stores nil in the given column.
  def setNullValue(idx)
    setValue(idx, nil)
  end

  # @raise [RuntimeError] unless value is an Integer
  def setBigInt(idx, value)
    raise "value show be Integer" unless value.is_a?(Integer)
    setValue(idx, value)
  end

  # @raise [RuntimeError] unless value is a Float
  def setDouble(idx, value)
    raise "value show be Float" unless value.is_a?(Float)
    setValue(idx, value)
  end

  # @raise [RuntimeError] unless value is exactly true or false
  def setBoolean(idx, value)
    if value != false && value != true
      raise "value must be bool"
    end
    setValue(idx, value)
  end

  # Stores an Integer within [$DATETIME_MIN_TICKS, $DATETIME_MAX_TICKS].
  #
  # The upper bound is compared with `<=`. The earlier `<=>` returned
  # -1/0/1, which is always truthy, so values above the maximum were
  # silently accepted.
  #
  # @raise [RuntimeError] when value is not an Integer or is out of range
  def setDateTime(idx, value)
    inRange = value.is_a?(Integer) &&
              value >= $DATETIME_MIN_TICKS &&
              value <= $DATETIME_MAX_TICKS
    raise "DateTime out of range or value show be Integer" unless inRange
    setValue(idx, value)
  end

  # @raise [RuntimeError] unless value is a String whose length is below
  #   $STRING_MAX_LENTH
  def setString(idx, value)
    unless value.is_a?(String) && value.length < $STRING_MAX_LENTH
      raise "value show be String and len < #{$STRING_MAX_LENTH}"
    end
    setValue(idx, value)
  end

  private

  # Stores value at idx after the bounds check.
  def setValue(idx, value)
    checkIndex(idx)
    @mValues[idx] = value
  end

  # @raise [RuntimeError] when idx is outside 0...column count
  def checkIndex(idx)
    if idx < 0 || idx >= @mSchema.getColumnCount
      raise "idx out of range"
    end
  end
end
|
103
|
+
|
104
|
+
# Table-level operations (listing partitions, adding a partition through a
# SQL task) issued through HttpConnection against the ODPS endpoint.
class OdpsTable
  # @param odpsConfig [Object] configuration handed to HttpConnection
  # @param projectName [String]
  # @param tableName [String]
  def initialize(odpsConfig, projectName, tableName)
    @mOdpsConfig = odpsConfig
    @mProjectName = projectName
    @mTableName = tableName
  end

  # Fetches all partitions page by page, following the <Marker> element.
  #
  # @return [Array<Hash>] e.g.
  #   [{"time"=>"2016", "place"=>"china2"},{"time"=>"2015", "place"=>"china"}];
  #   whatever has been collected so far (possibly []) when a request does
  #   not answer 200
  def getPartitionList
    partitionList = []
    url = "/projects/" + @mProjectName + "/tables/" + @mTableName
    lastMarker = nil

    loop do
      param = {}
      param[$PARAM_CURR_PROJECT] = @mProjectName
      param[$PARAM_EXPECT_MARKER] = true
      param[$PARAM_PARTITIONS] = ""
      param[$PARAM_MARKER] = lastMarker unless lastMarker.nil?

      conn = HttpConnection.new(@mOdpsConfig, {}, param, url, "GET", "", true)
      res = conn.getResponse
      # A non-200 answer ends the listing without raising.
      return partitionList if res.code != "200"

      doc = REXML::Document.new(res.body.to_s)

      # Each <Partition> holds child elements with Name/Value attributes.
      doc.root.get_elements("Partition").each do |partition|
        partitionInfo = {}
        partition.elements.each do |column|
          partitionInfo[column.attributes["Name"]] = column.attributes["Value"]
        end
        partitionList.push(partitionInfo)
      end

      # An absent or empty <Marker> means this was the last page. A marker
      # equal to the one just sent would request the same page forever, so
      # it also ends the loop.
      markerElement = doc.root.get_elements("Marker").first
      nextMarker = markerElement && markerElement.text
      break if nextMarker.nil? || nextMarker == lastMarker
      lastMarker = nextMarker
    end

    partitionList
  end

  # Adds the partition if it does not exist yet.
  #
  # NOTE(review): partition names and values are placed into the SQL text
  # as-is; a value containing a single quote would break the statement.
  # Confirm that callers only pass trusted specs.
  #
  # @param ptStr [String] partition spec, e.g. 'dt=20150805,hh=08,mm=24'
  # @raise [RuntimeError] when the spec is empty or an item is not "key=value"
  def addPartition(ptStr)
    specs = ptStr.split(',')
    raise "invalid partition spec" + ptStr if specs.empty?

    assignments = specs.map do |pt|
      ptkv = pt.split('=')
      raise "invalid partition spec" + pt if ptkv.size != 2
      ptkv[0] + "='" + ptkv[1] + "'"
    end

    sqlstr = "ALTER TABLE " + @mProjectName + "." + @mTableName +
             " ADD IF NOT EXISTS PARTITION (" + assignments.join(',') + ");"
    runSQL("SQLAddPartitionTask", sqlstr)
  end

  # Submits the SQL as a job instance and blocks until it has terminated.
  #
  # NOTE(review): Digest::MD5 is used here but this file does not require
  # 'digest' itself; it relies on it having been loaded elsewhere.
  #
  # @param taskName [String]
  # @param sqlstring [String]
  # @raise [RuntimeError] when the submission is rejected or a task fails
  def runSQL(taskName, sqlstring)
    task_xml = XmlTemplate.getTaskXml(taskName, sqlstring)
    job_xml = genJobXml('arbitriary_job', '9', "", task_xml)

    headers = {
      'Content-Type' => 'application/xml',
      'Content-MD5' => Digest::MD5.hexdigest(job_xml),
      # Content-Length counts bytes, not characters.
      'Content-Length' => job_xml.bytesize.to_s
    }

    url = "/projects/" + @mProjectName + "/instances"
    conn = HttpConnection.new(@mOdpsConfig, headers, {}, url, 'POST', job_xml, true)

    res = conn.getResponse
    if res.code != '200'
      raise "Add partition failed with error" + res.code.to_s
    end

    # res[...] looks the header up case-insensitively; to_hash uses
    # lower-case keys, so to_hash['Content-Length'] never matched.
    if res['Content-Length'] != "0" && !res.body.to_s.include?("Instance")
      raise res.body.to_s
    end

    waitForSQLComplete(res)
  end

  #TODO support multiple tasks
  # @return the job XML produced by XmlTemplate.getJobXml
  def genJobXml(name, priority, comment, taskStr, runMode='sequence')
    XmlTemplate.getJobXml(name, priority, comment, taskStr, runMode)
  end

  # Polls the instance named by the response's Location header until its
  # status is 'Terminated', then checks every task.
  #
  # @param res [Net::HTTPResponse] response of the instance submission
  # @raise [RuntimeError] when the Location header is missing, or with the
  #   task result when a task did not finish with status 'Success'
  def waitForSQLComplete(res)
    location = res['location']
    raise "SQL instance response has no Location header" if location.nil?
    instanceurl = "/projects/" + @mProjectName + "/instances" + "/" + location.split('/')[-1]

    params = { 'taskstatus' => "" }
    doc = nil

    loop do
      conn = HttpConnection.new(@mOdpsConfig, {}, params, instanceurl, 'GET', "", true)
      doc = REXML::Document.new(conn.getResponse.body.to_s)
      break if doc.root.elements["Status"].text == 'Terminated'
      # Wait before polling again whatever the non-terminal status is, so
      # an unexpected status cannot turn this into a busy loop.
      sleep(5)
    end

    doc.root.elements.each('Tasks/Task') { |e|
      status = e.elements['Status'].text
      name = e.elements['Name'].text
      if status.to_s != 'Success'
        raise getTaskResult(instanceurl, name.to_s)
      end
    }
  end

  # Fetches the result text of one task of an instance.
  #
  # @param instanceurl [String] instance path as built by waitForSQLComplete
  # @param name [String] task name to look for
  # @return [String] string form of the CDATA sections of the task's
  #   <Result>, or a fallback message when the task is not listed (so the
  #   value is always usable as a `raise` message)
  def getTaskResult(instanceurl, name)
    params = { 'result' => "" }

    conn = HttpConnection.new(@mOdpsConfig, {}, params, instanceurl, 'GET', "", true)
    res = conn.getResponse
    doc = REXML::Document.new(res.body.to_s)
    doc.root.elements.each('Tasks/Task') { |e|
      taskname = e.elements['Name'].text
      if taskname == name.to_s
        return e.elements['Result'].cdatas().to_s
      end
    }

    "no result found for task " + name.to_s
  end
end
|
253
|
+
end
|