fluent-plugin-aliyun-odps 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -1
- data/README.md +1 -0
- data/VERSION +1 -1
- data/lib/fluent/plugin/conf/config.rb +5 -0
- data/lib/fluent/plugin/crc/crc.rb +53 -0
- data/lib/fluent/plugin/crc/lib/linux/crc32c.so +0 -0
- data/lib/fluent/plugin/crc/lib/win/crc32c.so +0 -0
- data/lib/fluent/plugin/{digest → crc/origin}/crc.rb +0 -0
- data/lib/fluent/plugin/{digest → crc/origin}/crc32.rb +0 -0
- data/lib/fluent/plugin/{digest → crc/origin}/crc32c.rb +0 -0
- data/lib/fluent/plugin/http/http_connection.rb +1 -0
- data/lib/fluent/plugin/http/http_flag.rb +1 -1
- data/lib/fluent/plugin/odps/odps_table.rb +1 -1
- data/lib/fluent/plugin/out_aliyun_odps.rb +5 -0
- data/lib/fluent/plugin/serialize/deserializer.rb +1 -1
- data/lib/fluent/plugin/serialize/serializer.rb +13 -13
- data/lib/fluent/plugin/stream_writer.rb +2 -1
- data/odps_example.conf +1 -0
- metadata +8 -6
- data/README.cn.md +0 -112
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e49e64231330fc82748d84415440558c8503b30
|
4
|
+
data.tar.gz: e328545513c76cf9f90cded935e7548037ff83ef
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a032d3ea1acdfd27f9f002a1d9eacb5a2b516c18f042b9a8f6aa4d6235220bf7e8c2da9aa7bc321a628d96dee364fbbdbef48ec06a97c404b208ea02b66e3ab4
|
7
|
+
data.tar.gz: 5bdc7642e0a3d1d58dcdc0bc6afc1288d152b21703f79db3a66edb3efdd156637481340f280d58d977be596b68fefe09b4773a78355f3827c881c37f19cda484
|
data/CHANGELOG.md
CHANGED
@@ -3,4 +3,6 @@ Fix datetime format bug, support String, DateTime, Time type when write to a dat
|
|
3
3
|
0.0.5
|
4
4
|
Add reload shard when import fails, and remove unload shard operation when shut down.
|
5
5
|
0.0.6
|
6
|
-
Add decimal support, fix string input while setting double and int.
|
6
|
+
Add decimal support, fix string input while setting double and int.
|
7
|
+
0.0.7
|
8
|
+
Add error message when adding a partition fails, support fast crc, remove pack size limit.
|
data/README.md
CHANGED
@@ -98,6 +98,7 @@ $ cp aliyun-odps-fluentd-plugin/lib/fluent/plugin/* {YOUR_FLUENTD_DIRECTORY}/lib
|
|
98
98
|
- time_format(Optional):
|
99
99
|
- if you are using the key words to set your <partition> and the key word is in time format, please set the param <time_format>. example: source[datetime] = "29/Aug/2015:11:10:16 +0800", and the param <time_format> is "%d/%b/%Y:%H:%M:%S %z"
|
100
100
|
- shard_number(Optional): will write data to shards between [0,shard_number-1]; this value must be greater than 0 and less than the max shard number of your table.
|
101
|
+
- enable_fast_crc(Optional): use the fast crc32c.so to calculate the crc; this speeds things up a lot, but it is not supported on some operating systems.
|
101
102
|
|
102
103
|
## Useful Links
|
103
104
|
---
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.6
|
1
|
+
0.0.7
|
@@ -17,6 +17,7 @@
|
|
17
17
|
#under the License.
|
18
18
|
#
|
19
19
|
module OdpsDatahub
|
20
|
+
$USE_FAST_CRC = false
|
20
21
|
class OdpsConfig
|
21
22
|
attr_accessor :accessId, :accessKey, :odpsEndpoint, :datahubEndpoint, :defaultProjectName, :userAgent
|
22
23
|
|
@@ -28,5 +29,9 @@ module OdpsDatahub
|
|
28
29
|
@defaultProject = defaultProjectName
|
29
30
|
@userAgent = ""
|
30
31
|
end
|
32
|
+
|
33
|
+
def self.setFastCrc(value)
|
34
|
+
$USE_FAST_CRC = value
|
35
|
+
end
|
31
36
|
end
|
32
37
|
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#
|
2
|
+
#Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
#or more contributor license agreements. See the NOTICE file
|
4
|
+
#distributed with this work for additional information
|
5
|
+
#regarding copyright ownership. The ASF licenses this file
|
6
|
+
#to you under the Apache License, Version 2.0 (the
|
7
|
+
#"License"); you may not use this file except in compliance
|
8
|
+
#with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
#Unless required by applicable law or agreed to in writing,
|
13
|
+
#software distributed under the License is distributed on an
|
14
|
+
#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
#KIND, either express or implied. See the License for the
|
16
|
+
#specific language governing permissions and limitations
|
17
|
+
#under the License.
|
18
|
+
#
|
19
|
+
require 'rbconfig'
|
20
|
+
module OdpsDatahub
|
21
|
+
class CrcCalculator
|
22
|
+
# @param [StringIO] data
|
23
|
+
# @return crc32c to_i
|
24
|
+
def self.calculate(data)
|
25
|
+
if (!$USE_FAST_CRC)
|
26
|
+
require_relative 'origin/crc32c'
|
27
|
+
crc32c = Digest::CRC32c.new
|
28
|
+
crc32c.update(data.string)
|
29
|
+
return crc32c.checksum.to_i
|
30
|
+
elsif getOsType == "linux" || getOsType == "unix"
|
31
|
+
require_relative 'lib/linux/crc32c.so'
|
32
|
+
return Crc32c.calculate(data.string, data.length, 0).to_i
|
33
|
+
elsif getOsType == "windows"
|
34
|
+
require_relative 'lib/win/crc32c.so'
|
35
|
+
return Crc32c.calculate(data.string, data.length, 0).to_i
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.getOsType
|
40
|
+
host_os = RbConfig::CONFIG['host_os']
|
41
|
+
case host_os
|
42
|
+
when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
|
43
|
+
"windows"
|
44
|
+
when /linux/
|
45
|
+
"linux"
|
46
|
+
when /solaris|bsd/
|
47
|
+
"unix"
|
48
|
+
else
|
49
|
+
raise Error::WebDriverError, "unspport os"
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
Binary file
|
Binary file
|
File without changes
|
File without changes
|
File without changes
|
@@ -225,7 +225,7 @@ module OdpsDatahub
|
|
225
225
|
|
226
226
|
res = conn.getResponse
|
227
227
|
if res.code != '200'
|
228
|
-
raise "Add partition failed with error" + res.
|
228
|
+
raise "Add partition failed with error " + res.body
|
229
229
|
end
|
230
230
|
|
231
231
|
if res.to_hash['Content-Length'] != "0" and not res.body.to_s.include?"Instance"
|
@@ -35,6 +35,7 @@ module Fluent
|
|
35
35
|
config_param :aliyun_odps_hub_endpoint, :string, :default => nil
|
36
36
|
config_param :project, :string, :default => nil
|
37
37
|
config_param :format, :string, :default => 'out_file'
|
38
|
+
config_param :enable_fast_crc, :bool, :default => false
|
38
39
|
|
39
40
|
attr_accessor :tables
|
40
41
|
|
@@ -299,6 +300,10 @@ module Fluent
|
|
299
300
|
:aliyun_odps_endpoint => @aliyun_odps_endpoint,
|
300
301
|
:aliyun_odps_hub_endpoint => @aliyun_odps_hub_endpoint,
|
301
302
|
}
|
303
|
+
#init Global setting
|
304
|
+
if (@enable_fast_crc)
|
305
|
+
OdpsDatahub::OdpsConfig::setFastCrc(true)
|
306
|
+
end
|
302
307
|
#初始化各个table object
|
303
308
|
@tables.each { |te|
|
304
309
|
te.init(config)
|
@@ -19,7 +19,7 @@
|
|
19
19
|
require 'stringio'
|
20
20
|
require 'protobuf'
|
21
21
|
require_relative '../exceptions'
|
22
|
-
require_relative '../
|
22
|
+
require_relative '../crc/crc'
|
23
23
|
require_relative '../odps/odps_table'
|
24
24
|
|
25
25
|
module OdpsDatahub
|
@@ -87,10 +87,10 @@ module OdpsDatahub
|
|
87
87
|
end
|
88
88
|
|
89
89
|
def serialize(upStream, recordList)
|
90
|
-
crc32cPack =
|
90
|
+
crc32cPack = StringIO.new
|
91
91
|
if recordList.is_a?Array
|
92
92
|
recordList.each { |record|
|
93
|
-
crc32cRecord =
|
93
|
+
crc32cRecord = StringIO.new
|
94
94
|
schema = OdpsTableSchema.new
|
95
95
|
schema = record.getTableSchema
|
96
96
|
schema.mCols.each { | col |
|
@@ -98,45 +98,45 @@ module OdpsDatahub
|
|
98
98
|
if cellValue == nil
|
99
99
|
next
|
100
100
|
end
|
101
|
-
crc32cRecord.
|
101
|
+
crc32cRecord.write(encodeFixed32(col.mIdx + 1))
|
102
102
|
case col.mType
|
103
103
|
when $ODPS_BIGINT
|
104
|
-
crc32cRecord.
|
104
|
+
crc32cRecord.write(encodeFixed64(cellValue))
|
105
105
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
|
106
106
|
upStream.write(encodeSInt64(cellValue))
|
107
107
|
when $ODPS_DOUBLE
|
108
|
-
crc32cRecord.
|
108
|
+
crc32cRecord.write(encodeDouble(cellValue))
|
109
109
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::FIXED64, upStream)
|
110
110
|
upStream.write(encodeDouble(cellValue))
|
111
111
|
when $ODPS_BOOLEAN
|
112
|
-
crc32cRecord.
|
112
|
+
crc32cRecord.write(encodeBool(cellValue))
|
113
113
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
|
114
114
|
upStream.write(encodeBool(cellValue))
|
115
115
|
when $ODPS_DATETIME
|
116
|
-
crc32cRecord.
|
116
|
+
crc32cRecord.write(encodeFixed64(cellValue))
|
117
117
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
|
118
118
|
upStream.write(encodeDataTime(cellValue))
|
119
119
|
when $ODPS_STRING
|
120
|
-
crc32cRecord.
|
120
|
+
crc32cRecord.write(cellValue)
|
121
121
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::LENGTH_DELIMITED, upStream)
|
122
122
|
upStream.write(encodeString(cellValue))
|
123
123
|
when $ODPS_DECIMAL
|
124
|
-
crc32cRecord.
|
124
|
+
crc32cRecord.write(cellValue)
|
125
125
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::LENGTH_DELIMITED, upStream)
|
126
126
|
upStream.write(encodeString(cellValue))
|
127
127
|
else
|
128
128
|
raise OdpsDatahubException.new($INVALID_ARGUMENT, "invalid mType")
|
129
129
|
end
|
130
130
|
}
|
131
|
-
recordCrc = crc32cRecord
|
131
|
+
recordCrc = CrcCalculator::calculate(crc32cRecord)
|
132
132
|
writeTag($TUNNEL_END_RECORD, ::Protobuf::WireType::VARINT, upStream)
|
133
133
|
upStream.write(encodeUInt32(recordCrc))
|
134
|
-
crc32cPack.
|
134
|
+
crc32cPack.write(encodeFixed32(recordCrc))
|
135
135
|
}
|
136
136
|
writeTag($TUNNEL_META_COUNT, ::Protobuf::WireType::VARINT, upStream)
|
137
137
|
upStream.write(encodeSInt64(recordList.size))
|
138
138
|
writeTag($TUNNEL_META_CHECKSUM, ::Protobuf::WireType::VARINT, upStream)
|
139
|
-
upStream.write(encodeUInt32(crc32cPack
|
139
|
+
upStream.write(encodeUInt32(CrcCalculator::calculate(crc32cPack)))
|
140
140
|
else
|
141
141
|
raise OdpsDatahubException.new($INVALID_ARGUMENT, "param must be a array")
|
142
142
|
end
|
@@ -93,7 +93,7 @@ module OdpsDatahub
|
|
93
93
|
upStream = Zlib::Deflate.deflate(@mUpStream.string)
|
94
94
|
header[$CONTENT_MD5] = Digest::MD5.hexdigest(upStream)
|
95
95
|
header[$CONTENT_LENGTH] = upStream.length.to_s
|
96
|
-
#MAX_LENGTH
|
96
|
+
#MAX_LENGTH 2048*10KB
|
97
97
|
if upStream.length > $MAX_PACK_SIZE
|
98
98
|
raise OdpsDatahubException.new($PACK_SIZE_EXCEED, "pack size:" + upStream.length.to_s)
|
99
99
|
end
|
@@ -109,6 +109,7 @@ module OdpsDatahub
|
|
109
109
|
if res.code != "200"
|
110
110
|
raise OdpsDatahubException.new(json_obj["Code"], "write failed because " + json_obj["Message"])
|
111
111
|
end
|
112
|
+
return json_obj
|
112
113
|
end
|
113
114
|
|
114
115
|
private
|
data/odps_example.conf
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-aliyun-odps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Xiao Dong
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-12-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: fluentd
|
@@ -125,15 +125,17 @@ files:
|
|
125
125
|
- CHANGELOG.md
|
126
126
|
- Gemfile
|
127
127
|
- License
|
128
|
-
- README.cn.md
|
129
128
|
- README.md
|
130
129
|
- Rakefile
|
131
130
|
- VERSION
|
132
131
|
- fluent-plugin-aliyun-odps.gemspec
|
133
132
|
- lib/fluent/plugin/conf/config.rb
|
134
|
-
- lib/fluent/plugin/
|
135
|
-
- lib/fluent/plugin/
|
136
|
-
- lib/fluent/plugin/
|
133
|
+
- lib/fluent/plugin/crc/crc.rb
|
134
|
+
- lib/fluent/plugin/crc/lib/linux/crc32c.so
|
135
|
+
- lib/fluent/plugin/crc/lib/win/crc32c.so
|
136
|
+
- lib/fluent/plugin/crc/origin/crc.rb
|
137
|
+
- lib/fluent/plugin/crc/origin/crc32.rb
|
138
|
+
- lib/fluent/plugin/crc/origin/crc32c.rb
|
137
139
|
- lib/fluent/plugin/exceptions.rb
|
138
140
|
- lib/fluent/plugin/http/http_connection.rb
|
139
141
|
- lib/fluent/plugin/http/http_flag.rb
|
data/README.cn.md
DELETED
@@ -1,112 +0,0 @@
|
|
1
|
-
# Aliyun ODPS Plugin for Fluentd
|
2
|
-
|
3
|
-
## ��ʼʹ��
|
4
|
-
---
|
5
|
-
|
6
|
-
### ����
|
7
|
-
|
8
|
-
- �������ݴ�������(Open Data Processing Service�����ODPS)�ǰ���Ͱ������з��ĺ������ݴ���ƽ̨����Ҫ�����������ṹ�����ݵĴ洢�ͼ��㣬�����ṩ�������ݲֿ�Ľ�������Լ���Դ����ݵķ�����ģ����
|
9
|
-
- ODPS DataHub Service(DHS)��һ��ODPS���ڽ��������û��ṩʵʱ���ݵķ���(Publish)�Ͷ���(Subscribe)�Ĺ��ܡ�
|
10
|
-
|
11
|
-
|
12
|
-
### ����Ҫ��
|
13
|
-
|
14
|
-
ʹ�ô˲������Ҫ�߱����»���:
|
15
|
-
|
16
|
-
1. Ruby 2.1.0 �����
|
17
|
-
2. Gem 2.4.5 �����
|
18
|
-
3. Fluentd-0.10.49 ����� (*[Home Page](http://www.fluentd.org/)*)
|
19
|
-
4. Protobuf-3.5.1 �����(Ruby protobuf)
|
20
|
-
|
21
|
-
### GEM��װ
|
22
|
-
|
23
|
-
��ruby gem��װʹ��:
|
24
|
-
|
25
|
-
```
|
26
|
-
$ gem install fluent-plugin-aliyun-odps
|
27
|
-
```
|
28
|
-
|
29
|
-
### Դ�밲װ���
|
30
|
-
|
31
|
-
* ���Ȱ�װ����������fluentd�Լ�protobuf��
|
32
|
-
* Ȼ���Ŀ¼������Fluentd���Ŀ¼�£����磺 ��aliyun-odps-fluentd-plugin/lib/fluent/pluginĿ¼���Ƶ�{YOUR_FLUENTD_DIRECTORY}/lib/fluent/plugin�С�
|
33
|
-
|
34
|
-
```
|
35
|
-
$ gem install protobuf
|
36
|
-
$ gem install fluentd --no-ri --no-rdoc
|
37
|
-
$ git clone https://github.com/aliyun/aliyun-odps-fluentd-plugin.git
|
38
|
-
$ cp aliyun-odps-fluentd-plugin/lib/fluent/plugin/* {YOUR_FLUENTD_DIRECTORY}/lib/fluent/plugin/ -r
|
39
|
-
```
|
40
|
-
|
41
|
-
### ���ʹ��ʾ��
|
42
|
-
|
43
|
-
```
|
44
|
-
<source>
|
45
|
-
type tail
|
46
|
-
path /opt/log/in/in.log
|
47
|
-
pos_file /opt/log/in/in.log.pos
|
48
|
-
refresh_interval 5s
|
49
|
-
tag in.log
|
50
|
-
format /^(?<remote>[^ ]*) - - \[(?<datetime>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*) "-" "(?<agent>[^\"]*)"$/
|
51
|
-
time_format %Y%b%d %H:%M:%S %z
|
52
|
-
</source>
|
53
|
-
```
|
54
|
-
```
|
55
|
-
<match in.**>
|
56
|
-
type aliyun_odps
|
57
|
-
aliyun_access_id ************
|
58
|
-
aliyun_access_key *********
|
59
|
-
aliyun_odps_endpoint http://service.odps.aliyun.com/api
|
60
|
-
aliyun_odps_hub_endpoint http://dh.odps.aliyun.com
|
61
|
-
buffer_chunk_limit 2m
|
62
|
-
buffer_queue_limit 128
|
63
|
-
flush_interval 5s
|
64
|
-
project your_projectName
|
65
|
-
<table in.log>
|
66
|
-
table your_tableName
|
67
|
-
fields remote,method,path,code,size,agent
|
68
|
-
partition ctime=${datetime.strftime('%Y%m%d')}
|
69
|
-
time_format %d/%b/%Y:%H:%M:%S %z
|
70
|
-
shard_number 1
|
71
|
-
</table>
|
72
|
-
</match>
|
73
|
-
```
|
74
|
-
### ����˵��
|
75
|
-
|
76
|
-
- type(Fixed): �̶�ֵ aliyun_odps.
|
77
|
-
- aliyun_access_id(Required):������access_id.
|
78
|
-
- aliyun_access_key(Required):������access key.
|
79
|
-
- aliyun_odps_hub_endpoint(Required):�����ķ�������ESC�ϣ���ѱ�ֵ�趨Ϊ http://dh-ext.odps.aliyun-inc.com, ��������Ϊ http://dh.odps.aliyun.com.
|
80
|
-
- aliyunodps_endpoint(Required):�����ķ�������ESC�ϣ���ѱ�ֵ�趨Ϊ http://odps-ext.aiyun-inc.com/api, ��������Ϊ http://service.odps.aliyun.com/api .
|
81
|
-
- buffer_chunk_limit(Optional): ���С��֧�֡�k��(KB),��m��(MB),��g��(GB)��λ��Ĭ�� 8MB������ֵ2MB.
|
82
|
-
- buffer_queue_limit(Optional): ����д�С����ֵ��buffer_chunk_limit��ͬ����������������С��
|
83
|
-
- flush_interval(Optional): ǿ�Ʒ��ͼ�����ﵽʱ��������δ����ǿ�Ʒ���, Ĭ�� 60s.
|
84
|
-
- project(Required): project����.
|
85
|
-
- table(Required): table����.
|
86
|
-
- fields(Required): ��source��Ӧ���ֶ������������source֮��.
|
87
|
-
- partition(Optional)����Ϊ�������������ô���.
|
88
|
-
- ������֧�ֵ�����ģʽ:
|
89
|
-
- �̶�ֵ: partition ctime=20150804
|
90
|
-
- �ؼ���: partition ctime=${remote} ������remoteΪsource��ij�ֶΣ�
|
91
|
-
- ʱ���ʽ�ؼ���: partition ctime=${datetime.strftime('%Y%m%d')} ������datetimeΪsource��ijʱ���ʽ�ֶΣ����Ϊ%Y%m%d��ʽ��Ϊ�������ƣ�
|
92
|
-
- time_format(Optional):
|
93
|
-
- ���ʹ��ʱ���ʽ�ؼ���Ϊ<partition>, �����ñ�����. ����: source[datetime]="29/Aug/2015:11:10:16 +0800",������<time_format>Ϊ"%d/%b/%Y:%H:%M:%S %z"
|
94
|
-
- shard_number(Optional):ָ��shard���������������shard[0,shard_number-1]��Χ�ڵ�shard��д�����ݣ�����Ϊ����0��С��table��Ӧshard������������.
|
95
|
-
|
96
|
-
## �ٷ���վ
|
97
|
-
---
|
98
|
-
|
99
|
-
- [Fluentd User Guide](http://docs.fluentd.org/)
|
100
|
-
|
101
|
-
## ����
|
102
|
-
---
|
103
|
-
|
104
|
-
- [Sun Zongtao]()
|
105
|
-
- [Cai Ying]()
|
106
|
-
- [Dong Xiao](https://github.com/dongxiao1198)
|
107
|
-
- [Yang Hongbo](https://github.com/hongbosoftware)
|
108
|
-
|
109
|
-
## License
|
110
|
-
---
|
111
|
-
|
112
|
-
licensed under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0.html)
|