fluent-plugin-aliyun-odps 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -1
- data/README.md +1 -0
- data/VERSION +1 -1
- data/lib/fluent/plugin/conf/config.rb +5 -0
- data/lib/fluent/plugin/crc/crc.rb +53 -0
- data/lib/fluent/plugin/crc/lib/linux/crc32c.so +0 -0
- data/lib/fluent/plugin/crc/lib/win/crc32c.so +0 -0
- data/lib/fluent/plugin/{digest → crc/origin}/crc.rb +0 -0
- data/lib/fluent/plugin/{digest → crc/origin}/crc32.rb +0 -0
- data/lib/fluent/plugin/{digest → crc/origin}/crc32c.rb +0 -0
- data/lib/fluent/plugin/http/http_connection.rb +1 -0
- data/lib/fluent/plugin/http/http_flag.rb +1 -1
- data/lib/fluent/plugin/odps/odps_table.rb +1 -1
- data/lib/fluent/plugin/out_aliyun_odps.rb +5 -0
- data/lib/fluent/plugin/serialize/deserializer.rb +1 -1
- data/lib/fluent/plugin/serialize/serializer.rb +13 -13
- data/lib/fluent/plugin/stream_writer.rb +2 -1
- data/odps_example.conf +1 -0
- metadata +8 -6
- data/README.cn.md +0 -112
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e49e64231330fc82748d84415440558c8503b30
|
4
|
+
data.tar.gz: e328545513c76cf9f90cded935e7548037ff83ef
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a032d3ea1acdfd27f9f002a1d9eacb5a2b516c18f042b9a8f6aa4d6235220bf7e8c2da9aa7bc321a628d96dee364fbbdbef48ec06a97c404b208ea02b66e3ab4
|
7
|
+
data.tar.gz: 5bdc7642e0a3d1d58dcdc0bc6afc1288d152b21703f79db3a66edb3efdd156637481340f280d58d977be596b68fefe09b4773a78355f3827c881c37f19cda484
|
data/CHANGELOG.md
CHANGED
@@ -3,4 +3,6 @@ Fix datetime format bug, support String, DateTime, Time type when write to a dat
|
|
3
3
|
0.0.5
|
4
4
|
Add reload shard when import fails, and remove unload shard operation when shut down.
|
5
5
|
0.0.6
|
6
|
-
Add decimal support, fix string input while setting double and int.
|
6
|
+
Add decimal support, fix string input while setting double and int.
|
7
|
+
0.0.7
|
8
|
+
Add error message when adding a partition fails, support fast crc, remove pack size limit.
|
data/README.md
CHANGED
@@ -98,6 +98,7 @@ $ cp aliyun-odps-fluentd-plugin/lib/fluent/plugin/* {YOUR_FLUENTD_DIRECTORY}/lib
|
|
98
98
|
- time_format(Optional):
|
99
99
|
- if you are using the key words to set your <partition> and the key word is in time format, please set the param <time_format>. example: source[datetime] = "29/Aug/2015:11:10:16 +0800", and the param <time_format> is "%d/%b/%Y:%H:%M:%S %z"
|
100
100
|
- shard_number(Optional): will write data to shards between [0,shard_number-1], this config must be greater than 0 and less than the max shard number of your table.
|
101
|
+
- enable_fast_crc(Optional): use the bundled native crc32c.so to calculate crc, which improves speed a lot, but it is not supported on some operating systems.
|
101
102
|
|
102
103
|
## Useful Links
|
103
104
|
---
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.7
|
@@ -17,6 +17,7 @@
|
|
17
17
|
#under the License.
|
18
18
|
#
|
19
19
|
module OdpsDatahub
|
20
|
+
$USE_FAST_CRC = false
|
20
21
|
class OdpsConfig
|
21
22
|
attr_accessor :accessId, :accessKey, :odpsEndpoint, :datahubEndpoint, :defaultProjectName, :userAgent
|
22
23
|
|
@@ -28,5 +29,9 @@ module OdpsDatahub
|
|
28
29
|
@defaultProject = defaultProjectName
|
29
30
|
@userAgent = ""
|
30
31
|
end
|
32
|
+
|
33
|
+
# Sets the global $USE_FAST_CRC flag, which CrcCalculator.calculate reads to
# choose between the pure-Ruby CRC32C code and the native crc32c.so library.
#
# @param value the new flag value; it is stored as-is, with no coercion
def self.setFastCrc(value)
|
34
|
+
$USE_FAST_CRC = value
|
35
|
+
end
|
31
36
|
end
|
32
37
|
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#
|
2
|
+
#Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
#or more contributor license agreements. See the NOTICE file
|
4
|
+
#distributed with this work for additional information
|
5
|
+
#regarding copyright ownership. The ASF licenses this file
|
6
|
+
#to you under the Apache License, Version 2.0 (the
|
7
|
+
#"License"); you may not use this file except in compliance
|
8
|
+
#with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
#Unless required by applicable law or agreed to in writing,
|
13
|
+
#software distributed under the License is distributed on an
|
14
|
+
#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
#KIND, either express or implied. See the License for the
|
16
|
+
#specific language governing permissions and limitations
|
17
|
+
#under the License.
|
18
|
+
#
|
19
|
+
require 'rbconfig'
|
20
|
+
module OdpsDatahub
  # Computes CRC32C checksums over buffered record data.
  #
  # The implementation is selected by the global $USE_FAST_CRC flag: when it
  # is falsy the pure-Ruby Digest::CRC32c from origin/crc32c is used, otherwise
  # a native crc32c.so shipped next to this file is loaded for the host OS.
  class CrcCalculator
    # Calculates the CRC32C of the bytes held in +data+.
    #
    # @param [StringIO] data buffer whose full content (data.string) is hashed
    # @return [Integer] the crc32c value
    # @raise [RuntimeError] when $USE_FAST_CRC is set and the host OS is not
    #   one that getOsType recognizes
    def self.calculate(data)
      unless $USE_FAST_CRC
        # Loaded lazily so the pure-Ruby implementation is only pulled in when used.
        require_relative 'origin/crc32c'
        crc32c = Digest::CRC32c.new
        crc32c.update(data.string)
        return crc32c.checksum.to_i
      end

      # Pick the native library matching the host; "unix" (solaris/bsd) reuses
      # the linux build. NOTE(review): whether that binary actually loads on
      # those systems is not verifiable from here.
      case getOsType
      when "linux", "unix"
        require_relative 'lib/linux/crc32c.so'
      when "windows"
        require_relative 'lib/win/crc32c.so'
      end
      # Third argument is presumably the initial CRC value -- TODO confirm
      # against the native extension's source.
      Crc32c.calculate(data.string, data.length, 0).to_i
    end

    # Classifies the host operating system from RbConfig::CONFIG['host_os'].
    #
    # @return [String] "windows", "linux" or "unix"
    # @raise [RuntimeError] when host_os matches none of the known patterns
    #   (for example darwin)
    def self.getOsType
      host_os = RbConfig::CONFIG['host_os']
      case host_os
      when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
        "windows"
      when /linux/
        "linux"
      when /solaris|bsd/
        "unix"
      else
        # The previous code referenced Error::WebDriverError, which is not
        # defined in this gem, so callers got a NameError instead of this message.
        raise "unsupported os: #{host_os}"
      end
    end
  end
end
|
Binary file
|
Binary file
|
File without changes
|
File without changes
|
File without changes
|
@@ -225,7 +225,7 @@ module OdpsDatahub
|
|
225
225
|
|
226
226
|
res = conn.getResponse
|
227
227
|
if res.code != '200'
|
228
|
-
raise "Add partition failed with error" + res.
|
228
|
+
raise "Add partition failed with error " + res.body
|
229
229
|
end
|
230
230
|
|
231
231
|
if res.to_hash['Content-Length'] != "0" and not res.body.to_s.include?"Instance"
|
@@ -35,6 +35,7 @@ module Fluent
|
|
35
35
|
config_param :aliyun_odps_hub_endpoint, :string, :default => nil
|
36
36
|
config_param :project, :string, :default => nil
|
37
37
|
config_param :format, :string, :default => 'out_file'
|
38
|
+
config_param :enable_fast_crc, :bool, :default => false
|
38
39
|
|
39
40
|
attr_accessor :tables
|
40
41
|
|
@@ -299,6 +300,10 @@ module Fluent
|
|
299
300
|
:aliyun_odps_endpoint => @aliyun_odps_endpoint,
|
300
301
|
:aliyun_odps_hub_endpoint => @aliyun_odps_hub_endpoint,
|
301
302
|
}
|
303
|
+
#init Global setting
|
304
|
+
if (@enable_fast_crc)
|
305
|
+
OdpsDatahub::OdpsConfig::setFastCrc(true)
|
306
|
+
end
|
302
307
|
#初始化各个table object
|
303
308
|
@tables.each { |te|
|
304
309
|
te.init(config)
|
@@ -19,7 +19,7 @@
|
|
19
19
|
require 'stringio'
|
20
20
|
require 'protobuf'
|
21
21
|
require_relative '../exceptions'
|
22
|
-
require_relative '../
|
22
|
+
require_relative '../crc/crc'
|
23
23
|
require_relative '../odps/odps_table'
|
24
24
|
|
25
25
|
module OdpsDatahub
|
@@ -87,10 +87,10 @@ module OdpsDatahub
|
|
87
87
|
end
|
88
88
|
|
89
89
|
def serialize(upStream, recordList)
|
90
|
-
crc32cPack =
|
90
|
+
crc32cPack = StringIO.new
|
91
91
|
if recordList.is_a?Array
|
92
92
|
recordList.each { |record|
|
93
|
-
crc32cRecord =
|
93
|
+
crc32cRecord = StringIO.new
|
94
94
|
schema = OdpsTableSchema.new
|
95
95
|
schema = record.getTableSchema
|
96
96
|
schema.mCols.each { | col |
|
@@ -98,45 +98,45 @@ module OdpsDatahub
|
|
98
98
|
if cellValue == nil
|
99
99
|
next
|
100
100
|
end
|
101
|
-
crc32cRecord.
|
101
|
+
crc32cRecord.write(encodeFixed32(col.mIdx + 1))
|
102
102
|
case col.mType
|
103
103
|
when $ODPS_BIGINT
|
104
|
-
crc32cRecord.
|
104
|
+
crc32cRecord.write(encodeFixed64(cellValue))
|
105
105
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
|
106
106
|
upStream.write(encodeSInt64(cellValue))
|
107
107
|
when $ODPS_DOUBLE
|
108
|
-
crc32cRecord.
|
108
|
+
crc32cRecord.write(encodeDouble(cellValue))
|
109
109
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::FIXED64, upStream)
|
110
110
|
upStream.write(encodeDouble(cellValue))
|
111
111
|
when $ODPS_BOOLEAN
|
112
|
-
crc32cRecord.
|
112
|
+
crc32cRecord.write(encodeBool(cellValue))
|
113
113
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
|
114
114
|
upStream.write(encodeBool(cellValue))
|
115
115
|
when $ODPS_DATETIME
|
116
|
-
crc32cRecord.
|
116
|
+
crc32cRecord.write(encodeFixed64(cellValue))
|
117
117
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
|
118
118
|
upStream.write(encodeDataTime(cellValue))
|
119
119
|
when $ODPS_STRING
|
120
|
-
crc32cRecord.
|
120
|
+
crc32cRecord.write(cellValue)
|
121
121
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::LENGTH_DELIMITED, upStream)
|
122
122
|
upStream.write(encodeString(cellValue))
|
123
123
|
when $ODPS_DECIMAL
|
124
|
-
crc32cRecord.
|
124
|
+
crc32cRecord.write(cellValue)
|
125
125
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::LENGTH_DELIMITED, upStream)
|
126
126
|
upStream.write(encodeString(cellValue))
|
127
127
|
else
|
128
128
|
raise OdpsDatahubException.new($INVALID_ARGUMENT, "invalid mType")
|
129
129
|
end
|
130
130
|
}
|
131
|
-
recordCrc = crc32cRecord
|
131
|
+
recordCrc = CrcCalculator::calculate(crc32cRecord)
|
132
132
|
writeTag($TUNNEL_END_RECORD, ::Protobuf::WireType::VARINT, upStream)
|
133
133
|
upStream.write(encodeUInt32(recordCrc))
|
134
|
-
crc32cPack.
|
134
|
+
crc32cPack.write(encodeFixed32(recordCrc))
|
135
135
|
}
|
136
136
|
writeTag($TUNNEL_META_COUNT, ::Protobuf::WireType::VARINT, upStream)
|
137
137
|
upStream.write(encodeSInt64(recordList.size))
|
138
138
|
writeTag($TUNNEL_META_CHECKSUM, ::Protobuf::WireType::VARINT, upStream)
|
139
|
-
upStream.write(encodeUInt32(crc32cPack
|
139
|
+
upStream.write(encodeUInt32(CrcCalculator::calculate(crc32cPack)))
|
140
140
|
else
|
141
141
|
raise OdpsDatahubException.new($INVALID_ARGUMENT, "param must be a array")
|
142
142
|
end
|
@@ -93,7 +93,7 @@ module OdpsDatahub
|
|
93
93
|
upStream = Zlib::Deflate.deflate(@mUpStream.string)
|
94
94
|
header[$CONTENT_MD5] = Digest::MD5.hexdigest(upStream)
|
95
95
|
header[$CONTENT_LENGTH] = upStream.length.to_s
|
96
|
-
#MAX_LENGTH
|
96
|
+
#MAX_LENGTH 2048*10KB
|
97
97
|
if upStream.length > $MAX_PACK_SIZE
|
98
98
|
raise OdpsDatahubException.new($PACK_SIZE_EXCEED, "pack size:" + upStream.length.to_s)
|
99
99
|
end
|
@@ -109,6 +109,7 @@ module OdpsDatahub
|
|
109
109
|
if res.code != "200"
|
110
110
|
raise OdpsDatahubException.new(json_obj["Code"], "write failed because " + json_obj["Message"])
|
111
111
|
end
|
112
|
+
return json_obj
|
112
113
|
end
|
113
114
|
|
114
115
|
private
|
data/odps_example.conf
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-aliyun-odps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Xiao Dong
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-12-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: fluentd
|
@@ -125,15 +125,17 @@ files:
|
|
125
125
|
- CHANGELOG.md
|
126
126
|
- Gemfile
|
127
127
|
- License
|
128
|
-
- README.cn.md
|
129
128
|
- README.md
|
130
129
|
- Rakefile
|
131
130
|
- VERSION
|
132
131
|
- fluent-plugin-aliyun-odps.gemspec
|
133
132
|
- lib/fluent/plugin/conf/config.rb
|
134
|
-
- lib/fluent/plugin/
|
135
|
-
- lib/fluent/plugin/
|
136
|
-
- lib/fluent/plugin/
|
133
|
+
- lib/fluent/plugin/crc/crc.rb
|
134
|
+
- lib/fluent/plugin/crc/lib/linux/crc32c.so
|
135
|
+
- lib/fluent/plugin/crc/lib/win/crc32c.so
|
136
|
+
- lib/fluent/plugin/crc/origin/crc.rb
|
137
|
+
- lib/fluent/plugin/crc/origin/crc32.rb
|
138
|
+
- lib/fluent/plugin/crc/origin/crc32c.rb
|
137
139
|
- lib/fluent/plugin/exceptions.rb
|
138
140
|
- lib/fluent/plugin/http/http_connection.rb
|
139
141
|
- lib/fluent/plugin/http/http_flag.rb
|
data/README.cn.md
DELETED
@@ -1,112 +0,0 @@
|
|
1
|
-
# Aliyun ODPS Plugin for Fluentd
|
2
|
-
|
3
|
-
## ��ʼʹ��
|
4
|
-
---
|
5
|
-
|
6
|
-
### ����
|
7
|
-
|
8
|
-
- �������ݴ�������(Open Data Processing Service�����ODPS)�ǰ���Ͱ������з��ĺ������ݴ���ƽ̨����Ҫ�����������ṹ�����ݵĴ洢�ͼ��㣬�����ṩ�������ݲֿ�Ľ�������Լ���Դ����ݵķ�����ģ����
|
9
|
-
- ODPS DataHub Service(DHS)��һ��ODPS���ڽ��������û��ṩʵʱ���ݵķ���(Publish)�Ͷ���(Subscribe)�Ĺ��ܡ�
|
10
|
-
|
11
|
-
|
12
|
-
### ����Ҫ��
|
13
|
-
|
14
|
-
ʹ�ô˲������Ҫ�߱����»���:
|
15
|
-
|
16
|
-
1. Ruby 2.1.0 �����
|
17
|
-
2. Gem 2.4.5 �����
|
18
|
-
3. Fluentd-0.10.49 ����� (*[Home Page](http://www.fluentd.org/)*)
|
19
|
-
4. Protobuf-3.5.1 �����(Ruby protobuf)
|
20
|
-
|
21
|
-
### GEM��װ
|
22
|
-
|
23
|
-
��ruby gem��װʹ��:
|
24
|
-
|
25
|
-
```
|
26
|
-
$ gem install fluent-plugin-aliyun-odps
|
27
|
-
```
|
28
|
-
|
29
|
-
### Դ�밲װ���
|
30
|
-
|
31
|
-
* ���Ȱ�װ����������fluentd�Լ�protobuf��
|
32
|
-
* Ȼ���Ŀ¼������Fluentd���Ŀ¼�£����磺 ��aliyun-odps-fluentd-plugin/lib/fluent/pluginĿ¼���Ƶ�{YOUR_FLUENTD_DIRECTORY}/lib/fluent/plugin�С�
|
33
|
-
|
34
|
-
```
|
35
|
-
$ gem install protobuf
|
36
|
-
$ gem install fluentd --no-ri --no-rdoc
|
37
|
-
$ git clone https://github.com/aliyun/aliyun-odps-fluentd-plugin.git
|
38
|
-
$ cp aliyun-odps-fluentd-plugin/lib/fluent/plugin/* {YOUR_FLUENTD_DIRECTORY}/lib/fluent/plugin/ -r
|
39
|
-
```
|
40
|
-
|
41
|
-
### ���ʹ��ʾ��
|
42
|
-
|
43
|
-
```
|
44
|
-
<source>
|
45
|
-
type tail
|
46
|
-
path /opt/log/in/in.log
|
47
|
-
pos_file /opt/log/in/in.log.pos
|
48
|
-
refresh_interval 5s
|
49
|
-
tag in.log
|
50
|
-
format /^(?<remote>[^ ]*) - - \[(?<datetime>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*) "-" "(?<agent>[^\"]*)"$/
|
51
|
-
time_format %Y%b%d %H:%M:%S %z
|
52
|
-
</source>
|
53
|
-
```
|
54
|
-
```
|
55
|
-
<match in.**>
|
56
|
-
type aliyun_odps
|
57
|
-
aliyun_access_id ************
|
58
|
-
aliyun_access_key *********
|
59
|
-
aliyun_odps_endpoint http://service.odps.aliyun.com/api
|
60
|
-
aliyun_odps_hub_endpoint http://dh.odps.aliyun.com
|
61
|
-
buffer_chunk_limit 2m
|
62
|
-
buffer_queue_limit 128
|
63
|
-
flush_interval 5s
|
64
|
-
project your_projectName
|
65
|
-
<table in.log>
|
66
|
-
table your_tableName
|
67
|
-
fields remote,method,path,code,size,agent
|
68
|
-
partition ctime=${datetime.strftime('%Y%m%d')}
|
69
|
-
time_format %d/%b/%Y:%H:%M:%S %z
|
70
|
-
shard_number 1
|
71
|
-
</table>
|
72
|
-
</match>
|
73
|
-
```
|
74
|
-
### ����˵��
|
75
|
-
|
76
|
-
- type(Fixed): �̶�ֵ aliyun_odps.
|
77
|
-
- aliyun_access_id(Required):������access_id.
|
78
|
-
- aliyun_access_key(Required):������access key.
|
79
|
-
- aliyun_odps_hub_endpoint(Required):�����ķ�������ESC�ϣ���ѱ�ֵ�趨Ϊ http://dh-ext.odps.aliyun-inc.com, ��������Ϊ http://dh.odps.aliyun.com.
|
80
|
-
- aliyunodps_endpoint(Required):�����ķ�������ESC�ϣ���ѱ�ֵ�趨Ϊ http://odps-ext.aiyun-inc.com/api, ��������Ϊ http://service.odps.aliyun.com/api .
|
81
|
-
- buffer_chunk_limit(Optional): ���С��֧�֡�k��(KB),��m��(MB),��g��(GB)��λ��Ĭ�� 8MB������ֵ2MB.
|
82
|
-
- buffer_queue_limit(Optional): ����д�С����ֵ��buffer_chunk_limit��ͬ����������������С��
|
83
|
-
- flush_interval(Optional): ǿ�Ʒ��ͼ�����ﵽʱ��������δ����ǿ�Ʒ���, Ĭ�� 60s.
|
84
|
-
- project(Required): project����.
|
85
|
-
- table(Required): table����.
|
86
|
-
- fields(Required): ��source��Ӧ���ֶ������������source֮��.
|
87
|
-
- partition(Optional)����Ϊ�������������ô���.
|
88
|
-
- ������֧�ֵ�����ģʽ:
|
89
|
-
- �̶�ֵ: partition ctime=20150804
|
90
|
-
- �ؼ���: partition ctime=${remote} ������remoteΪsource��ij�ֶΣ�
|
91
|
-
- ʱ���ʽ�ؼ���: partition ctime=${datetime.strftime('%Y%m%d')} ������datetimeΪsource��ijʱ���ʽ�ֶΣ����Ϊ%Y%m%d��ʽ��Ϊ�������ƣ�
|
92
|
-
- time_format(Optional):
|
93
|
-
- ���ʹ��ʱ���ʽ�ؼ���Ϊ<partition>, �����ñ�����. ����: source[datetime]="29/Aug/2015:11:10:16 +0800",������<time_format>Ϊ"%d/%b/%Y:%H:%M:%S %z"
|
94
|
-
- shard_number(Optional):ָ��shard���������������shard[0,shard_number-1]��Χ�ڵ�shard��д�����ݣ�����Ϊ����0��С��table��Ӧshard������������.
|
95
|
-
|
96
|
-
## �ٷ���վ
|
97
|
-
---
|
98
|
-
|
99
|
-
- [Fluentd User Guide](http://docs.fluentd.org/)
|
100
|
-
|
101
|
-
## ����
|
102
|
-
---
|
103
|
-
|
104
|
-
- [Sun Zongtao]()
|
105
|
-
- [Cai Ying]()
|
106
|
-
- [Dong Xiao](https://github.com/dongxiao1198)
|
107
|
-
- [Yang Hongbo](https://github.com/hongbosoftware)
|
108
|
-
|
109
|
-
## License
|
110
|
-
---
|
111
|
-
|
112
|
-
licensed under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0.html)
|