fluent-plugin-aliyun-odps 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/README.cn.md +5 -1
- data/VERSION +1 -1
- data/lib/fluent/plugin/conf/config.rb +9 -0
- data/lib/fluent/plugin/crc/crc.rb +1 -1
- data/lib/fluent/plugin/out_aliyun_odps.rb +4 -0
- data/lib/fluent/plugin/serialize/serializer.rb +5 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06cc7b0378693eab79c346e913747092b1ef84af
|
4
|
+
data.tar.gz: 8fe96f58093519df5ddf21e1cf4f8b1aaa71f0de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b2306b8bcc789d32b3bceb6640c5ebb8dfa23524df89020026c4693415ac0d445abf0cefee8f2c28f1595853549868eb92ba85ba78988d1f0b4148b0c6336d1
|
7
|
+
data.tar.gz: 835ef6d097a2599dc1839d7db5957b38755b09416020370c609c99c18fa51560e407a116becfcdf287d6ee472baccdd958ee90454342832781edbadcb75d5fd2
|
data/CHANGELOG.md
CHANGED
@@ -19,4 +19,8 @@ Use XStreamPack.
|
|
19
19
|
0.1.3
|
20
20
|
Drop record with error log when parse partition failed.
|
21
21
|
0.1.5
|
22
|
-
Fix string encode replace unknow char
|
22
|
+
Fix string encode replace unknow char
|
23
|
+
0.1.6
|
24
|
+
Fix raise exception bug
|
25
|
+
0.1.7
|
26
|
+
Add data_encoding config to format data.
|
data/README.cn.md
CHANGED
@@ -127,6 +127,7 @@ select * from students;
|
|
127
127
|
flush_interval 5s
|
128
128
|
project your_projectName
|
129
129
|
enable_fast_crc true
|
130
|
+
data_encoding UTF-8
|
130
131
|
<table nginx.access>
|
131
132
|
table nginx_logs #对应日志写入的odps表
|
132
133
|
fields remote,method,path,code,size,agent,requesttime
|
@@ -170,6 +171,7 @@ select * from students;
|
|
170
171
|
- retry_time(Optional): 发送每个pack数据时内置重试次数,默认3次.
|
171
172
|
- retry_interval(Optional): 重试间隔,默认1s.
|
172
173
|
- abandon_mode(Optional): 默认为false,设置成true会在重试retry_time后抛弃该数据包,否则会将异常抛送给fluentd,利用fluentd的重试机制重试,这种情况可能会导致数据重复.
|
174
|
+
- data_encoding(Optional): 默认使用源数据标示的encode方式,根据string.encoding获取,如果需要指定源数据编码方式,请设置该值,支持的类型:"US-ASCII", "ASCII-8BIT", "UTF-8", "ISO-8859-1", "Shift_JIS", "EUC-JP", "Windows-31J", "BINARY", "CP932", "eucJP"
|
173
175
|
|
174
176
|
## 常见使用问题以及异常描述
|
175
177
|
---
|
@@ -193,7 +195,9 @@ select * from students;
|
|
193
195
|
- 这个错误信息抛出代表解析partition过程出现问题,请检查partition配置,如果数据中存在脏数据也可能遇到这个问题。
|
194
196
|
* 如何更改为淘宝源RubyGem?
|
195
197
|
- RubyGems 镜像[https://ruby.taobao.org/]
|
196
|
-
|
198
|
+
* 导入数据抛异常"\xE8" from ASCII-8BIT to UTF-8 (Encoding::UndefinedConversionError)
|
199
|
+
- 该错误往往由于source插件在读取数据时,数据真实编码为utf-8,但是transport给fluend的string.encoding却为ASCII-8BIT导致类似错误,这种情况需要设置data_encoding来进行转码。
|
200
|
+
|
197
201
|
## 官方网站
|
198
202
|
- [Fluentd User Guide](http://docs.fluentd.org/)
|
199
203
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.6
|
1
|
+
0.1.7
|
data/lib/fluent/plugin/conf/config.rb
CHANGED
@@ -18,6 +18,7 @@
|
|
18
18
|
#
|
19
19
|
module OdpsDatahub
|
20
20
|
$USE_FAST_CRC = false
|
21
|
+
$DATA_ENCODE = nil
|
21
22
|
class OdpsConfig
|
22
23
|
attr_accessor :accessId, :accessKey, :odpsEndpoint, :datahubEndpoint, :defaultProjectName, :userAgent
|
23
24
|
|
@@ -33,5 +34,13 @@ module OdpsDatahub
|
|
33
34
|
def self.setFastCrc(value)
|
34
35
|
$USE_FAST_CRC = value
|
35
36
|
end
|
37
|
+
|
38
|
+
def self.setEncode(value)
|
39
|
+
if ["US-ASCII", "ASCII-8BIT", "UTF-8", "ISO-8859-1", "Shift_JIS", "EUC-JP", "Windows-31J", "BINARY", "CP932", "eucJP"].include?(value)
|
40
|
+
$DATA_ENCODE = value
|
41
|
+
else
|
42
|
+
raise "Unsupported encoding type."
|
43
|
+
end
|
44
|
+
end
|
36
45
|
end
|
37
46
|
end
|
data/lib/fluent/plugin/out_aliyun_odps.rb
CHANGED
@@ -36,6 +36,7 @@ module Fluent
|
|
36
36
|
config_param :project, :string, :default => nil
|
37
37
|
config_param :format, :string, :default => 'out_file'
|
38
38
|
config_param :enable_fast_crc, :bool, :default => false
|
39
|
+
config_param :data_encoding, :string, :default => nil
|
39
40
|
|
40
41
|
attr_accessor :tables
|
41
42
|
|
@@ -328,6 +329,9 @@ module Fluent
|
|
328
329
|
raise e.to_s
|
329
330
|
end
|
330
331
|
end
|
332
|
+
if (@data_encoding != nil)
|
333
|
+
OdpsDatahub::OdpsConfig::setEncode(@data_encoding)
|
334
|
+
end
|
331
335
|
#初始化各个table object
|
332
336
|
@tables.each { |te|
|
333
337
|
te.init(config)
|
data/lib/fluent/plugin/serialize/serializer.rb
CHANGED
@@ -61,7 +61,6 @@ module OdpsDatahub
|
|
61
61
|
end
|
62
62
|
|
63
63
|
def encodeString(value)
|
64
|
-
value.encode!(::Protobuf::Field::StringField::ENCODING, :invalid => :replace, :undef => :replace, :replace => "")
|
65
64
|
value.force_encoding(::Protobuf::Field::BytesField::BYTES_ENCODING)
|
66
65
|
string_bytes = ::Protobuf::Field::VarintField.encode(value.size)
|
67
66
|
string_bytes << value
|
@@ -116,7 +115,11 @@ module OdpsDatahub
|
|
116
115
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
|
117
116
|
upStream.write(encodeDataTime(cellValue))
|
118
117
|
when $ODPS_STRING
|
119
|
-
|
118
|
+
if $DATA_ENCODE != nil
|
119
|
+
cellValue.encode!(::Protobuf::Field::StringField::ENCODING, $DATA_ENCODE)
|
120
|
+
else
|
121
|
+
cellValue.encode!(::Protobuf::Field::StringField::ENCODING)
|
122
|
+
end
|
120
123
|
encode_str = encodeString(cellValue)
|
121
124
|
crc32cRecord.write(cellValue)
|
122
125
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::LENGTH_DELIMITED, upStream)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-aliyun-odps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Xiao Dong
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-06-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: fluentd
|