fluent-plugin-aliyun-odps 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/README.cn.md +5 -1
- data/VERSION +1 -1
- data/lib/fluent/plugin/conf/config.rb +9 -0
- data/lib/fluent/plugin/crc/crc.rb +1 -1
- data/lib/fluent/plugin/out_aliyun_odps.rb +4 -0
- data/lib/fluent/plugin/serialize/serializer.rb +5 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06cc7b0378693eab79c346e913747092b1ef84af
|
4
|
+
data.tar.gz: 8fe96f58093519df5ddf21e1cf4f8b1aaa71f0de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b2306b8bcc789d32b3bceb6640c5ebb8dfa23524df89020026c4693415ac0d445abf0cefee8f2c28f1595853549868eb92ba85ba78988d1f0b4148b0c6336d1
|
7
|
+
data.tar.gz: 835ef6d097a2599dc1839d7db5957b38755b09416020370c609c99c18fa51560e407a116becfcdf287d6ee472baccdd958ee90454342832781edbadcb75d5fd2
|
data/CHANGELOG.md
CHANGED
@@ -19,4 +19,8 @@ Use XStreamPack.
|
|
19
19
|
0.1.3
|
20
20
|
Drop record with error log when parse partition failed.
|
21
21
|
0.1.5
|
22
|
-
Fix string encode replace unknow char
|
22
|
+
Fix string encode replace unknow char
|
23
|
+
0.1.6
|
24
|
+
Fix raise exception bug
|
25
|
+
0.1.7
|
26
|
+
Add data_encoding config to format data.
|
data/README.cn.md
CHANGED
@@ -127,6 +127,7 @@ select * from students;
|
|
127
127
|
flush_interval 5s
|
128
128
|
project your_projectName
|
129
129
|
enable_fast_crc true
|
130
|
+
data_encoding UTF-8
|
130
131
|
<table nginx.access>
|
131
132
|
table nginx_logs #对应日志写入的odps表
|
132
133
|
fields remote,method,path,code,size,agent,requesttime
|
@@ -170,6 +171,7 @@ select * from students;
|
|
170
171
|
- retry_time(Optional): 发送每个pack数据时内置重试次数,默认3次.
|
171
172
|
- retry_interval(Optional): 重试间隔,默认1s.
|
172
173
|
- abandon_mode(Optional): 默认为false,设置成true会在重试retry_time后抛弃该数据包,否则会将异常抛送给fluentd,利用fluentd的重试机制重试,这种情况可能会导致数据重复.
|
174
|
+
- data_encoding(Optional): 默认使用源数据标示的encode方式,根据string.encoding获取,如果需要指定源数据编码方式,请设置该值,支持的类型:"US-ASCII", "ASCII-8BIT", "UTF-8", "ISO-8859-1", "Shift_JIS", "EUC-JP", "Windows-31J", "BINARY", "CP932", "eucJP"
|
173
175
|
|
174
176
|
## 常见使用问题以及异常描述
|
175
177
|
---
|
@@ -193,7 +195,9 @@ select * from students;
|
|
193
195
|
- 这个错误信息抛出代表解析partition过程出现问题,请检查partition配置,如果数据中存在脏数据也可能遇到这个问题。
|
194
196
|
* 如何更改为淘宝源RubyGem?
|
195
197
|
- RubyGems 镜像[https://ruby.taobao.org/]
|
196
|
-
|
198
|
+
* 导入数据抛异常"\xE8" from ASCII-8BIT to UTF-8 (Encoding::UndefinedConversionError)
|
199
|
+
- 该错误往往由于source插件在读取数据时,数据真实编码为utf-8,但是transport给fluend的string.encoding却为ASCII-8BIT导致类似错误,这种情况需要设置data_encoding来进行转码。
|
200
|
+
|
197
201
|
## 官方网站
|
198
202
|
- [Fluentd User Guide](http://docs.fluentd.org/)
|
199
203
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.6
|
1
|
+
0.1.7
|
data/lib/fluent/plugin/conf/config.rb
CHANGED
@@ -18,6 +18,7 @@
|
|
18
18
|
#
|
19
19
|
module OdpsDatahub
|
20
20
|
$USE_FAST_CRC = false
|
21
|
+
$DATA_ENCODE = nil
|
21
22
|
class OdpsConfig
|
22
23
|
attr_accessor :accessId, :accessKey, :odpsEndpoint, :datahubEndpoint, :defaultProjectName, :userAgent
|
23
24
|
|
@@ -33,5 +34,13 @@ module OdpsDatahub
|
|
33
34
|
def self.setFastCrc(value)
|
34
35
|
$USE_FAST_CRC = value
|
35
36
|
end
|
37
|
+
|
38
|
+
def self.setEncode(value)
|
39
|
+
if ["US-ASCII", "ASCII-8BIT", "UTF-8", "ISO-8859-1", "Shift_JIS", "EUC-JP", "Windows-31J", "BINARY", "CP932", "eucJP"].include?(value)
|
40
|
+
$DATA_ENCODE = value
|
41
|
+
else
|
42
|
+
raise "Unsupported encoding type."
|
43
|
+
end
|
44
|
+
end
|
36
45
|
end
|
37
46
|
end
|
data/lib/fluent/plugin/out_aliyun_odps.rb
CHANGED
@@ -36,6 +36,7 @@ module Fluent
|
|
36
36
|
config_param :project, :string, :default => nil
|
37
37
|
config_param :format, :string, :default => 'out_file'
|
38
38
|
config_param :enable_fast_crc, :bool, :default => false
|
39
|
+
config_param :data_encoding, :string, :default => nil
|
39
40
|
|
40
41
|
attr_accessor :tables
|
41
42
|
|
@@ -328,6 +329,9 @@ module Fluent
|
|
328
329
|
raise e.to_s
|
329
330
|
end
|
330
331
|
end
|
332
|
+
if (@data_encoding != nil)
|
333
|
+
OdpsDatahub::OdpsConfig::setEncode(@data_encoding)
|
334
|
+
end
|
331
335
|
#初始化各个table object
|
332
336
|
@tables.each { |te|
|
333
337
|
te.init(config)
|
data/lib/fluent/plugin/serialize/serializer.rb
CHANGED
@@ -61,7 +61,6 @@ module OdpsDatahub
|
|
61
61
|
end
|
62
62
|
|
63
63
|
def encodeString(value)
|
64
|
-
value.encode!(::Protobuf::Field::StringField::ENCODING, :invalid => :replace, :undef => :replace, :replace => "")
|
65
64
|
value.force_encoding(::Protobuf::Field::BytesField::BYTES_ENCODING)
|
66
65
|
string_bytes = ::Protobuf::Field::VarintField.encode(value.size)
|
67
66
|
string_bytes << value
|
@@ -116,7 +115,11 @@ module OdpsDatahub
|
|
116
115
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
|
117
116
|
upStream.write(encodeDataTime(cellValue))
|
118
117
|
when $ODPS_STRING
|
119
|
-
|
118
|
+
if $DATA_ENCODE != nil
|
119
|
+
cellValue.encode!(::Protobuf::Field::StringField::ENCODING, $DATA_ENCODE)
|
120
|
+
else
|
121
|
+
cellValue.encode!(::Protobuf::Field::StringField::ENCODING)
|
122
|
+
end
|
120
123
|
encode_str = encodeString(cellValue)
|
121
124
|
crc32cRecord.write(cellValue)
|
122
125
|
writeTag(col.mIdx + 1, ::Protobuf::WireType::LENGTH_DELIMITED, upStream)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-aliyun-odps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Xiao Dong
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-06-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: fluentd
|