fluent-plugin-aliyun-odps 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3557dd3c4653efb184fb04200769c5a9f53a6947
4
- data.tar.gz: 6e35787b0de40f61db279b5d640209e8d800d5e9
3
+ metadata.gz: 06cc7b0378693eab79c346e913747092b1ef84af
4
+ data.tar.gz: 8fe96f58093519df5ddf21e1cf4f8b1aaa71f0de
5
5
  SHA512:
6
- metadata.gz: 7e37bd48d24117a0968f3fd6646b3ce8c64f133138410cbebcae287fe8ccfb6b4a8d1ebe57dd140e8c25fe1a4d9a430ef4028647508cd1514ab4283d29a6a1ad
7
- data.tar.gz: 8112131d1bb7e0c09b31f98af58e2b28370107a5e715b5f1c95f5e5811cac1bafb3b692cbb9f2367ba2105db4bdfcc7be193996b2f0c93be723b1b190a2190b5
6
+ metadata.gz: 6b2306b8bcc789d32b3bceb6640c5ebb8dfa23524df89020026c4693415ac0d445abf0cefee8f2c28f1595853549868eb92ba85ba78988d1f0b4148b0c6336d1
7
+ data.tar.gz: 835ef6d097a2599dc1839d7db5957b38755b09416020370c609c99c18fa51560e407a116becfcdf287d6ee472baccdd958ee90454342832781edbadcb75d5fd2
@@ -19,4 +19,8 @@ Use XStreamPack.
19
19
  0.1.3
20
20
  Drop record with error log when parse partition failed.
21
21
  0.1.5
22
- Fix string encode replace unknow char
22
+ Fix string encode replace unknow char
23
+ 0.1.6
24
+ Fix raise exception bug
25
+ 0.1.7
26
+ Add data_encoding config to format data.
@@ -127,6 +127,7 @@ select * from students;
127
127
  flush_interval 5s
128
128
  project your_projectName
129
129
  enable_fast_crc true
130
+ data_encoding UTF-8
130
131
  <table nginx.access>
131
132
  table nginx_logs #对应日志写入的odps表
132
133
  fields remote,method,path,code,size,agent,requesttime
@@ -170,6 +171,7 @@ select * from students;
170
171
  - retry_time(Optional): 发送每个pack数据时内置重试次数,默认3次.
171
172
  - retry_interval(Optional): 重试间隔,默认1s.
172
173
  - abandon_mode(Optional): 默认为false,设置成true会在重试retry_time后抛弃该数据包,否则会将异常抛送给fluentd,利用fluentd的重试机制重试,这种情况可能会导致数据重复.
174
+ - data_encoding(Optional): 默认使用源数据标示的encode方式,根据string.encoding获取,如果需要指定源数据编码方式,请设置该值,支持的类型:"US-ASCII", "ASCII-8BIT", "UTF-8", "ISO-8859-1", "Shift_JIS", "EUC-JP", "Windows-31J", "BINARY", "CP932", "eucJP"
173
175
 
174
176
  ## 常见使用问题以及异常描述
175
177
  ---
@@ -193,7 +195,9 @@ select * from students;
193
195
  - 这个错误信息抛出代表解析partition过程出现问题,请检查partition配置,如果数据中存在脏数据也可能遇到这个问题。
194
196
  * 如何更改为淘宝源RubyGem?
195
197
  - RubyGems 镜像[https://ruby.taobao.org/]
196
-
198
+ * 导入数据抛异常"\xE8" from ASCII-8BIT to UTF-8 (Encoding::UndefinedConversionError)
199
+ - 该错误往往由于source插件在读取数据时,数据真实编码为utf-8,但是transport给fluentd的string.encoding却为ASCII-8BIT导致类似错误,这种情况需要设置data_encoding来进行转码。
200
+
197
201
  ## 官方网站
198
202
  - [Fluentd User Guide](http://docs.fluentd.org/)
199
203
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.6
1
+ 0.1.7
@@ -18,6 +18,7 @@
18
18
  #
19
19
  module OdpsDatahub
20
20
  $USE_FAST_CRC = false
21
+ $DATA_ENCODE = nil
21
22
  class OdpsConfig
22
23
  attr_accessor :accessId, :accessKey, :odpsEndpoint, :datahubEndpoint, :defaultProjectName, :userAgent
23
24
 
@@ -33,5 +34,13 @@ module OdpsDatahub
33
34
  def self.setFastCrc(value)
34
35
  $USE_FAST_CRC = value
35
36
  end
37
+
38
+ def self.setEncode(value)
39
+ if ["US-ASCII", "ASCII-8BIT", "UTF-8", "ISO-8859-1", "Shift_JIS", "EUC-JP", "Windows-31J", "BINARY", "CP932", "eucJP"].include?(value)
40
+ $DATA_ENCODE = value
41
+ else
42
+ raise "Unsupported encoding type."
43
+ end
44
+ end
36
45
  end
37
46
  end
@@ -46,7 +46,7 @@ module OdpsDatahub
46
46
  when /solaris|bsd/
47
47
  "unix"
48
48
  else
49
- raise Error::WebDriverError, "unspport os"
49
+ raise "unspport os"
50
50
  end
51
51
  end
52
52
  end
@@ -36,6 +36,7 @@ module Fluent
36
36
  config_param :project, :string, :default => nil
37
37
  config_param :format, :string, :default => 'out_file'
38
38
  config_param :enable_fast_crc, :bool, :default => false
39
+ config_param :data_encoding, :string, :default => nil
39
40
 
40
41
  attr_accessor :tables
41
42
 
@@ -328,6 +329,9 @@ module Fluent
328
329
  raise e.to_s
329
330
  end
330
331
  end
332
+ if (@data_encoding != nil)
333
+ OdpsDatahub::OdpsConfig::setEncode(@data_encoding)
334
+ end
331
335
  #初始化各个table object
332
336
  @tables.each { |te|
333
337
  te.init(config)
@@ -61,7 +61,6 @@ module OdpsDatahub
61
61
  end
62
62
 
63
63
  def encodeString(value)
64
- value.encode!(::Protobuf::Field::StringField::ENCODING, :invalid => :replace, :undef => :replace, :replace => "")
65
64
  value.force_encoding(::Protobuf::Field::BytesField::BYTES_ENCODING)
66
65
  string_bytes = ::Protobuf::Field::VarintField.encode(value.size)
67
66
  string_bytes << value
@@ -116,7 +115,11 @@ module OdpsDatahub
116
115
  writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
117
116
  upStream.write(encodeDataTime(cellValue))
118
117
  when $ODPS_STRING
119
- cellValue = cellValue.encode('utf-8')
118
+ if $DATA_ENCODE != nil
119
+ cellValue.encode!(::Protobuf::Field::StringField::ENCODING, $DATA_ENCODE)
120
+ else
121
+ cellValue.encode!(::Protobuf::Field::StringField::ENCODING)
122
+ end
120
123
  encode_str = encodeString(cellValue)
121
124
  crc32cRecord.write(cellValue)
122
125
  writeTag(col.mIdx + 1, ::Protobuf::WireType::LENGTH_DELIMITED, upStream)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-aliyun-odps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xiao Dong
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-05-26 00:00:00.000000000 Z
12
+ date: 2016-06-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd