fluent-plugin-aliyun-odps 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3557dd3c4653efb184fb04200769c5a9f53a6947
4
- data.tar.gz: 6e35787b0de40f61db279b5d640209e8d800d5e9
3
+ metadata.gz: 06cc7b0378693eab79c346e913747092b1ef84af
4
+ data.tar.gz: 8fe96f58093519df5ddf21e1cf4f8b1aaa71f0de
5
5
  SHA512:
6
- metadata.gz: 7e37bd48d24117a0968f3fd6646b3ce8c64f133138410cbebcae287fe8ccfb6b4a8d1ebe57dd140e8c25fe1a4d9a430ef4028647508cd1514ab4283d29a6a1ad
7
- data.tar.gz: 8112131d1bb7e0c09b31f98af58e2b28370107a5e715b5f1c95f5e5811cac1bafb3b692cbb9f2367ba2105db4bdfcc7be193996b2f0c93be723b1b190a2190b5
6
+ metadata.gz: 6b2306b8bcc789d32b3bceb6640c5ebb8dfa23524df89020026c4693415ac0d445abf0cefee8f2c28f1595853549868eb92ba85ba78988d1f0b4148b0c6336d1
7
+ data.tar.gz: 835ef6d097a2599dc1839d7db5957b38755b09416020370c609c99c18fa51560e407a116becfcdf287d6ee472baccdd958ee90454342832781edbadcb75d5fd2
@@ -19,4 +19,8 @@ Use XStreamPack.
19
19
  0.1.3
20
20
  Drop record with error log when parse partition failed.
21
21
  0.1.5
22
- Fix string encode replace unknown char
22
+ Fix string encode replace unknown char
23
+ 0.1.6
24
+ Fix raise exception bug
25
+ 0.1.7
26
+ Add data_encoding config to format data.
@@ -127,6 +127,7 @@ select * from students;
127
127
  flush_interval 5s
128
128
  project your_projectName
129
129
  enable_fast_crc true
130
+ data_encoding UTF-8
130
131
  <table nginx.access>
131
132
  table nginx_logs #对应日志写入的odps表
132
133
  fields remote,method,path,code,size,agent,requesttime
@@ -170,6 +171,7 @@ select * from students;
170
171
  - retry_time(Optional): 发送每个pack数据时内置重试次数,默认3次.
171
172
  - retry_interval(Optional): 重试间隔,默认1s.
172
173
  - abandon_mode(Optional): 默认为false,设置成true会在重试retry_time后抛弃该数据包,否则会将异常抛送给fluentd,利用fluentd的重试机制重试,这种情况可能会导致数据重复.
174
+ - data_encoding(Optional): 默认使用源数据标示的encode方式,根据string.encoding获取,如果需要指定源数据编码方式,请设置该值,支持的类型:"US-ASCII", "ASCII-8BIT", "UTF-8", "ISO-8859-1", "Shift_JIS", "EUC-JP", "Windows-31J", "BINARY", "CP932", "eucJP"
173
175
 
174
176
  ## 常见使用问题以及异常描述
175
177
  ---
@@ -193,7 +195,9 @@ select * from students;
193
195
  - 这个错误信息抛出代表解析partition过程出现问题,请检查partition配置,如果数据中存在脏数据也可能遇到这个问题。
194
196
  * 如何更改为淘宝源RubyGem?
195
197
  - RubyGems 镜像[https://ruby.taobao.org/]
196
-
198
+ * 导入数据抛异常"\xE8" from ASCII-8BIT to UTF-8 (Encoding::UndefinedConversionError)
199
+ - 该错误往往由于source插件在读取数据时,数据真实编码为utf-8,但是transport给fluentd的string.encoding却为ASCII-8BIT导致类似错误,这种情况需要设置data_encoding来进行转码。
200
+
197
201
  ## 官方网站
198
202
  - [Fluentd User Guide](http://docs.fluentd.org/)
199
203
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.6
1
+ 0.1.7
@@ -18,6 +18,7 @@
18
18
  #
19
19
  module OdpsDatahub
20
20
  $USE_FAST_CRC = false
21
+ $DATA_ENCODE = nil
21
22
  class OdpsConfig
22
23
  attr_accessor :accessId, :accessKey, :odpsEndpoint, :datahubEndpoint, :defaultProjectName, :userAgent
23
24
 
@@ -33,5 +34,13 @@ module OdpsDatahub
33
34
  def self.setFastCrc(value)
34
35
  $USE_FAST_CRC = value
35
36
  end
37
+
38
+ def self.setEncode(value) # Record the declared source-data encoding name in the global $DATA_ENCODE.
39
+ if ["US-ASCII", "ASCII-8BIT", "UTF-8", "ISO-8859-1", "Shift_JIS", "EUC-JP", "Windows-31J", "BINARY", "CP932", "eucJP"].include?(value) # Only names on this fixed list are accepted (exact, case-sensitive match).
40
+ $DATA_ENCODE = value # Stored unchanged, as the name string the caller passed in.
41
+ else
42
+ raise "Unsupported encoding type." # Bare-string raise, so callers see a RuntimeError.
43
+ end
44
+ end
36
45
  end
37
46
  end
@@ -46,7 +46,7 @@ module OdpsDatahub
46
46
  when /solaris|bsd/
47
47
  "unix"
48
48
  else
49
- raise Error::WebDriverError, "unspport os"
49
+ raise "unspport os"
50
50
  end
51
51
  end
52
52
  end
@@ -36,6 +36,7 @@ module Fluent
36
36
  config_param :project, :string, :default => nil
37
37
  config_param :format, :string, :default => 'out_file'
38
38
  config_param :enable_fast_crc, :bool, :default => false
39
+ config_param :data_encoding, :string, :default => nil
39
40
 
40
41
  attr_accessor :tables
41
42
 
@@ -328,6 +329,9 @@ module Fluent
328
329
  raise e.to_s
329
330
  end
330
331
  end
332
+ if (@data_encoding != nil)
333
+ OdpsDatahub::OdpsConfig::setEncode(@data_encoding)
334
+ end
331
335
  #初始化各个table object
332
336
  @tables.each { |te|
333
337
  te.init(config)
@@ -61,7 +61,6 @@ module OdpsDatahub
61
61
  end
62
62
 
63
63
  def encodeString(value)
64
- value.encode!(::Protobuf::Field::StringField::ENCODING, :invalid => :replace, :undef => :replace, :replace => "")
65
64
  value.force_encoding(::Protobuf::Field::BytesField::BYTES_ENCODING)
66
65
  string_bytes = ::Protobuf::Field::VarintField.encode(value.size)
67
66
  string_bytes << value
@@ -116,7 +115,11 @@ module OdpsDatahub
116
115
  writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
117
116
  upStream.write(encodeDataTime(cellValue))
118
117
  when $ODPS_STRING
119
- cellValue = cellValue.encode('utf-8')
118
+ if $DATA_ENCODE != nil
119
+ cellValue.encode!(::Protobuf::Field::StringField::ENCODING, $DATA_ENCODE)
120
+ else
121
+ cellValue.encode!(::Protobuf::Field::StringField::ENCODING)
122
+ end
120
123
  encode_str = encodeString(cellValue)
121
124
  crc32cRecord.write(cellValue)
122
125
  writeTag(col.mIdx + 1, ::Protobuf::WireType::LENGTH_DELIMITED, upStream)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-aliyun-odps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xiao Dong
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-05-26 00:00:00.000000000 Z
12
+ date: 2016-06-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd