fluent-plugin-aliyun-odps 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 33c05a1a6b13ea410537698e8bd6ee0ded655fae
4
- data.tar.gz: 90f5de535dc2d9d48d65442e4f84bebc3d0870fb
3
+ metadata.gz: 1e49e64231330fc82748d84415440558c8503b30
4
+ data.tar.gz: e328545513c76cf9f90cded935e7548037ff83ef
5
5
  SHA512:
6
- metadata.gz: cbc2cf4a5cef08a7f7235972f8bdb4e99a24ffbf4b1d78dd5912683d6eddde45c5e363186c5317085f2965a783c608dc24dba6f1f7b6ace6abf997642566f387
7
- data.tar.gz: 606b07286d5b0ab22dbc01aa7619b9718257b1083e985b295f1ac1be6cc280727ab1edfb4dbae30bce86d5575fc9530f2a6c109a3f49ad821431c51cf1e08095
6
+ metadata.gz: a032d3ea1acdfd27f9f002a1d9eacb5a2b516c18f042b9a8f6aa4d6235220bf7e8c2da9aa7bc321a628d96dee364fbbdbef48ec06a97c404b208ea02b66e3ab4
7
+ data.tar.gz: 5bdc7642e0a3d1d58dcdc0bc6afc1288d152b21703f79db3a66edb3efdd156637481340f280d58d977be596b68fefe09b4773a78355f3827c881c37f19cda484
data/CHANGELOG.md CHANGED
@@ -3,4 +3,6 @@ Fix datetime format bug, support String, DateTime, Time type when write to a dat
3
3
  0.0.5
4
4
  Add reload shard when import fails, and remove unload shard operation when shut down.
5
5
  0.0.6
6
- Add decimal support, fix string input while setting double and int.
6
+ Add decimal support, fix string input while setting double and int.
7
+ 0.0.7
8
+ Add error msg when add partition fail, support fast crc, remove pack size limit.
data/README.md CHANGED
@@ -98,6 +98,7 @@ $ cp aliyun-odps-fluentd-plugin/lib/fluent/plugin/* {YOUR_FLUENTD_DIRECTORY}/lib
98
98
  - time_format(Optional):
99
99
  - if you use a keyword to set your <partition> and that keyword is in a time format, please set the param <time_format>. Example: source[datetime] = "29/Aug/2015:11:10:16 +0800", and the param <time_format> is "%d/%b/%Y:%H:%M:%S %z"
100
100
  - shard_number(Optional): data will be written to shards in the range [0,shard_number-1]; this value must be greater than 0 and less than the max shard number of your table.
101
+ - enable_fast_crc(Optional): use the fast native crc32c.so to calculate the CRC; this improves speed a lot, but it is not supported on some operating systems.
101
102
 
102
103
  ## Useful Links
103
104
  ---
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.6
1
+ 0.0.7
@@ -17,6 +17,7 @@
17
17
  #under the License.
18
18
  #
19
19
  module OdpsDatahub
20
+ $USE_FAST_CRC = false
20
21
  class OdpsConfig
21
22
  attr_accessor :accessId, :accessKey, :odpsEndpoint, :datahubEndpoint, :defaultProjectName, :userAgent
22
23
 
@@ -28,5 +29,9 @@ module OdpsDatahub
28
29
  @defaultProject = defaultProjectName
29
30
  @userAgent = ""
30
31
  end
32
+
33
+ def self.setFastCrc(value)
34
+ $USE_FAST_CRC = value
35
+ end
31
36
  end
32
37
  end
@@ -0,0 +1,53 @@
1
+ #
2
+ #Licensed to the Apache Software Foundation (ASF) under one
3
+ #or more contributor license agreements. See the NOTICE file
4
+ #distributed with this work for additional information
5
+ #regarding copyright ownership. The ASF licenses this file
6
+ #to you under the Apache License, Version 2.0 (the
7
+ #"License"); you may not use this file except in compliance
8
+ #with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ #Unless required by applicable law or agreed to in writing,
13
+ #software distributed under the License is distributed on an
14
+ #"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ #KIND, either express or implied. See the License for the
16
+ #specific language governing permissions and limitations
17
+ #under the License.
18
+ #
19
+ require 'rbconfig'
20
+ module OdpsDatahub
21
+ class CrcCalculator
22
+ # @param [StringIO] data
23
+ # @return crc32c to_i
24
+ def self.calculate(data)
25
+ if (!$USE_FAST_CRC)
26
+ require_relative 'origin/crc32c'
27
+ crc32c = Digest::CRC32c.new
28
+ crc32c.update(data.string)
29
+ return crc32c.checksum.to_i
30
+ elsif getOsType == "linux" || getOsType == "unix"
31
+ require_relative 'lib/linux/crc32c.so'
32
+ return Crc32c.calculate(data.string, data.length, 0).to_i
33
+ elsif getOsType == "windows"
34
+ require_relative 'lib/win/crc32c.so'
35
+ return Crc32c.calculate(data.string, data.length, 0).to_i
36
+ end
37
+ end
38
+
39
+ def self.getOsType
40
+ host_os = RbConfig::CONFIG['host_os']
41
+ case host_os
42
+ when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
43
+ "windows"
44
+ when /linux/
45
+ "linux"
46
+ when /solaris|bsd/
47
+ "unix"
48
+ else
49
+ raise Error::WebDriverError, "unspport os"
50
+ end
51
+ end
52
+ end
53
+ end
File without changes
File without changes
File without changes
@@ -16,6 +16,7 @@
16
16
  #specific language governing permissions and limitations
17
17
  #under the License.
18
18
  #
19
+
19
20
  require 'net/http'
20
21
  require 'base64'
21
22
  require 'openssl'
@@ -18,7 +18,7 @@
18
18
  #
19
19
  module OdpsDatahub
20
20
  $SDK_UA_STR = "ODPS Ruby SDK v0.1"
21
- $MAX_PACK_SIZE = 2048*1024
21
+ $MAX_PACK_SIZE = 2048*10*1024
22
22
  class HttpHeaders
23
23
  $AUTHORIZATION = "Authorization"
24
24
  $CACHE_CONTROL = "Cache-Control"
@@ -225,7 +225,7 @@ module OdpsDatahub
225
225
 
226
226
  res = conn.getResponse
227
227
  if res.code != '200'
228
- raise "Add partition failed with error" + res.code.to_s
228
+ raise "Add partition failed with error " + res.body
229
229
  end
230
230
 
231
231
  if res.to_hash['Content-Length'] != "0" and not res.body.to_s.include?"Instance"
@@ -35,6 +35,7 @@ module Fluent
35
35
  config_param :aliyun_odps_hub_endpoint, :string, :default => nil
36
36
  config_param :project, :string, :default => nil
37
37
  config_param :format, :string, :default => 'out_file'
38
+ config_param :enable_fast_crc, :bool, :default => false
38
39
 
39
40
  attr_accessor :tables
40
41
 
@@ -299,6 +300,10 @@ module Fluent
299
300
  :aliyun_odps_endpoint => @aliyun_odps_endpoint,
300
301
  :aliyun_odps_hub_endpoint => @aliyun_odps_hub_endpoint,
301
302
  }
303
+ #init Global setting
304
+ if (@enable_fast_crc)
305
+ OdpsDatahub::OdpsConfig::setFastCrc(true)
306
+ end
302
307
  #初始化各个table object
303
308
  @tables.each { |te|
304
309
  te.init(config)
@@ -18,7 +18,7 @@
18
18
  #
19
19
  require 'stringio'
20
20
  require 'protobuf'
21
- require_relative '../digest/crc32c'
21
+ require_relative '../crc/origin/crc32c'
22
22
  require_relative '../odps/odps_table'
23
23
 
24
24
  module OdpsDatahub
@@ -19,7 +19,7 @@
19
19
  require 'stringio'
20
20
  require 'protobuf'
21
21
  require_relative '../exceptions'
22
- require_relative '../digest/crc32c'
22
+ require_relative '../crc/crc'
23
23
  require_relative '../odps/odps_table'
24
24
 
25
25
  module OdpsDatahub
@@ -87,10 +87,10 @@ module OdpsDatahub
87
87
  end
88
88
 
89
89
  def serialize(upStream, recordList)
90
- crc32cPack = ::Digest::CRC32c.new
90
+ crc32cPack = StringIO.new
91
91
  if recordList.is_a?Array
92
92
  recordList.each { |record|
93
- crc32cRecord = ::Digest::CRC32c.new
93
+ crc32cRecord = StringIO.new
94
94
  schema = OdpsTableSchema.new
95
95
  schema = record.getTableSchema
96
96
  schema.mCols.each { | col |
@@ -98,45 +98,45 @@ module OdpsDatahub
98
98
  if cellValue == nil
99
99
  next
100
100
  end
101
- crc32cRecord.update(encodeFixed32(col.mIdx + 1))
101
+ crc32cRecord.write(encodeFixed32(col.mIdx + 1))
102
102
  case col.mType
103
103
  when $ODPS_BIGINT
104
- crc32cRecord.update(encodeFixed64(cellValue))
104
+ crc32cRecord.write(encodeFixed64(cellValue))
105
105
  writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
106
106
  upStream.write(encodeSInt64(cellValue))
107
107
  when $ODPS_DOUBLE
108
- crc32cRecord.update(encodeDouble(cellValue))
108
+ crc32cRecord.write(encodeDouble(cellValue))
109
109
  writeTag(col.mIdx + 1, ::Protobuf::WireType::FIXED64, upStream)
110
110
  upStream.write(encodeDouble(cellValue))
111
111
  when $ODPS_BOOLEAN
112
- crc32cRecord.update(encodeBool(cellValue))
112
+ crc32cRecord.write(encodeBool(cellValue))
113
113
  writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
114
114
  upStream.write(encodeBool(cellValue))
115
115
  when $ODPS_DATETIME
116
- crc32cRecord.update(encodeFixed64(cellValue))
116
+ crc32cRecord.write(encodeFixed64(cellValue))
117
117
  writeTag(col.mIdx + 1, ::Protobuf::WireType::VARINT, upStream)
118
118
  upStream.write(encodeDataTime(cellValue))
119
119
  when $ODPS_STRING
120
- crc32cRecord.update(cellValue)
120
+ crc32cRecord.write(cellValue)
121
121
  writeTag(col.mIdx + 1, ::Protobuf::WireType::LENGTH_DELIMITED, upStream)
122
122
  upStream.write(encodeString(cellValue))
123
123
  when $ODPS_DECIMAL
124
- crc32cRecord.update(cellValue)
124
+ crc32cRecord.write(cellValue)
125
125
  writeTag(col.mIdx + 1, ::Protobuf::WireType::LENGTH_DELIMITED, upStream)
126
126
  upStream.write(encodeString(cellValue))
127
127
  else
128
128
  raise OdpsDatahubException.new($INVALID_ARGUMENT, "invalid mType")
129
129
  end
130
130
  }
131
- recordCrc = crc32cRecord.checksum.to_i
131
+ recordCrc = CrcCalculator::calculate(crc32cRecord)
132
132
  writeTag($TUNNEL_END_RECORD, ::Protobuf::WireType::VARINT, upStream)
133
133
  upStream.write(encodeUInt32(recordCrc))
134
- crc32cPack.update(encodeFixed32(recordCrc))
134
+ crc32cPack.write(encodeFixed32(recordCrc))
135
135
  }
136
136
  writeTag($TUNNEL_META_COUNT, ::Protobuf::WireType::VARINT, upStream)
137
137
  upStream.write(encodeSInt64(recordList.size))
138
138
  writeTag($TUNNEL_META_CHECKSUM, ::Protobuf::WireType::VARINT, upStream)
139
- upStream.write(encodeUInt32(crc32cPack.checksum))
139
+ upStream.write(encodeUInt32(CrcCalculator::calculate(crc32cPack)))
140
140
  else
141
141
  raise OdpsDatahubException.new($INVALID_ARGUMENT, "param must be a array")
142
142
  end
@@ -93,7 +93,7 @@ module OdpsDatahub
93
93
  upStream = Zlib::Deflate.deflate(@mUpStream.string)
94
94
  header[$CONTENT_MD5] = Digest::MD5.hexdigest(upStream)
95
95
  header[$CONTENT_LENGTH] = upStream.length.to_s
96
- #MAX_LENGTH 2048KB
96
+ #MAX_LENGTH 2048*10KB
97
97
  if upStream.length > $MAX_PACK_SIZE
98
98
  raise OdpsDatahubException.new($PACK_SIZE_EXCEED, "pack size:" + upStream.length.to_s)
99
99
  end
@@ -109,6 +109,7 @@ module OdpsDatahub
109
109
  if res.code != "200"
110
110
  raise OdpsDatahubException.new(json_obj["Code"], "write failed because " + json_obj["Message"])
111
111
  end
112
+ return json_obj
112
113
  end
113
114
 
114
115
  private
data/odps_example.conf CHANGED
@@ -21,6 +21,7 @@
21
21
  buffer_queue_limit 128
22
22
  flush_interval 5s
23
23
  project your_projectName
24
+ enable_fast_crc false
24
25
  <table in.log>
25
26
  table your_tableName
26
27
  fields r1,r2,r3,r4,r5,r6,blue
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-aliyun-odps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xiao Dong
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-11-02 00:00:00.000000000 Z
12
+ date: 2015-12-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd
@@ -125,15 +125,17 @@ files:
125
125
  - CHANGELOG.md
126
126
  - Gemfile
127
127
  - License
128
- - README.cn.md
129
128
  - README.md
130
129
  - Rakefile
131
130
  - VERSION
132
131
  - fluent-plugin-aliyun-odps.gemspec
133
132
  - lib/fluent/plugin/conf/config.rb
134
- - lib/fluent/plugin/digest/crc.rb
135
- - lib/fluent/plugin/digest/crc32.rb
136
- - lib/fluent/plugin/digest/crc32c.rb
133
+ - lib/fluent/plugin/crc/crc.rb
134
+ - lib/fluent/plugin/crc/lib/linux/crc32c.so
135
+ - lib/fluent/plugin/crc/lib/win/crc32c.so
136
+ - lib/fluent/plugin/crc/origin/crc.rb
137
+ - lib/fluent/plugin/crc/origin/crc32.rb
138
+ - lib/fluent/plugin/crc/origin/crc32c.rb
137
139
  - lib/fluent/plugin/exceptions.rb
138
140
  - lib/fluent/plugin/http/http_connection.rb
139
141
  - lib/fluent/plugin/http/http_flag.rb
data/README.cn.md DELETED
@@ -1,112 +0,0 @@
1
- # Aliyun ODPS Plugin for Fluentd
2
-
3
- ## ��ʼʹ��
4
- ---
5
-
6
- ### ����
7
-
8
- - �������ݴ�������(Open Data Processing Service�����ODPS)�ǰ���Ͱ������з��ĺ������ݴ���ƽ̨����Ҫ�����������ṹ�����ݵĴ洢�ͼ��㣬�����ṩ�������ݲֿ�Ľ�������Լ���Դ����ݵķ�����ģ����
9
- - ODPS DataHub Service(DHS)��һ��ODPS���ڽ��������û��ṩʵʱ���ݵķ���(Publish)�Ͷ���(Subscribe)�Ĺ��ܡ�
10
-
11
-
12
- ### ����Ҫ��
13
-
14
- ʹ�ô˲������Ҫ�߱����»���:
15
-
16
- 1. Ruby 2.1.0 �����
17
- 2. Gem 2.4.5 �����
18
- 3. Fluentd-0.10.49 ����� (*[Home Page](http://www.fluentd.org/)*)
19
- 4. Protobuf-3.5.1 �����(Ruby protobuf)
20
-
21
- ### GEM��װ
22
-
23
- ��ruby gem��װʹ��:
24
-
25
- ```
26
- $ gem install fluent-plugin-aliyun-odps
27
- ```
28
-
29
- ### Դ�밲װ���
30
-
31
- * ���Ȱ�װ����������fluentd�Լ�protobuf��
32
- * Ȼ�󽫲��Ŀ¼������Fluentd���Ŀ¼�£����磺 ��aliyun-odps-fluentd-plugin/lib/fluent/pluginĿ¼���Ƶ�{YOUR_FLUENTD_DIRECTORY}/lib/fluent/plugin�С�
33
-
34
- ```
35
- $ gem install protobuf
36
- $ gem install fluentd --no-ri --no-rdoc
37
- $ git clone https://github.com/aliyun/aliyun-odps-fluentd-plugin.git
38
- $ cp aliyun-odps-fluentd-plugin/lib/fluent/plugin/* {YOUR_FLUENTD_DIRECTORY}/lib/fluent/plugin/ -r
39
- ```
40
-
41
- ### ���ʹ��ʾ��
42
-
43
- ```
44
- <source>
45
- type tail
46
- path /opt/log/in/in.log
47
- pos_file /opt/log/in/in.log.pos
48
- refresh_interval 5s
49
- tag in.log
50
- format /^(?<remote>[^ ]*) - - \[(?<datetime>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*) "-" "(?<agent>[^\"]*)"$/
51
- time_format %Y%b%d %H:%M:%S %z
52
- </source>
53
- ```
54
- ```
55
- <match in.**>
56
- type aliyun_odps
57
- aliyun_access_id ************
58
- aliyun_access_key *********
59
- aliyun_odps_endpoint http://service.odps.aliyun.com/api
60
- aliyun_odps_hub_endpoint http://dh.odps.aliyun.com
61
- buffer_chunk_limit 2m
62
- buffer_queue_limit 128
63
- flush_interval 5s
64
- project your_projectName
65
- <table in.log>
66
- table your_tableName
67
- fields remote,method,path,code,size,agent
68
- partition ctime=${datetime.strftime('%Y%m%d')}
69
- time_format %d/%b/%Y:%H:%M:%S %z
70
- shard_number 1
71
- </table>
72
- </match>
73
- ```
74
- ### ����˵��
75
-
76
- - type(Fixed): �̶�ֵ aliyun_odps.
77
- - aliyun_access_id(Required):������access_id.
78
- - aliyun_access_key(Required):������access key.
79
- - aliyun_odps_hub_endpoint(Required):�����ķ�������ESC�ϣ���ѱ�ֵ�趨Ϊ http://dh-ext.odps.aliyun-inc.com, ��������Ϊ http://dh.odps.aliyun.com.
80
- - aliyunodps_endpoint(Required):�����ķ�������ESC�ϣ���ѱ�ֵ�趨Ϊ http://odps-ext.aiyun-inc.com/api, ��������Ϊ http://service.odps.aliyun.com/api .
81
- - buffer_chunk_limit(Optional): ���С��֧�֡�k��(KB),��m��(MB),��g��(GB)��λ��Ĭ�� 8MB������ֵ2MB.
82
- - buffer_queue_limit(Optional): ����д�С����ֵ��buffer_chunk_limit��ͬ����������������С��
83
- - flush_interval(Optional): ǿ�Ʒ��ͼ�����ﵽʱ��������δ����ǿ�Ʒ���, Ĭ�� 60s.
84
- - project(Required): project����.
85
- - table(Required): table����.
86
- - fields(Required): ��source��Ӧ���ֶ������������source֮��.
87
- - partition(Optional)����Ϊ�������������ô���.
88
- - ������֧�ֵ�����ģʽ:
89
- - �̶�ֵ: partition ctime=20150804
90
- - �ؼ���: partition ctime=${remote} ������remoteΪsource��ij�ֶΣ�
91
- - ʱ���ʽ�ؼ���: partition ctime=${datetime.strftime('%Y%m%d')} ������datetimeΪsource��ijʱ���ʽ�ֶΣ����Ϊ%Y%m%d��ʽ��Ϊ�������ƣ�
92
- - time_format(Optional):
93
- - ���ʹ��ʱ���ʽ�ؼ���Ϊ<partition>, �����ñ�����. ����: source[datetime]="29/Aug/2015:11:10:16 +0800",������<time_format>Ϊ"%d/%b/%Y:%H:%M:%S %z"
94
- - shard_number(Optional):ָ��shard���������������shard[0,shard_number-1]��Χ�ڵ�shard��д�����ݣ�����Ϊ����0��С��table��Ӧshard�������޵�����.
95
-
96
- ## �ٷ���վ
97
- ---
98
-
99
- - [Fluentd User Guide](http://docs.fluentd.org/)
100
-
101
- ## ����
102
- ---
103
-
104
- - [Sun Zongtao]()
105
- - [Cai Ying]()
106
- - [Dong Xiao](https://github.com/dongxiao1198)
107
- - [Yang Hongbo](https://github.com/hongbosoftware)
108
-
109
- ## License
110
- ---
111
-
112
- licensed under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0.html)