logstash-output-datahub 1.0.0 → 1.0.1
- checksums.yaml +4 -4
- data/README.md +0 -4
- data/lib/logstash/outputs/datahub.rb +357 -341
- data/logstash-output-datahub.gemspec +2 -2
- metadata +3 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e846682c134462b56c1ee792a34143797b6b34ce
+  data.tar.gz: a36cd441f580ffd6763fc41cae69de4c7924f05b
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: eb309d8008b270ac7a8b485c57d48e2d85e3e548ec4d8d963fa4b318b7250b54f07a4cc26ce4ef67a18b80dda43ca4f589282765b2ab87de333093cc1189205c
+  data.tar.gz: b48d2a40c2677e0c262383765970fe63396bff623bd94449f95b1ebf04dbc6a0a5a902af4ccca2c01ebabafad1d5e9bd38efec8344ffdf0a9e73320b85f7f09b
data/README.md
CHANGED
@@ -71,8 +71,6 @@ output {
         topic_name => ""
         #shard_id => "0"
         #shard_keys => ["thread_id"]
-        batch_size => 10
-        batch_timeout => 5
         dirty_data_continue => true
         dirty_data_file => "/Users/ph0ly/trash/dirty.data"
         dirty_data_file_max_size => 1000
@@ -89,8 +87,6 @@ project_name(Required): DataHub project name
 topic_name(Required): DataHub topic name
 retry_times(Optional): number of retries; -1 retries indefinitely, 0 never retries, >0 retries at most that many times
 retry_interval(Optional): interval before the next retry, in seconds
-batch_size(Optional): batch commit size; a commit is triggered once @batch_size records have accumulated; default 100
-batch_timeout(Optional): batch commit timeout; when little data is arriving, a commit is triggered after this timeout; default 5 seconds
 shard_keys(Optional): array of field names used to shard the data; the plugin hashes the values of these fields to route each record to a shard. If neither shard_keys nor shard_id is specified, shards are written in round-robin order
 shard_id(Optional): write all data to the specified shard. If neither shard_keys nor shard_id is specified, shards are written in round-robin order
 dirty_data_continue(Optional): whether to keep running when dirty data is encountered; defaults to false. If true, dirty data is skipped and processing continues. When enabled, @dirty_data_file must be specified
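For orientation, the 1.0.1 options documented above can be combined into a minimal output block along the lines of the README's own example (all values are placeholders; access_id, access_key, endpoint, and project_name are the plugin's other required settings):

    output {
        datahub {
            access_id => ""
            access_key => ""
            endpoint => ""
            project_name => ""
            topic_name => ""
            #shard_id => "0"
            #shard_keys => ["thread_id"]
            dirty_data_continue => true
            dirty_data_file => "/Users/ph0ly/trash/dirty.data"
            dirty_data_file_max_size => 1000
        }
    }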
data/lib/logstash/outputs/datahub.rb
CHANGED
@@ -1,341 +1,357 @@
- [old 341-line file removed; its contents are not fully recoverable in this view, and the surviving fragments match the new version below]
|
+
#
|
2
|
+
#Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
#or more contributor license agreements. See the NOTICE file
|
4
|
+
#distributed with this work for additional information
|
5
|
+
#regarding copyright ownership. The ASF licenses this file
|
6
|
+
#to you under the Apache License, Version 2.0 (the
|
7
|
+
#"License"); you may not use this file except in compliance
|
8
|
+
#with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
#Unless required by applicable law or agreed to in writing,
|
13
|
+
#software distributed under the License is distributed on an
|
14
|
+
#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
#KIND, either express or implied. See the License for the
|
16
|
+
#specific language governing permissions and limitations
|
17
|
+
#under the License.
|
18
|
+
#
|
19
|
+
require "logstash/outputs/base"
|
20
|
+
require "logstash/namespace"
|
21
|
+
require "logstash/environment"
|
22
|
+
require "fileutils"
|
23
|
+
require "thread"
|
24
|
+
|
25
|
+
jar_path=File.expand_path(File.join(File.dirname(__FILE__), "../../.."))
|
26
|
+
LogStash::Environment.load_runtime_jars! File.join(jar_path, "vendor")
|
27
|
+
|
28
|
+
# Datahub output plugin
|
29
|
+
class LogStash::Outputs::Datahub < LogStash::Outputs::Base
|
30
|
+
declare_threadsafe!
|
31
|
+
|
32
|
+
config_name "datahub"
|
33
|
+
|
34
|
+
# datahub access id
|
35
|
+
config :access_id, :validate => :string, :required => true
|
36
|
+
|
37
|
+
# datahub access key
|
38
|
+
config :access_key, :validate => :string, :required => true
|
39
|
+
|
40
|
+
# datahub service endpoint
|
41
|
+
config :endpoint, :validate => :string, :required => true
|
42
|
+
|
43
|
+
# datahub project name
|
44
|
+
config :project_name, :validate => :string, :required => true
|
45
|
+
|
46
|
+
# datahub topic name
|
47
|
+
config :topic_name, :validate => :string, :required => true
|
48
|
+
|
49
|
+
# 重试次数,-1为无限重试、0为不重试、>0表示需要有限次数
|
50
|
+
config :retry_times, :validate => :number, :required => false, :default => -1
|
51
|
+
|
52
|
+
# 重试周期,下一次重试的间隔,单位为秒
|
53
|
+
config :retry_interval, :validate => :number, :required => false, :default => 5
|
54
|
+
|
55
|
+
# 按照指定字段的值计算hash,依据于该hash值落某个shard
|
56
|
+
config :shard_keys, :validate => :array, :required => false, :default => []
|
57
|
+
|
58
|
+
# 指定数据落指定的shard
|
59
|
+
config :shard_id, :validate => :string, :required => false, :default => ""
|
60
|
+
|
61
|
+
# # 提交的列名,用户可以配置topic的列,采集部分列或者全部列
|
62
|
+
# # 默认为空数组,表示按照topic的顺序及全字段提交
|
63
|
+
# # 另外:列的配置不用保序,但是要求该字段在topic的schema中存在
|
64
|
+
# config :column_names, :validate => :array, :required => false, :default => []
|
65
|
+
|
66
|
+
# 当出现脏数据时,是否继续写入
|
67
|
+
# 当开启该开关,必须指定@dirty_data_file文件
|
68
|
+
config :dirty_data_continue, :validate => :boolean, :required => false, :default => false
|
69
|
+
|
70
|
+
# 脏数据文件名称,当数据文件名称,在@dirty_data_continue开启的情况下,需要指定该值
|
71
|
+
# 特别注意:脏数据文件将被分割成两个部分.part1和.part2,part1作为更早的脏数据,part2作为更新的数据
|
72
|
+
config :dirty_data_file, :validate => :string, :required => false
|
73
|
+
|
74
|
+
# 脏数据文件的最大大小,该值保证脏数据文件最大大小不超过这个值,目前该值仅是一个参考值
|
75
|
+
config :dirty_data_file_max_size, :validate => :number, :required => false, :default => 50024000
|
76
|
+
|
77
|
+
# 数据传输压缩方式选择,目前支持deflate, lz4格式
|
78
|
+
config :compress_method, :validate => :string, :required => false, :default => ""
|
79
|
+
|
80
|
+
# 该值内部使用,不提供配置
|
81
|
+
# 分发shard的游标
|
82
|
+
attr_accessor :shard_cursor
|
83
|
+
|
84
|
+
# Shard cursor lock
|
85
|
+
@@shard_lock = Mutex.new
|
86
|
+
|
87
|
+
# 写文件锁
|
88
|
+
@@file_lock = Mutex.new
|
89
|
+
|
90
|
+
DatahubPackage = com.aliyun.datahub
|
91
|
+
|
92
|
+
public
|
93
|
+
def register
|
94
|
+
begin
|
95
|
+
@account = DatahubPackage.auth.AliyunAccount::new(@access_id, @access_key)
|
96
|
+
@conf = DatahubPackage.DatahubConfiguration::new(@account, @endpoint)
|
97
|
+
if @compress_method == "deflate" || @compress_method == "lz4"
|
98
|
+
@compression_format = DatahubPackage.model.compress.CompressionFormat.fromValue(@compress_method)
|
99
|
+
@conf.setCompressionFormat(@compression_format)
|
100
|
+
end
|
101
|
+
|
102
|
+
@client = DatahubPackage.DatahubClient::new(@conf)
|
103
|
+
@project = DatahubPackage.wrapper.Project::Builder.build(@project_name, @client)
|
104
|
+
@topic = @project.getTopic(@topic_name)
|
105
|
+
@shard_cursor = 0
|
106
|
+
|
107
|
+
@shards = get_active_shards(@topic.listShard())
|
108
|
+
@shard_count = @shards.size()
|
109
|
+
|
110
|
+
result = @client.getTopic(@project_name, @topic_name)
|
111
|
+
@schema = result.getRecordSchema()
|
112
|
+
fields = @schema.getFields()
|
113
|
+
@columns_size = fields.size
|
114
|
+
@columnnames = []
|
115
|
+
for i in 0...@columns_size
|
116
|
+
@columnnames.push(fields[i].getName())
|
117
|
+
end
|
118
|
+
@columntypes = []
|
119
|
+
for i in 0...@columns_size
|
120
|
+
@columntypes.push(fields[i].getType())
|
121
|
+
end
|
122
|
+
|
123
|
+
# 前置校验参数
|
124
|
+
check_params()
|
125
|
+
|
126
|
+
if @shard_count == 0
|
127
|
+
@logger.error "No active shard available, please check"
|
128
|
+
raise "No active shard available, please check"
|
129
|
+
end
|
130
|
+
|
131
|
+
@logger.info "Init datahub success!"
|
132
|
+
rescue => e
|
133
|
+
@logger.error "Init failed!" + e.message + " " + e.backtrace.inspect.to_s
|
134
|
+
raise e
|
135
|
+
end
|
136
|
+
end # def register
|
137
|
+
|
138
|
+
def check_params()
|
139
|
+
# 如果shard_id配置了,则检查该shard是否ok
|
140
|
+
if !@shard_id.empty?
|
141
|
+
valid = false
|
142
|
+
for i in 0...@shards.size
|
143
|
+
shard_entry = @shards[i]
|
144
|
+
if shard_entry.getShardId() == @shard_id && shard_entry.getState() == DatahubPackage.model.ShardState::ACTIVE
|
145
|
+
valid = true
|
146
|
+
end
|
147
|
+
end
|
148
|
+
if (!valid)
|
149
|
+
@logger.error "Config shard_id not exists or state not active, check your config"
|
150
|
+
raise "Config shard_id not exists or state not active, check your config"
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
# 检查shard_keys字段是否合法
|
155
|
+
if @shard_keys.size > 0
|
156
|
+
for i in 0...@shard_keys.size
|
157
|
+
shard_key = @shard_keys[i]
|
158
|
+
if !@schema.containsField(shard_key)
|
159
|
+
@logger.error "Config shard_keys contains one or one more unknown field, check your config"
|
160
|
+
raise "Config shard_keys contains one or one more unknown field, check your config"
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
# 配置了脏数据继续,必须指定脏数据文件
|
166
|
+
if @dirty_data_continue
|
167
|
+
if @dirty_data_file.to_s.chomp.length == 0
|
168
|
+
raise "Dirty data file path can not be empty"
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
end
|
173
|
+
|
174
|
+
# 检查并设置数据到entry中
|
175
|
+
# 如果解析数据异常,则数据落脏数据文件
|
176
|
+
def check_and_set_data(entry, field_type, index, event_map, column_name)
|
177
|
+
data = event_map[column_name]
|
178
|
+
begin
|
179
|
+
if field_type == DatahubPackage.common.data.FieldType::STRING
|
180
|
+
entry.setString(index, data.to_s)
|
181
|
+
elsif field_type == DatahubPackage.common.data.FieldType::BIGINT
|
182
|
+
entry.setBigint(index, java.lang.Long.parseLong(data.to_s))
|
183
|
+
elsif field_type == DatahubPackage.common.data.FieldType::DOUBLE
|
184
|
+
entry.setDouble(index, java.lang.Double.parseDouble(data.to_s))
|
185
|
+
elsif field_type == DatahubPackage.common.data.FieldType::BOOLEAN
|
186
|
+
entry.setBoolean(index, java.lang.Boolean.parseBoolean(data.to_s))
|
187
|
+
elsif field_type == DatahubPackage.common.data.FieldType::TIMESTAMP
|
188
|
+
entry.setTimeStamp(index, java.lang.Long.parseLong(data.to_s))
|
189
|
+
else
|
190
|
+
raise "Unknown schema type of data"
|
191
|
+
end
|
192
|
+
return true
|
193
|
+
rescue => e
|
194
|
+
@logger.error "Parse data: " + column_name + "[" + data + "] failed, " + e.message
|
195
|
+
# 数据格式有异常,根据配置参数确定是否续跑
|
196
|
+
if !@dirty_data_continue
|
197
|
+
@logger.error "Dirty data found, exit process now."
|
198
|
+
puts "Dirty data found, exit process now."
|
199
|
+
Process.exit(1)
|
200
|
+
# 忽略的异常数据直接落文件
|
201
|
+
else
|
202
|
+
write_as_dirty_data(event_map)
|
203
|
+
end
|
204
|
+
return false
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
# 脏数据文件处理
|
209
|
+
def write_as_dirty_data(event_amp)
|
210
|
+
dirty_file_part1_name = @dirty_data_file + ".part1"
|
211
|
+
dirty_file_part2_name = @dirty_data_file + ".part2"
|
212
|
+
|
213
|
+
# 加锁写入
|
214
|
+
@@file_lock.synchronize {
|
215
|
+
dirty_file_part2 = File.open(dirty_file_part2_name, "a+")
|
216
|
+
dirty_file_part2.puts(event_amp.to_s)
|
217
|
+
dirty_file_part2.close
|
218
|
+
if File.size(dirty_file_part2_name) > @dirty_data_file_max_size / 2
|
219
|
+
# .part1, .part2分别存储数据
|
220
|
+
# 旧数据落part1,新的数据落part2
|
221
|
+
FileUtils.mv(dirty_file_part2_name, dirty_file_part1_name)
|
222
|
+
end
|
223
|
+
}
|
224
|
+
end
|
225
|
+
|
226
|
+
def get_active_shards(shards)
|
227
|
+
active_shards = []
|
228
|
+
for i in 0...shards.size
|
229
|
+
entry = shards.get(i)
|
230
|
+
if entry.getState() == DatahubPackage.model.ShardState::ACTIVE
|
231
|
+
active_shards.push(entry)
|
232
|
+
end
|
233
|
+
end
|
234
|
+
return active_shards
|
235
|
+
end
|
236
|
+
|
237
|
+
def get_next_shard_id()
|
238
|
+
if !@shard_id.empty?
|
239
|
+
return @shard_id
|
240
|
+
# 否则轮询写入shard
|
241
|
+
else
|
242
|
+
idx = 0
|
243
|
+
@@shard_lock.synchronize {
|
244
|
+
idx = @shard_cursor % @shard_count
|
245
|
+
@shard_cursor = idx + 1
|
246
|
+
}
|
247
|
+
shard_id = @shards[idx].getShardId()
|
248
|
+
return shard_id
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
def multi_receive(event_list)
|
253
|
+
retry_count = 0
|
254
|
+
begin
|
255
|
+
entries = []
|
256
|
+
shard_id = get_next_shard_id()
|
257
|
+
|
258
|
+
event_list.each do |event|
|
259
|
+
if event == LogStash::SHUTDOWN
|
260
|
+
return
|
261
|
+
end
|
262
|
+
event_map = event.to_hash
|
263
|
+
|
264
|
+
entry = DatahubPackage.model.RecordEntry::new(@schema)
|
265
|
+
entry.putAttribute("srcId", event_map["host"].to_s)
|
266
|
+
entry.putAttribute("ts", event_map["@timestamp"].to_s)
|
267
|
+
entry.putAttribute("version", event_map["@version"].to_s)
|
268
|
+
entry.putAttribute("srcType", "log")
|
269
|
+
|
270
|
+
is_data_valid = false
|
271
|
+
for i in 0...@columns_size do
|
272
|
+
column_name = @columnnames[i]
|
273
|
+
column_type = @columntypes[i]
|
274
|
+
value = event_map[column_name]
|
275
|
+
if value != nil
|
276
|
+
is_data_valid = check_and_set_data(entry, column_type, i, event_map, column_name)
|
277
|
+
break if !is_data_valid
|
278
|
+
end
|
279
|
+
end
|
280
|
+
|
281
|
+
if is_data_valid
|
282
|
+
if @shard_keys.size > 0
|
283
|
+
hash_string = ""
|
284
|
+
for i in 0...@shard_keys.size
|
285
|
+
shard_key = @shard_keys[i]
|
286
|
+
if event_map[shard_key] != nil
|
287
|
+
hash_string += event_map[shard_key].to_s + ","
|
288
|
+
end
|
289
|
+
end
|
290
|
+
hashed_value = java.lang.String.new(hash_string).hashCode()
|
291
|
+
entry.setPartitionKey(hashed_value)
|
292
|
+
else
|
293
|
+
entry.setShardId(shard_id)
|
294
|
+
end
|
295
|
+
entries.push(entry)
|
296
|
+
end
|
297
|
+
end
|
298
|
+
|
299
|
+
# puts "total: " + entries.size.to_s
|
300
|
+
|
301
|
+
# 提交列表必须有数据
|
302
|
+
if entries.size > 0
|
303
|
+
put_result = @client.putRecords(@project_name, @topic_name, entries)
|
304
|
+
if put_result.getFailedRecordCount() > 0
|
305
|
+
@logger.info "Put " + put_result.getFailedRecordCount().to_s + " records to datahub failed, total " + entries.size().to_s
|
306
|
+
sleep @retry_interval
|
307
|
+
entries = put_result.getFailedRecords()
|
308
|
+
raise "Write to datahub failed: " + entries.size.to_s
|
309
|
+
else
|
310
|
+
@logger.info "Put data to datahub success, total " + entries.size().to_s
|
311
|
+
end
|
312
|
+
end
|
313
|
+
|
314
|
+
rescue DatahubPackage.exception.DatahubServiceException => e
|
315
|
+
@logger.error "Flush data exception: " + e.message #+ " " + e.backtrace.inspect.to_s
|
316
|
+
# shard的状态改变,需要重新加载shard
|
317
|
+
if e.getErrorCode() == "InvalidShardOperation"
|
318
|
+
@shards = get_active_shards(@topic.listShard())
|
319
|
+
@shard_count = @shards.size()
|
320
|
+
|
321
|
+
if @shard_count == 0
|
322
|
+
@logger.error "No active shard available, please check"
|
323
|
+
end
|
324
|
+
elsif e.getErrorCode() == nil
|
325
|
+
sleep @retry_interval
|
326
|
+
end
|
327
|
+
retry_count += 1
|
328
|
+
@logger.warn "Now retry: " + retry_count.to_s
|
329
|
+
retry
|
330
|
+
rescue => e
|
331
|
+
@logger.error "Flush data exception: " + e.message + " " + e.backtrace.inspect.to_s
|
332
|
+
|
333
|
+
# 无限重试
|
334
|
+
if @retry_times < 0
|
335
|
+
retry_count += 1
|
336
|
+
@logger.warn "Now retry: " + retry_count.to_s
|
337
|
+
# puts "Now retry..."
|
338
|
+
sleep @retry_interval
|
339
|
+
retry
|
340
|
+
elsif @retry_times == 0
|
341
|
+
@logger.error "Retry not work, now exit"
|
342
|
+
Process.exit(1)
|
343
|
+
# 继续重试
|
344
|
+
elsif @retry_times > 0
|
345
|
+
retry_count += 1
|
346
|
+
if retry_count > @retry_times
|
347
|
+
@logger.warn "Retry over: " + @retry_times.to_s
|
348
|
+
Process.exit(1)
|
349
|
+
end
|
350
|
+
@logger.warn "Now retry..."
|
351
|
+
sleep @retry_interval
|
352
|
+
retry
|
353
|
+
end
|
354
|
+
end
|
355
|
+
end # def multi_receive
|
356
|
+
|
357
|
+
end # class LogStash::Outputs::Datahub
|
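A note on the routing logic in get_next_shard_id and multi_receive above: when shard_keys is configured, the plugin concatenates the key values into a comma-separated string and uses its Java String hashCode as the partition key; otherwise it advances a mutex-guarded cursor over the active shards. Below is a standalone plain-Ruby sketch of that round-robin path (the shard IDs here are hypothetical; the plugin reads the real ones from the topic):

    require "thread"

    # Round-robin shard selection, mirroring get_next_shard_id.
    class ShardRouter
      def initialize(shard_ids, fixed_shard_id = "")
        @shard_ids = shard_ids            # hypothetical active shard IDs
        @fixed_shard_id = fixed_shard_id  # corresponds to the shard_id setting
        @cursor = 0
        @lock = Mutex.new
      end

      def next_shard_id
        # A configured shard_id always wins, as in the plugin
        return @fixed_shard_id unless @fixed_shard_id.empty?
        idx = nil
        @lock.synchronize do
          idx = @cursor % @shard_ids.size
          @cursor = idx + 1
        end
        @shard_ids[idx]
      end
    end

    router = ShardRouter.new(["0", "1", "2"])
    puts 4.times.map { router.next_shard_id }.join(" ")   # => 0 1 2 0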
data/logstash-output-datahub.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name = 'logstash-output-datahub'
-  s.version = "1.0.0"
+  s.version = "1.0.1"
   s.licenses = ["Apache License (2.0)"]
   s.summary = "This aliyun-datahub output plugin."
   s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
 
   # Gem dependencies
   s.add_runtime_dependency 'stud'
-  s.add_runtime_dependency "logstash-core", ">= 2.0.0", "< 3.0.0"
+  s.add_runtime_dependency "logstash-core", ">= 2.0.0"
   s.add_runtime_dependency "logstash-codec-plain"
   s.add_development_dependency "logstash-devutils"
 end
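As the gemspec description notes, the gem is not a stand-alone program; on the Logstash 2.x line it targets, installation would be along the lines of `$LS_HOME/bin/plugin install logstash-output-datahub` (newer Logstash releases ship the equivalent bin/logstash-plugin command).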
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-output-datahub
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.0.1
 platform: ruby
 authors:
 - Aliyun
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2017-06-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: stud
@@ -31,9 +31,6 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 2.0.0
-    - - "<"
-      - !ruby/object:Gem::Version
-        version: 3.0.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -41,9 +38,6 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 2.0.0
-    - - "<"
-      - !ruby/object:Gem::Version
-        version: 3.0.0
 - !ruby/object:Gem::Dependency
   name: logstash-codec-plain
   requirement: !ruby/object:Gem::Requirement
@@ -126,7 +120,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.
+rubygems_version: 2.6.10
 signing_key:
 specification_version: 4
 summary: This aliyun-datahub output plugin.