logstash-output-datahub 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -4
- data/lib/logstash/outputs/datahub.rb +357 -341
- data/logstash-output-datahub.gemspec +2 -2
- metadata +3 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e846682c134462b56c1ee792a34143797b6b34ce
+  data.tar.gz: a36cd441f580ffd6763fc41cae69de4c7924f05b
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: eb309d8008b270ac7a8b485c57d48e2d85e3e548ec4d8d963fa4b318b7250b54f07a4cc26ce4ef67a18b80dda43ca4f589282765b2ab87de333093cc1189205c
+  data.tar.gz: b48d2a40c2677e0c262383765970fe63396bff623bd94449f95b1ebf04dbc6a0a5a902af4ccca2c01ebabafad1d5e9bd38efec8344ffdf0a9e73320b85f7f09b
data/README.md
CHANGED
@@ -71,8 +71,6 @@ output {
     topic_name => ""
     #shard_id => "0"
     #shard_keys => ["thread_id"]
-    batch_size => 10
-    batch_timeout => 5
     dirty_data_continue => true
     dirty_data_file => "/Users/ph0ly/trash/dirty.data"
     dirty_data_file_max_size => 1000
@@ -89,8 +87,6 @@ project_name(Required): name of the DataHub project
 topic_name(Required): name of the DataHub topic
 retry_times(Optional): number of retries; -1 retries forever, 0 never retries, >0 retries a bounded number of times
 retry_interval(Optional): interval before the next retry, in seconds
-batch_size(Optional): batch commit size; a commit is triggered once @batch_size records have accumulated (default 100)
-batch_timeout(Optional): batch commit timeout; when traffic is low, accumulated data is committed after this timeout (default 5 seconds)
 shard_keys(Optional): array of field names used to route records to shards; the plugin hashes the values of these fields to pick a shard for each record; if neither shard_keys nor shard_id is set, shards are used round-robin
 shard_id(Optional): write all records to the given shard; if neither shard_keys nor shard_id is set, shards are used round-robin
 dirty_data_continue(Optional): whether to keep running when dirty data is encountered (default false); if true, dirty records are skipped and processing continues; when enabled, @dirty_data_file must be set
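For reference, a minimal 1.0.1-style output block built only from the options documented above might look like the following sketch (the credentials, endpoint, project/topic names and dirty-data path are placeholders, not values taken from this package):

output {
    datahub {
        access_id => "YOUR_ACCESS_ID"
        access_key => "YOUR_ACCESS_KEY"
        endpoint => "YOUR_DATAHUB_ENDPOINT"
        project_name => "your_project"
        topic_name => "your_topic"
        #shard_id => "0"
        #shard_keys => ["thread_id"]
        dirty_data_continue => true
        dirty_data_file => "/tmp/dirty.data"
        dirty_data_file_max_size => 1000
    }
}

batch_size and batch_timeout, the options removed from the README above, are intentionally absent from this sketch.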
data/lib/logstash/outputs/datahub.rb
CHANGED
@@ -1,341 +1,357 @@
[removed side: lines 1-341 of the 1.0.0 version of this file; the removed content is truncated in the source diff, so only the new 1.0.1 file follows]
1
|
+
#
|
2
|
+
#Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
#or more contributor license agreements. See the NOTICE file
|
4
|
+
#distributed with this work for additional information
|
5
|
+
#regarding copyright ownership. The ASF licenses this file
|
6
|
+
#to you under the Apache License, Version 2.0 (the
|
7
|
+
#"License"); you may not use this file except in compliance
|
8
|
+
#with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
#Unless required by applicable law or agreed to in writing,
|
13
|
+
#software distributed under the License is distributed on an
|
14
|
+
#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
#KIND, either express or implied. See the License for the
|
16
|
+
#specific language governing permissions and limitations
|
17
|
+
#under the License.
|
18
|
+
#
|
19
|
+
require "logstash/outputs/base"
|
20
|
+
require "logstash/namespace"
|
21
|
+
require "logstash/environment"
|
22
|
+
require "fileutils"
|
23
|
+
require "thread"
|
24
|
+
|
25
|
+
jar_path=File.expand_path(File.join(File.dirname(__FILE__), "../../.."))
|
26
|
+
LogStash::Environment.load_runtime_jars! File.join(jar_path, "vendor")
|
27
|
+
|
28
|
+
# Datahub output plugin
|
29
|
+
class LogStash::Outputs::Datahub < LogStash::Outputs::Base
|
30
|
+
declare_threadsafe!
|
31
|
+
|
32
|
+
config_name "datahub"
|
33
|
+
|
34
|
+
# datahub access id
|
35
|
+
config :access_id, :validate => :string, :required => true
|
36
|
+
|
37
|
+
# datahub access key
|
38
|
+
config :access_key, :validate => :string, :required => true
|
39
|
+
|
40
|
+
# datahub service endpoint
|
41
|
+
config :endpoint, :validate => :string, :required => true
|
42
|
+
|
43
|
+
# datahub project name
|
44
|
+
config :project_name, :validate => :string, :required => true
|
45
|
+
|
46
|
+
# datahub topic name
|
47
|
+
config :topic_name, :validate => :string, :required => true
|
48
|
+
|
49
|
+
# 重试次数,-1为无限重试、0为不重试、>0表示需要有限次数
|
50
|
+
config :retry_times, :validate => :number, :required => false, :default => -1
|
51
|
+
|
52
|
+
# 重试周期,下一次重试的间隔,单位为秒
|
53
|
+
config :retry_interval, :validate => :number, :required => false, :default => 5
|
54
|
+
|
55
|
+
# 按照指定字段的值计算hash,依据于该hash值落某个shard
|
56
|
+
config :shard_keys, :validate => :array, :required => false, :default => []
|
57
|
+
|
58
|
+
# 指定数据落指定的shard
|
59
|
+
config :shard_id, :validate => :string, :required => false, :default => ""
|
60
|
+
|
61
|
+
# # 提交的列名,用户可以配置topic的列,采集部分列或者全部列
|
62
|
+
# # 默认为空数组,表示按照topic的顺序及全字段提交
|
63
|
+
# # 另外:列的配置不用保序,但是要求该字段在topic的schema中存在
|
64
|
+
# config :column_names, :validate => :array, :required => false, :default => []
|
65
|
+
|
66
|
+
# 当出现脏数据时,是否继续写入
|
67
|
+
# 当开启该开关,必须指定@dirty_data_file文件
|
68
|
+
config :dirty_data_continue, :validate => :boolean, :required => false, :default => false
|
69
|
+
|
70
|
+
# 脏数据文件名称,当数据文件名称,在@dirty_data_continue开启的情况下,需要指定该值
|
71
|
+
# 特别注意:脏数据文件将被分割成两个部分.part1和.part2,part1作为更早的脏数据,part2作为更新的数据
|
72
|
+
config :dirty_data_file, :validate => :string, :required => false
|
73
|
+
|
74
|
+
# 脏数据文件的最大大小,该值保证脏数据文件最大大小不超过这个值,目前该值仅是一个参考值
|
75
|
+
config :dirty_data_file_max_size, :validate => :number, :required => false, :default => 50024000
|
76
|
+
|
77
|
+
# 数据传输压缩方式选择,目前支持deflate, lz4格式
|
78
|
+
config :compress_method, :validate => :string, :required => false, :default => ""
|
79
|
+
|
80
|
+
# 该值内部使用,不提供配置
|
81
|
+
# 分发shard的游标
|
82
|
+
attr_accessor :shard_cursor
|
83
|
+
|
84
|
+
# Shard cursor lock
|
85
|
+
@@shard_lock = Mutex.new
|
86
|
+
|
87
|
+
# 写文件锁
|
88
|
+
@@file_lock = Mutex.new
|
89
|
+
|
90
|
+
DatahubPackage = com.aliyun.datahub
|
91
|
+
|
92
|
+
public
|
93
|
+
def register
|
94
|
+
begin
|
95
|
+
@account = DatahubPackage.auth.AliyunAccount::new(@access_id, @access_key)
|
96
|
+
@conf = DatahubPackage.DatahubConfiguration::new(@account, @endpoint)
|
97
|
+
if @compress_method == "deflate" || @compress_method == "lz4"
|
98
|
+
@compression_format = DatahubPackage.model.compress.CompressionFormat.fromValue(@compress_method)
|
99
|
+
@conf.setCompressionFormat(@compression_format)
|
100
|
+
end
|
101
|
+
|
102
|
+
@client = DatahubPackage.DatahubClient::new(@conf)
|
103
|
+
@project = DatahubPackage.wrapper.Project::Builder.build(@project_name, @client)
|
104
|
+
@topic = @project.getTopic(@topic_name)
|
105
|
+
@shard_cursor = 0
|
106
|
+
|
107
|
+
@shards = get_active_shards(@topic.listShard())
|
108
|
+
@shard_count = @shards.size()
|
109
|
+
|
110
|
+
result = @client.getTopic(@project_name, @topic_name)
|
111
|
+
@schema = result.getRecordSchema()
|
112
|
+
fields = @schema.getFields()
|
113
|
+
@columns_size = fields.size
|
114
|
+
@columnnames = []
|
115
|
+
for i in 0...@columns_size
|
116
|
+
@columnnames.push(fields[i].getName())
|
117
|
+
end
|
118
|
+
@columntypes = []
|
119
|
+
for i in 0...@columns_size
|
120
|
+
@columntypes.push(fields[i].getType())
|
121
|
+
end
|
122
|
+
|
123
|
+
# 前置校验参数
|
124
|
+
check_params()
|
125
|
+
|
126
|
+
if @shard_count == 0
|
127
|
+
@logger.error "No active shard available, please check"
|
128
|
+
raise "No active shard available, please check"
|
129
|
+
end
|
130
|
+
|
131
|
+
@logger.info "Init datahub success!"
|
132
|
+
rescue => e
|
133
|
+
@logger.error "Init failed!" + e.message + " " + e.backtrace.inspect.to_s
|
134
|
+
raise e
|
135
|
+
end
|
136
|
+
end # def register
|
137
|
+
|
138
|
+
def check_params()
|
139
|
+
# 如果shard_id配置了,则检查该shard是否ok
|
140
|
+
if !@shard_id.empty?
|
141
|
+
valid = false
|
142
|
+
for i in 0...@shards.size
|
143
|
+
shard_entry = @shards[i]
|
144
|
+
if shard_entry.getShardId() == @shard_id && shard_entry.getState() == DatahubPackage.model.ShardState::ACTIVE
|
145
|
+
valid = true
|
146
|
+
end
|
147
|
+
end
|
148
|
+
if (!valid)
|
149
|
+
@logger.error "Config shard_id not exists or state not active, check your config"
|
150
|
+
raise "Config shard_id not exists or state not active, check your config"
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
# 检查shard_keys字段是否合法
|
155
|
+
if @shard_keys.size > 0
|
156
|
+
for i in 0...@shard_keys.size
|
157
|
+
shard_key = @shard_keys[i]
|
158
|
+
if !@schema.containsField(shard_key)
|
159
|
+
@logger.error "Config shard_keys contains one or one more unknown field, check your config"
|
160
|
+
raise "Config shard_keys contains one or one more unknown field, check your config"
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
# 配置了脏数据继续,必须指定脏数据文件
|
166
|
+
if @dirty_data_continue
|
167
|
+
if @dirty_data_file.to_s.chomp.length == 0
|
168
|
+
raise "Dirty data file path can not be empty"
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
end
|
173
|
+
|
174
|
+
# 检查并设置数据到entry中
|
175
|
+
# 如果解析数据异常,则数据落脏数据文件
|
176
|
+
def check_and_set_data(entry, field_type, index, event_map, column_name)
|
177
|
+
data = event_map[column_name]
|
178
|
+
begin
|
179
|
+
if field_type == DatahubPackage.common.data.FieldType::STRING
|
180
|
+
entry.setString(index, data.to_s)
|
181
|
+
elsif field_type == DatahubPackage.common.data.FieldType::BIGINT
|
182
|
+
entry.setBigint(index, java.lang.Long.parseLong(data.to_s))
|
183
|
+
elsif field_type == DatahubPackage.common.data.FieldType::DOUBLE
|
184
|
+
entry.setDouble(index, java.lang.Double.parseDouble(data.to_s))
|
185
|
+
elsif field_type == DatahubPackage.common.data.FieldType::BOOLEAN
|
186
|
+
entry.setBoolean(index, java.lang.Boolean.parseBoolean(data.to_s))
|
187
|
+
elsif field_type == DatahubPackage.common.data.FieldType::TIMESTAMP
|
188
|
+
entry.setTimeStamp(index, java.lang.Long.parseLong(data.to_s))
|
189
|
+
else
|
190
|
+
raise "Unknown schema type of data"
|
191
|
+
end
|
192
|
+
return true
|
193
|
+
rescue => e
|
194
|
+
@logger.error "Parse data: " + column_name + "[" + data + "] failed, " + e.message
|
195
|
+
# 数据格式有异常,根据配置参数确定是否续跑
|
196
|
+
if !@dirty_data_continue
|
197
|
+
@logger.error "Dirty data found, exit process now."
|
198
|
+
puts "Dirty data found, exit process now."
|
199
|
+
Process.exit(1)
|
200
|
+
# 忽略的异常数据直接落文件
|
201
|
+
else
|
202
|
+
write_as_dirty_data(event_map)
|
203
|
+
end
|
204
|
+
return false
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
# 脏数据文件处理
|
209
|
+
def write_as_dirty_data(event_amp)
|
210
|
+
dirty_file_part1_name = @dirty_data_file + ".part1"
|
211
|
+
dirty_file_part2_name = @dirty_data_file + ".part2"
|
212
|
+
|
213
|
+
# 加锁写入
|
214
|
+
@@file_lock.synchronize {
|
215
|
+
dirty_file_part2 = File.open(dirty_file_part2_name, "a+")
|
216
|
+
dirty_file_part2.puts(event_amp.to_s)
|
217
|
+
dirty_file_part2.close
|
218
|
+
if File.size(dirty_file_part2_name) > @dirty_data_file_max_size / 2
|
219
|
+
# .part1, .part2分别存储数据
|
220
|
+
# 旧数据落part1,新的数据落part2
|
221
|
+
FileUtils.mv(dirty_file_part2_name, dirty_file_part1_name)
|
222
|
+
end
|
223
|
+
}
|
224
|
+
end
|
225
|
+
|
226
|
+
def get_active_shards(shards)
|
227
|
+
active_shards = []
|
228
|
+
for i in 0...shards.size
|
229
|
+
entry = shards.get(i)
|
230
|
+
if entry.getState() == DatahubPackage.model.ShardState::ACTIVE
|
231
|
+
active_shards.push(entry)
|
232
|
+
end
|
233
|
+
end
|
234
|
+
return active_shards
|
235
|
+
end
|
236
|
+
|
237
|
+
def get_next_shard_id()
|
238
|
+
if !@shard_id.empty?
|
239
|
+
return @shard_id
|
240
|
+
# 否则轮询写入shard
|
241
|
+
else
|
242
|
+
idx = 0
|
243
|
+
@@shard_lock.synchronize {
|
244
|
+
idx = @shard_cursor % @shard_count
|
245
|
+
@shard_cursor = idx + 1
|
246
|
+
}
|
247
|
+
shard_id = @shards[idx].getShardId()
|
248
|
+
return shard_id
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
def multi_receive(event_list)
|
253
|
+
retry_count = 0
|
254
|
+
begin
|
255
|
+
entries = []
|
256
|
+
shard_id = get_next_shard_id()
|
257
|
+
|
258
|
+
event_list.each do |event|
|
259
|
+
if event == LogStash::SHUTDOWN
|
260
|
+
return
|
261
|
+
end
|
262
|
+
event_map = event.to_hash
|
263
|
+
|
264
|
+
entry = DatahubPackage.model.RecordEntry::new(@schema)
|
265
|
+
entry.putAttribute("srcId", event_map["host"].to_s)
|
266
|
+
entry.putAttribute("ts", event_map["@timestamp"].to_s)
|
267
|
+
entry.putAttribute("version", event_map["@version"].to_s)
|
268
|
+
entry.putAttribute("srcType", "log")
|
269
|
+
|
270
|
+
is_data_valid = false
|
271
|
+
for i in 0...@columns_size do
|
272
|
+
column_name = @columnnames[i]
|
273
|
+
column_type = @columntypes[i]
|
274
|
+
value = event_map[column_name]
|
275
|
+
if value != nil
|
276
|
+
is_data_valid = check_and_set_data(entry, column_type, i, event_map, column_name)
|
277
|
+
break if !is_data_valid
|
278
|
+
end
|
279
|
+
end
|
280
|
+
|
281
|
+
if is_data_valid
|
282
|
+
if @shard_keys.size > 0
|
283
|
+
hash_string = ""
|
284
|
+
for i in 0...@shard_keys.size
|
285
|
+
shard_key = @shard_keys[i]
|
286
|
+
if event_map[shard_key] != nil
|
287
|
+
hash_string += event_map[shard_key].to_s + ","
|
288
|
+
end
|
289
|
+
end
|
290
|
+
hashed_value = java.lang.String.new(hash_string).hashCode()
|
291
|
+
entry.setPartitionKey(hashed_value)
|
292
|
+
else
|
293
|
+
entry.setShardId(shard_id)
|
294
|
+
end
|
295
|
+
entries.push(entry)
|
296
|
+
end
|
297
|
+
end
|
298
|
+
|
299
|
+
# puts "total: " + entries.size.to_s
|
300
|
+
|
301
|
+
# 提交列表必须有数据
|
302
|
+
if entries.size > 0
|
303
|
+
put_result = @client.putRecords(@project_name, @topic_name, entries)
|
304
|
+
if put_result.getFailedRecordCount() > 0
|
305
|
+
@logger.info "Put " + put_result.getFailedRecordCount().to_s + " records to datahub failed, total " + entries.size().to_s
|
306
|
+
sleep @retry_interval
|
307
|
+
entries = put_result.getFailedRecords()
|
308
|
+
raise "Write to datahub failed: " + entries.size.to_s
|
309
|
+
else
|
310
|
+
@logger.info "Put data to datahub success, total " + entries.size().to_s
|
311
|
+
end
|
312
|
+
end
|
313
|
+
|
314
|
+
rescue DatahubPackage.exception.DatahubServiceException => e
|
315
|
+
@logger.error "Flush data exception: " + e.message #+ " " + e.backtrace.inspect.to_s
|
316
|
+
# shard的状态改变,需要重新加载shard
|
317
|
+
if e.getErrorCode() == "InvalidShardOperation"
|
318
|
+
@shards = get_active_shards(@topic.listShard())
|
319
|
+
@shard_count = @shards.size()
|
320
|
+
|
321
|
+
if @shard_count == 0
|
322
|
+
@logger.error "No active shard available, please check"
|
323
|
+
end
|
324
|
+
elsif e.getErrorCode() == nil
|
325
|
+
sleep @retry_interval
|
326
|
+
end
|
327
|
+
retry_count += 1
|
328
|
+
@logger.warn "Now retry: " + retry_count.to_s
|
329
|
+
retry
|
330
|
+
rescue => e
|
331
|
+
@logger.error "Flush data exception: " + e.message + " " + e.backtrace.inspect.to_s
|
332
|
+
|
333
|
+
# 无限重试
|
334
|
+
if @retry_times < 0
|
335
|
+
retry_count += 1
|
336
|
+
@logger.warn "Now retry: " + retry_count.to_s
|
337
|
+
# puts "Now retry..."
|
338
|
+
sleep @retry_interval
|
339
|
+
retry
|
340
|
+
elsif @retry_times == 0
|
341
|
+
@logger.error "Retry not work, now exit"
|
342
|
+
Process.exit(1)
|
343
|
+
# 继续重试
|
344
|
+
elsif @retry_times > 0
|
345
|
+
retry_count += 1
|
346
|
+
if retry_count > @retry_times
|
347
|
+
@logger.warn "Retry over: " + @retry_times.to_s
|
348
|
+
Process.exit(1)
|
349
|
+
end
|
350
|
+
@logger.warn "Now retry..."
|
351
|
+
sleep @retry_interval
|
352
|
+
retry
|
353
|
+
end
|
354
|
+
end
|
355
|
+
end # def multi_receive
|
356
|
+
|
357
|
+
end # class LogStash::Outputs::Datahub
|
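As a plain-Ruby illustration of the shard routing in get_next_shard_id above (a fixed shard_id always wins; otherwise records rotate over the active shards under a lock), here is a small self-contained sketch; the ShardRouter class and its names are illustrative only and not part of the plugin:

require "thread"

# Illustrative stand-in for the plugin's routing state (not part of the plugin's API).
class ShardRouter
  def initialize(active_shard_ids, fixed_shard_id = "")
    @active_shard_ids = active_shard_ids   # e.g. ["0", "1", "2"], the active shards
    @fixed_shard_id = fixed_shard_id       # plays the role of the shard_id option
    @cursor = 0                            # plays the role of @shard_cursor
    @lock = Mutex.new                      # plays the role of @@shard_lock
  end

  # Mirrors get_next_shard_id: return the fixed shard if one is configured,
  # otherwise advance the cursor and pick the next active shard round-robin.
  def next_shard_id
    return @fixed_shard_id unless @fixed_shard_id.empty?
    @lock.synchronize do
      idx = @cursor % @active_shard_ids.size
      @cursor = idx + 1
      @active_shard_ids[idx]
    end
  end
end

router = ShardRouter.new(["0", "1", "2"])
puts((1..4).map { router.next_shard_id }.join(", "))   # => 0, 1, 2, 0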
data/logstash-output-datahub.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name = 'logstash-output-datahub'
-  s.version = "1.0.0"
+  s.version = "1.0.1"
   s.licenses = ["Apache License (2.0)"]
   s.summary = "This aliyun-datahub output plugin."
   s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
 
   # Gem dependencies
   s.add_runtime_dependency 'stud'
-  s.add_runtime_dependency "logstash-core", ">= 2.0.0", "< 3.0.0"
+  s.add_runtime_dependency "logstash-core", ">= 2.0.0"
   s.add_runtime_dependency "logstash-codec-plain"
   s.add_development_dependency "logstash-devutils"
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-output-datahub
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.0.1
 platform: ruby
 authors:
 - Aliyun
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2017-06-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: stud
@@ -31,9 +31,6 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 2.0.0
-    - - "<"
-      - !ruby/object:Gem::Version
-        version: 3.0.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -41,9 +38,6 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 2.0.0
-    - - "<"
-      - !ruby/object:Gem::Version
-        version: 3.0.0
 - !ruby/object:Gem::Dependency
   name: logstash-codec-plain
   requirement: !ruby/object:Gem::Requirement
@@ -126,7 +120,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.
+rubygems_version: 2.6.10
 signing_key:
 specification_version: 4
 summary: This aliyun-datahub output plugin.