logstash-output-datahub 1.0.0
- checksums.yaml +7 -0
- data/CHANGELOG.md +2 -0
- data/CONTRIBUTORS +0 -0
- data/Gemfile +2 -0
- data/LICENSE +13 -0
- data/README.md +115 -0
- data/lib/logstash/outputs/datahub-test.rb +11 -0
- data/lib/logstash/outputs/datahub.rb +341 -0
- data/logstash-output-datahub.gemspec +25 -0
- data/spec/outputs/datahub.rb +22 -0
- data/vendor/jar-dependencies/runtime-jars/aliyun-sdk-datahub-2.2.1-SNAPSHOT.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/bouncycastle.provider-1.38-jdk15.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/commons-codec-1.9.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/commons-io-2.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/commons-lang3-3.3.2.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/gson-2.6.2.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/jackson-annotations-2.4.0.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/jackson-core-2.4.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/jackson-core-asl-1.9.13.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/jackson-databind-2.4.4.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/jackson-mapper-asl-1.9.13.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/log4j-1.2.17.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/lz4-1.3.0.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/slf4j-api-1.7.12.jar +0 -0
- data/vendor/jar-dependencies/runtime-jars/slf4j-log4j12-1.7.12.jar +0 -0
- metadata +134 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: 41b554cbbdc8d2ea64dd05f8284660a499ce1346
  data.tar.gz: df5877d7d4039a857c1fcaa2e5ba195ca7d0e7e7
SHA512:
  metadata.gz: c7fa8a628c8953db4af20fc7938879884484d092248c3e6bbbf476f785bec4509b305c55428f4b25b2603e572a10013143a35feb215248b17f4d8232a285fdf6
  data.tar.gz: 59a9bb7aea19acea1f829b1891f9702141368764d2351cada3ecdb9e7471f1df82340ba5ba41f97b84ad31a36ff182479d753bff02bd41b4fcf1b6648824c99f
data/CHANGELOG.md
ADDED
data/CONTRIBUTORS
ADDED
File without changes
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
Copyright (c) 2012–2015 Elasticsearch <http://www.elastic.co>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
data/README.md
ADDED
@@ -0,0 +1,115 @@
# Aliyun DataHub Plugin for LogStash

## Getting Started
---

### Introduction

- This plugin is an output plugin built on Logstash; it ships the data it collects to DataHub, the Alibaba Cloud storage service for streaming data.

### Installation
+ Requirements: Linux, JDK 1.7+, Logstash (optional; it does not matter if it is not installed yet)
+ Download the tarball from the Alibaba Cloud StreamCompute website and install it with the commands below

If Logstash has not been installed before, install it as follows:

```
$ tar -xzvf logstash-with-datahub-2.3.0.tar.gz
$ cd logstash-with-datahub-2.3.0
```

If Logstash is already installed, obtain logstash-output-datahub-1.0.0.gem and install it with:

```
$ ${LOGSTASH_HOME}/bin/logstash-plugin install --local logstash-output-datahub-1.0.0.gem
```

### Example
+ Collecting logs

Below is a log line written by an application, in the following format:

```
20:04:30.359 [qtp1453606810-20] INFO AuditInterceptor - [13pn9kdr5tl84stzkmaa8vmg] end /web/v1/project/fhp4clxfbu0w3ym2n7ee6ynh/statistics?executionName=bayes_poc_test GET, 187 ms
```

The schema of the DataHub topic is as follows:

```
request_time, STRING
thread_id, STRING
log_level, STRING
class_name, STRING
request_id, STRING
detail, STRING
```

The Logstash configuration is as follows:

```
input {
    file {
        path => "${APP_HOME}/log/app.log"
        start_position => "beginning"
    }
}

filter{
    grok {
        match => {
           "message" => "(?<request_time>\d\d:\d\d:\d\d\.\d+)\s+\[(?<thread_id>[\w\-]+)\]\s+(?<log_level>\w+)\s+(?<class_name>\w+)\s+\-(?<detail>.+)"
        }
    }
}

output {
    datahub {
        access_id => ""
        access_key => ""
        endpoint => ""
        project_name => ""
        topic_name => ""
        #shard_id => "0"
        #shard_keys => ["thread_id"]
        batch_size => 10
        batch_timeout => 5
        dirty_data_continue => true
        dirty_data_file => "/Users/ph0ly/trash/dirty.data"
        dirty_data_file_max_size => 1000
    }
}
```
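
For reference, applying the grok pattern above to the sample log line yields roughly the following fields (illustrative only; note that request_id is not extracted by this pattern and remains part of detail):

```
request_time => "20:04:30.359"
thread_id    => "qtp1453606810-20"
log_level    => "INFO"
class_name   => "AuditInterceptor"
detail       => " [13pn9kdr5tl84stzkmaa8vmg] end /web/v1/project/fhp4clxfbu0w3ym2n7ee6ynh/statistics?executionName=bayes_poc_test GET, 187 ms"
```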

### Parameters
```
access_id(Required): Alibaba Cloud access id
access_key(Required): Alibaba Cloud access key
endpoint(Required): Alibaba Cloud DataHub service endpoint
project_name(Required): name of the DataHub project
topic_name(Required): name of the DataHub topic
retry_times(Optional): number of retries; -1 retries forever, 0 never retries, >0 retries a limited number of times
retry_interval(Optional): interval before the next retry, in seconds
batch_size(Optional): batch commit size; a commit is triggered once @batch_size records have accumulated; default 100
batch_timeout(Optional): batch commit timeout in seconds; with low data volume a commit is triggered when the timeout expires; default 5
shard_keys(Optional): array type; names of the fields used to route records to shards; the plugin hashes the values of these fields to choose a shard for each record; if neither shard_keys nor shard_id is specified, records are written to shards round-robin
shard_id(Optional): write all records to the specified shard; if neither shard_keys nor shard_id is specified, records are written to shards round-robin
dirty_data_continue(Optional): whether to keep running when dirty data is encountered; default false; if true, dirty records are skipped and processing continues; when enabled, @dirty_data_file must be specified
dirty_data_file(Optional): name of the dirty data file; must be specified when @dirty_data_continue is enabled. Note: the dirty data file is split into two parts, .part1 and .part2; part1 holds the older dirty data and part2 the newer data
dirty_data_file_max_size(Optional): maximum size of the dirty data file; the plugin keeps the file below this size; currently this value is only a guideline
```
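
For example, a minimal sketch of a configuration that routes records by thread_id and retries a limited number of times might look like the following (the credentials, endpoint, project and topic values are placeholders):

```
output {
    datahub {
        access_id => "<your-access-id>"
        access_key => "<your-access-key>"
        endpoint => "<datahub-endpoint>"
        project_name => "<project>"
        topic_name => "<topic>"
        # route by the hash of thread_id; the field must exist in the topic schema
        shard_keys => ["thread_id"]
        # retry up to 3 times, waiting 10 seconds between attempts
        retry_times => 3
        retry_interval => 10
    }
}
```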

## References
---

- [LogStash homepage](https://www.elastic.co/products/logstash)
- [LogStash plugin development](https://www.elastic.co/guide/en/logstash/current/_how_to_write_a_logstash_output_plugin.html#_coding_output_plugins)

## Authors && Contributors
---

- [Huang Tao](https://github.com/ph0ly)

## License
---

Licensed under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0.html)
data/lib/logstash/outputs/datahub.rb
ADDED
@@ -0,0 +1,341 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
require "logstash/outputs/base"
require "logstash/namespace"
require "logstash/environment"
require "fileutils"
require "thread"

jar_path = File.expand_path(File.join(File.dirname(__FILE__), "../../.."))
LogStash::Environment.load_runtime_jars! File.join(jar_path, "vendor")

# Datahub output plugin
class LogStash::Outputs::Datahub < LogStash::Outputs::Base
  declare_threadsafe!

  config_name "datahub"

  # DataHub access id
  config :access_id, :validate => :string, :required => true

  # DataHub access key
  config :access_key, :validate => :string, :required => true

  # DataHub service endpoint
  config :endpoint, :validate => :string, :required => true

  # DataHub project name
  config :project_name, :validate => :string, :required => true

  # DataHub topic name
  config :topic_name, :validate => :string, :required => true

  # Number of retries: -1 retries forever, 0 never retries, > 0 retries a limited number of times
  config :retry_times, :validate => :number, :required => false, :default => -1

  # Retry interval: time to wait before the next retry, in seconds
  config :retry_interval, :validate => :number, :required => false, :default => 5

  # Hash the values of the given fields and route each record to a shard based on that hash
  config :shard_keys, :validate => :array, :required => false, :default => []

  # Write all records to the given shard
  config :shard_id, :validate => :string, :required => false, :default => ""

  # # Column names to commit; users may configure a subset or all of the topic's columns
  # # Defaults to an empty array, which means all fields are committed in topic order
  # # Note: the configured columns need not be ordered, but each must exist in the topic schema
  # config :column_names, :validate => :array, :required => false, :default => []

  # Whether to keep writing when dirty data is encountered
  # When enabled, @dirty_data_file must be specified
  config :dirty_data_continue, :validate => :boolean, :required => false, :default => false

  # Dirty data file name; must be specified when @dirty_data_continue is enabled
  # Note: the dirty data file is split into two parts, .part1 and .part2; part1 holds the older dirty data, part2 the newer data
  config :dirty_data_file, :validate => :string, :required => false

  # Maximum size of the dirty data file; the file is kept below this size; currently only a guideline
  config :dirty_data_file_max_size, :validate => :number, :required => false, :default => 50024000

  # Compression method for data transfer; deflate and lz4 are currently supported
  config :compress_method, :validate => :string, :required => false, :default => ""

  # Used internally; not exposed as a config option
  # Cursor used to distribute records across shards
  attr_accessor :shard_cursor

  # Shard cursor lock
  @@shard_lock = Mutex.new

  # File write lock
  @@file_lock = Mutex.new

  DatahubPackage = com.aliyun.datahub

  public
  def register
    begin
      @account = DatahubPackage.auth.AliyunAccount::new(@access_id, @access_key)
      @conf = DatahubPackage.DatahubConfiguration::new(@account, @endpoint)
      if @compress_method == "deflate" || @compress_method == "lz4"
        @compression_format = DatahubPackage.model.compress.CompressionFormat.fromValue(@compress_method)
        @conf.setCompressionFormat(@compression_format)
      end

      @client = DatahubPackage.DatahubClient::new(@conf)
      @project = DatahubPackage.wrapper.Project::Builder.build(@project_name, @client)
      @topic = @project.getTopic(@topic_name)
      @shard_cursor = 0

      @shards = get_active_shards(@topic.listShard())
      @shard_count = @shards.size()

      result = @client.getTopic(@project_name, @topic_name)
      @schema = result.getRecordSchema()
      fields = @schema.getFields()
      @columns_size = fields.size
      @columns = []
      for i in 0...@columns_size
        @columns.push(fields[i].getName())
      end

      # Validate parameters up front
      check_params()

      if @shard_count == 0
        @logger.error "No active shard available, please check"
        raise "No active shard available, please check"
      end

      @logger.info "Init datahub success!"
    rescue => e
      @logger.error "Init failed!" + e.message + " " + e.backtrace.inspect.to_s
      raise e
    end
  end # def register

  def check_params()
    # If shard_id is configured, check that the shard exists and is active
    if !@shard_id.empty?
      valid = false
      for i in 0...@shards.size
        shard_entry = @shards[i]
        if shard_entry.getShardId() == @shard_id && shard_entry.getState() == DatahubPackage.model.ShardState::ACTIVE
          valid = true
        end
      end
      if (!valid)
        @logger.error "Config shard_id not exists or state not active, check your config"
        raise "Config shard_id not exists or state not active, check your config"
      end
    end

    # Check that the shard_keys fields exist in the topic schema
    if @shard_keys.size > 0
      for i in 0...@shard_keys.size
        shard_key = @shard_keys[i]
        if !@schema.containsField(shard_key)
          @logger.error "Config shard_keys contains one or one more unknown field, check your config"
          raise "Config shard_keys contains one or one more unknown field, check your config"
        end
      end
    end

    # When dirty_data_continue is enabled, a dirty data file must be specified
    if @dirty_data_continue
      if @dirty_data_file.to_s.chomp.length == 0
        raise "Dirty data file path can not be empty"
      end
    end

  end

  # Validate the value and set it on the entry
  # If parsing fails, the record goes to the dirty data file
  def check_and_set_data(entry, field_type, index, event_map, column_name)
    data = event_map[column_name]
    begin
      if field_type == DatahubPackage.common.data.FieldType::STRING
        entry.setString(index, data.to_s)
      elsif field_type == DatahubPackage.common.data.FieldType::BIGINT
        entry.setBigint(index, java.lang.Long.parseLong(data.to_s))
      elsif field_type == DatahubPackage.common.data.FieldType::DOUBLE
        entry.setDouble(index, java.lang.Double.parseDouble(data.to_s))
      elsif field_type == DatahubPackage.common.data.FieldType::BOOLEAN
        entry.setBoolean(index, java.lang.Boolean.parseBoolean(data.to_s))
      elsif field_type == DatahubPackage.common.data.FieldType::TIMESTAMP
        entry.setTimeStamp(index, java.lang.Long.parseLong(data.to_s))
      else
        raise "Unknown schema type of data"
      end
      return true
    rescue => e
      @logger.error "Parse data: " + column_name + "[" + data + "] failed, " + e.message
      # The data is malformed; decide whether to keep going based on the config
      if !@dirty_data_continue
        @logger.error "Dirty data found, exit process now."
        puts "Dirty data found, exit process now."
        Process.exit(1)
      # Ignored bad records are written straight to the dirty data file
      else
        write_as_dirty_data(event_map)
      end
      return false
    end
  end

  # Dirty data file handling
  def write_as_dirty_data(event_amp)
    dirty_file_part1_name = @dirty_data_file + ".part1"
    dirty_file_part2_name = @dirty_data_file + ".part2"

    # Write under the lock
    @@file_lock.synchronize {
      dirty_file_part2 = File.open(dirty_file_part2_name, "a+")
      dirty_file_part2.puts(event_amp.to_s)
      dirty_file_part2.close
      if File.size(dirty_file_part2_name) > @dirty_data_file_max_size / 2
        # .part1 and .part2 store the data separately:
        # older data goes to part1, newer data to part2
        FileUtils.mv(dirty_file_part2_name, dirty_file_part1_name)
      end
    }
  end

  def get_active_shards(shards)
    active_shards = []
    for i in 0...shards.size
      entry = shards.get(i)
      if entry.getState() == DatahubPackage.model.ShardState::ACTIVE
        active_shards.push(entry)
      end
    end
    return active_shards
  end

  def get_next_shard_id()
    if !@shard_id.empty?
      return @shard_id
    # Otherwise write to shards round-robin
    else
      idx = 0
      @@shard_lock.synchronize {
        idx = @shard_cursor % @shard_count
        @shard_cursor = idx + 1
      }
      shard_id = @shards[idx].getShardId()
      return shard_id
    end
  end

  def multi_receive(event_list)
    begin
      entries = []
      shard_id = get_next_shard_id()

      event_list.each do |event|
        if event == LogStash::SHUTDOWN
          return
        end
        event_map = event.to_hash

        entry = DatahubPackage.model.RecordEntry::new(@schema)
        # entry.putAttribute("srcId", event_map["host"].to_s)
        # entry.putAttribute("ts", event_map["@timestamp"].to_s)
        # entry.putAttribute("version", event_map["@version"].to_s)
        # entry.putAttribute("srcType", "log")

        for i in 0...@columns_size do
          value = event_map[@columns[i]]
          if value != nil
            entry.set(i, value)
          end
        end

        if @shard_keys.size > 0
          hash_string = ""
          for i in 0...@shard_keys.size
            shard_key = @shard_keys[i]
            if event_map[shard_key] != nil
              hash_string += event_map[shard_key].to_s + ","
            end
          end
          hashed_value = java.lang.String.new(hash_string).hashCode()
          entry.setPartitionKey(hashed_value)
        else
          entry.setShardId(shard_id)
        end
        entries.push(entry)
      end

      # puts "total: " + entries.size.to_s

      # Only commit when the list has data
      if entries.size > 0
        put_result = @client.putRecords(@project_name, @topic_name, entries)
        if put_result.getFailedRecordCount() > 0
          @logger.info "Put " + put_result.getFailedRecordCount().to_s + " records to datahub failed, total " + entries.size().to_s
          sleep @retry_interval
          entries = put_result.getFailedRecords()
          @logger.info "write to datahub, failed: " + entries.size.to_s
        else
          @logger.info "Put data to datahub success, total " + entries.size().to_s
        end
      end

    rescue DatahubPackage.exception.DatahubServiceException => e
      @logger.error "Flush data exception: " + e.message #+ " " + e.backtrace.inspect.to_s
      # The shard state has changed; reload the shard list
      if e.getErrorCode() == "InvalidShardOperation"
        @shards = get_active_shards(@topic.listShard())
        @shard_count = @shards.size()

        if @shard_count == 0
          @logger.error "No active shard available, please check"
        end
      elsif e.getErrorCode() == nil
        sleep @retry_interval
      end
      retry
    rescue => e
      @logger.error "Flush data exception: " + e.message + " " + e.backtrace.inspect.to_s

      # Retry forever
      if @retry_times < 0
        @logger.warn "Now retry..."
        # puts "Now retry..."
        sleep @retry_interval
        retry
      # Retries exhausted
      elsif @retry_times == 0
        @logger.error "Retry not work, now exit"
        Process.exit(1)
      # Keep retrying
      elsif @retry_times > 0
        @logger.warn "Now retry..."
        # puts "Now retry..."
        sleep @retry_interval
        @retry_times -= 1
        retry
      end
    end
  end # def multi_receive

end # class LogStash::Outputs::Datahub
data/logstash-output-datahub.gemspec
ADDED
@@ -0,0 +1,25 @@
Gem::Specification.new do |s|
  s.name = 'logstash-output-datahub'
  s.version = "1.0.0"
  s.licenses = ["Apache License (2.0)"]
  s.summary = "This aliyun-datahub output plugin."
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
  s.authors = ["Aliyun"]
  s.email = "stream@service.aliyun.com"
  s.homepage = "https://datahub.console.aliyun.com/datahub"
  s.require_paths = ["lib"]
  #s.platform = 'java'
  # Files
  s.files = Dir['lib/**/*','lib/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE']
  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }

  # Gem dependencies
  s.add_runtime_dependency 'stud'
  s.add_runtime_dependency "logstash-core", ">= 2.0.0", "< 3.0.0"
  s.add_runtime_dependency "logstash-codec-plain"
  s.add_development_dependency "logstash-devutils"
end
data/spec/outputs/datahub.rb
ADDED
@@ -0,0 +1,22 @@
# encoding: utf-8
require "logstash/devutils/rspec/spec_helper"
require "logstash/outputs/datahub"
require "logstash/codecs/plain"
require "logstash/event"

describe LogStash::Outputs::Datahub do
  let(:sample_event) { LogStash::Event.new }
  let(:output) { LogStash::Outputs::Datahub.new }

  before do
    output.register
  end

  describe "receive message" do
    subject { output.receive(sample_event) }

    it "returns a string" do
      expect(subject).to eq("Event received")
    end
  end
end
data/vendor/jar-dependencies/runtime-jars/*.jar
ADDED
Binary files (the runtime jars listed at the top of this page)
metadata
ADDED
@@ -0,0 +1,134 @@
--- !ruby/object:Gem::Specification
name: logstash-output-datahub
version: !ruby/object:Gem::Version
  version: 1.0.0
platform: ruby
authors:
- Aliyun
autorequire:
bindir: bin
cert_chain: []
date: 2016-09-20 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: stud
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: logstash-core
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 2.0.0
    - - "<"
      - !ruby/object:Gem::Version
        version: 3.0.0
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 2.0.0
    - - "<"
      - !ruby/object:Gem::Version
        version: 3.0.0
- !ruby/object:Gem::Dependency
  name: logstash-codec-plain
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: logstash-devutils
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
description: This gem is a logstash plugin required to be installed on top of the
  Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not
  a stand-alone program
email: stream@service.aliyun.com
executables: []
extensions: []
extra_rdoc_files: []
files:
- CHANGELOG.md
- CONTRIBUTORS
- Gemfile
- LICENSE
- README.md
- lib/logstash/outputs/datahub-test.rb
- lib/logstash/outputs/datahub.rb
- logstash-output-datahub.gemspec
- spec/outputs/datahub.rb
- vendor/jar-dependencies/runtime-jars/aliyun-sdk-datahub-2.2.1-SNAPSHOT.jar
- vendor/jar-dependencies/runtime-jars/bouncycastle.provider-1.38-jdk15.jar
- vendor/jar-dependencies/runtime-jars/commons-codec-1.9.jar
- vendor/jar-dependencies/runtime-jars/commons-io-2.4.jar
- vendor/jar-dependencies/runtime-jars/commons-lang3-3.3.2.jar
- vendor/jar-dependencies/runtime-jars/gson-2.6.2.jar
- vendor/jar-dependencies/runtime-jars/jackson-annotations-2.4.0.jar
- vendor/jar-dependencies/runtime-jars/jackson-core-2.4.4.jar
- vendor/jar-dependencies/runtime-jars/jackson-core-asl-1.9.13.jar
- vendor/jar-dependencies/runtime-jars/jackson-databind-2.4.4.jar
- vendor/jar-dependencies/runtime-jars/jackson-mapper-asl-1.9.13.jar
- vendor/jar-dependencies/runtime-jars/log4j-1.2.17.jar
- vendor/jar-dependencies/runtime-jars/lz4-1.3.0.jar
- vendor/jar-dependencies/runtime-jars/slf4j-api-1.7.12.jar
- vendor/jar-dependencies/runtime-jars/slf4j-log4j12-1.7.12.jar
homepage: https://datahub.console.aliyun.com/datahub
licenses:
- Apache License (2.0)
metadata:
  logstash_plugin: 'true'
  logstash_group: output
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubyforge_project:
rubygems_version: 2.4.5.1
signing_key:
specification_version: 4
summary: This aliyun-datahub output plugin.
test_files:
- spec/outputs/datahub.rb