fluent-plugin-aliyun-oss 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/AUTHORS +1 -0
- data/ChangeLog +2 -0
- data/Gemfile +6 -0
- data/README.md +342 -0
- data/Rakefile +0 -0
- data/VERSION +1 -0
- data/fluent-plugin-oss.gemspec +24 -0
- data/lib/fluent/plugin/in_oss.rb +348 -0
- data/lib/fluent/plugin/mns/message.rb +52 -0
- data/lib/fluent/plugin/mns/request.rb +81 -0
- data/lib/fluent/plugin/oss_compressor_gzip_command.rb +55 -0
- data/lib/fluent/plugin/oss_compressor_lzma2.rb +45 -0
- data/lib/fluent/plugin/oss_compressor_lzo.rb +45 -0
- data/lib/fluent/plugin/oss_decompressor_gzip_command.rb +41 -0
- data/lib/fluent/plugin/oss_decompressor_lzma2.rb +36 -0
- data/lib/fluent/plugin/oss_decompressor_lzo.rb +36 -0
- data/lib/fluent/plugin/out_oss.rb +423 -0
- data/test/plugin/test_in_oss.rb +166 -0
- data/test/plugin/test_out_oss.rb +175 -0
- metadata +159 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e97dcc54cfbae27991f9c36fb3cfba27eea1b34dbb4e042c59373581dff3dcd9
|
4
|
+
data.tar.gz: c20c73e48b3acd11e18f8f37b5f07617fbe7839b46e9293f5a373c54cf1778fc
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 13b8298032dc9c39fe28cbc7e79f56868df3b022166cf1b3a304c36ea815a86343e73eb4bfbac3bf1604d28a9cde11e197e83e8cb62b0ac8d3469f528c5ee9bc
|
7
|
+
data.tar.gz: 4597eeabd1865a7291c60cc5caffd9125cb1d215b697696649fc5613329d57c5d4bb6389a208bf9e688f92ab0c34aff650a8edc23c5f9df8791a222b02db2bef
|
data/.gitignore
ADDED
data/AUTHORS
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Jinhu Wu <jinhu.wu.nju _at_ gmail.com>
|
data/ChangeLog
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,342 @@
|
|
1
|
+
# Aliyun OSS plugin for [Fluentd](http://github.com/fluent/fluentd)
|
2
|
+
|
3
|
+
It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want.
|
4
|
+
|
5
|
+
## Overview
|
6
|
+
**Fluent OSS output plugin** buffers event logs in local files and uploads them to OSS periodically in background threads.
|
7
|
+
|
8
|
+
This plugin splits events by the timestamp of the event logs. For example, if a log '2019-04-09 message Hello' arrives, followed by another log '2019-04-10 message World', the former is stored in the "20190409.gz" file and the latter in the "20190410.gz" file.
|
9
|
+
|
10
|
+
**Fluent OSS input plugin** reads data from OSS periodically.
|
11
|
+
|
12
|
+
This plugin uses MNS in the same region as the OSS bucket. You must set up MNS and OSS event notification before using this plugin.
|
13
|
+
|
14
|
+
[This document](https://help.aliyun.com/document_detail/52656.html) shows how to set up MNS and OSS event notification.
|
15
|
+
|
16
|
+
This plugin will poll events from MNS queue and extract object keys from these events, and then will read those objects from OSS.
|
17
|
+
|
18
|
+
## Installation
|
19
|
+
|
20
|
+
Simply use RubyGems(Run command in td-agent installation directory):
|
21
|
+
```bash
|
22
|
+
[root@master td-agent]# ./embedded/bin/fluent-gem install fluent-plugin-oss
|
23
|
+
```
|
24
|
+
Then, you can check installed plugin
|
25
|
+
```bash
|
26
|
+
[root@master td-agent]# ./embedded/bin/fluent-gem list fluent-plugin-oss
|
27
|
+
|
28
|
+
*** LOCAL GEMS ***
|
29
|
+
|
30
|
+
fluent-plugin-oss (0.0.1)
|
31
|
+
```
|
32
|
+
|
33
|
+
## Development
|
34
|
+
|
35
|
+
### 1. Plugin Development and Testing
|
36
|
+
|
37
|
+
#### Code
|
38
|
+
- Install dependencies
|
39
|
+
|
40
|
+
```sh
|
41
|
+
bundle install
|
42
|
+
```
|
43
|
+
|
44
|
+
#### Test
|
45
|
+
|
46
|
+
- Update your dependencies
|
47
|
+
|
48
|
+
```sh
|
49
|
+
bundle install
|
50
|
+
```
|
51
|
+
|
52
|
+
- Run tests
|
53
|
+
You should set environment variables like below:
|
54
|
+
|
55
|
+
test_out_oss.rb
|
56
|
+
```sh
|
57
|
+
STORE_AS="" OSS_ENDPOINT="" ACCESS_KEY_ID="" ACCESS_KEY_SECRET="" OSS_BUCKET="" OSS_PATH="" bundle exec rspec test/plugin/test_out_oss.rb
|
58
|
+
```
|
59
|
+
|
60
|
+
test_in_oss.rb
|
61
|
+
```sh
|
62
|
+
STORE_AS="" OSS_ENDPOINT="" ACCESS_KEY_ID="" ACCESS_KEY_SECRET="" OSS_BUCKET="" MNS_ENDPOINT="" MNS_QUEUE="" bundle exec rspec test/plugin/test_in_oss.rb
|
63
|
+
|
64
|
+
```
|
65
|
+
|
66
|
+
## Usage
|
67
|
+
This is an example of fluent config.
|
68
|
+
|
69
|
+
It will read data posted by HTTP and buffer data to local directory before writing to OSS.
|
70
|
+
You can try it by running curl command:
|
71
|
+
```bash
|
72
|
+
[root@master td-agent]# while [[ 1 ]]; do curl -X POST -d 'json={"json":"message"}' http://localhost:8888/debug.test; done
|
73
|
+
```
|
74
|
+
<match debug.*>
|
75
|
+
@type oss
|
76
|
+
endpoint <OSS endpoint to connect to>
|
77
|
+
bucket <Your Bucket>
|
78
|
+
access_key_id <Your Access Key>
|
79
|
+
access_key_secret <Your Secret Key>
|
80
|
+
path fluent-oss/logs
|
81
|
+
auto_create_bucket true
|
82
|
+
key_format %{path}/%{time_slice}_%{index}_%{thread_id}.%{file_extension}
|
83
|
+
store_as gzip
|
84
|
+
<buffer tag,time>
|
85
|
+
@type file
|
86
|
+
path /var/log/fluent/oss
|
87
|
+
timekey 60 # 1 min partition
|
88
|
+
timekey_wait 20s
|
89
|
+
#timekey_use_utc true
|
90
|
+
</buffer>
|
91
|
+
<format>
|
92
|
+
@type json
|
93
|
+
</format>
|
94
|
+
</match>
|
95
|
+
|
96
|
+
# HTTP input
|
97
|
+
# POST http://localhost:8888/<tag>?json=<json>
|
98
|
+
# POST http://localhost:8888/td.myapp.login?json={"user"%3A"me"}
|
99
|
+
# @see http://docs.fluentd.org/articles/in_http
|
100
|
+
<source>
|
101
|
+
@type http
|
102
|
+
@id input_http
|
103
|
+
port 8888
|
104
|
+
</source>
|
105
|
+
|
106
|
+
|
107
|
+
## Configuration: Output Plugin
|
108
|
+
This plugin supports the following configuration options
|
109
|
+
|
110
|
+
|Configuration|Type|Required|Comments|Default|
|
111
|
+
|:---:|:---:|:---:|:---:|:---|
|
112
|
+
|endpoint|string|Yes|OSS endpoint to connect|
|
113
|
+
|bucket|string|Yes|Your OSS bucket name|
|
114
|
+
|access_key_id|string|Yes|Your access key id|
|
115
|
+
|access_key_secret|string|Yes|Your access secret key|
|
116
|
+
|path|string|No|Prefix that added to the generated file name|fluent/logs|
|
117
|
+
|oss_sdk_log_dir|string|No|OSS SDK log directory|/var/log/td-agent|
|
118
|
+
|upload_crc_enable|bool|No|Enable upload crc check|true|
|
119
|
+
|download_crc_enable|bool|No|Enable download crc check|true|
|
120
|
+
|open_timeout|integer|No|Timeout seconds for open connections|10|
|
121
|
+
|read_timeout|integer|No|Timeout seconds for read response|120|
|
122
|
+
|key_format|string|No|The format of OSS object keys|%{path}/%{time_slice}\_%{index}\_%{thread_id}.%{file_extension}|
|
123
|
+
|store_as|string|No|Archive format on OSS|gzip|
|
124
|
+
|auto_create_bucket|bool|No|Create OSS bucket if it does not exist|true|
|
125
|
+
|overwrite|bool|No|Overwrite already existing OSS path|false|
|
126
|
+
|check_bucket|bool|No|Check bucket if exists or not|true|
|
127
|
+
|check_object|bool|No|Check object before creation|true|
|
128
|
+
|hex_random_length|integer|No|The length of `%{hex_random}` placeholder(4-16)|4|
|
129
|
+
|index_format|string|No|`sprintf` format for `%{index}`|%d|
|
130
|
+
|warn_for_delay|time|No|Set a threshold of events latency and mark these slow events as delayed, output warning logs if delayed events were put into OSS|nil|
|
131
|
+
|
132
|
+
### Some configuration details
|
133
|
+
**key_format**
|
134
|
+
|
135
|
+
The format of OSS object keys. You can use the following built-in variables to generate keys dynamically:
|
136
|
+
* %{path}
|
137
|
+
* %{time_slice}
|
138
|
+
* %{index}
|
139
|
+
* %{file_extension}
|
140
|
+
* %{hex_random}
|
141
|
+
* %{uuid_flush}
|
142
|
+
* %{thread_id}
|
143
|
+
|
144
|
+
* %{path} is exactly the value of **path** configured in the configuration file.
|
145
|
+
E.g., "fluent-oss/logs" in the example configuration above.
|
146
|
+
* %{time_slice} is the time-slice in text that are formatted with **time_slice_format**.
|
147
|
+
* %{index} is the sequential number starts from 0, increments when multiple files are uploaded to OSS in the same time slice.
|
148
|
+
* %{file_extension} depends on **store_as** parameter.
|
149
|
+
* %{thread_id} is the unique id of a flush thread (the number of flush threads is defined by `flush_thread_count`). You can use %{thread_id} with other built-in variables to make unique object names.
|
150
|
+
* %{uuid_flush} is a UUID that is renewed every time the buffer is flushed. If you want to use this placeholder, install the `uuidtools` gem first.
|
151
|
+
* %{hex_random} a random hex string that is renewed for each buffer chunk, not
|
152
|
+
guaranteed to be unique. This is used for performance tuning as the article below described,
|
153
|
+
[OSS performance best practice](https://help.aliyun.com/document_detail/64945.html).
|
154
|
+
You can configure the length of string with a
|
155
|
+
`hex_random_length` parameter (Default is 4).
|
156
|
+
|
157
|
+
The default format is `%{path}/%{time_slice}_%{index}_%{thread_id}.%{file_extension}`.
|
158
|
+
For instance, using the example configuration above, actual object keys on OSS
|
159
|
+
will be something like(flush_thread_count is 1):
|
160
|
+
|
161
|
+
"fluent-oss/logs_20190410-10_15_0_69928273148640.gz"
|
162
|
+
"fluent-oss/logs_20190410-10_16_0_69928273148640.gz"
|
163
|
+
"fluent-oss/logs_20190410-10_17_0_69928273148640.gz"
|
164
|
+
|
165
|
+
With the configuration(flush_thread_count is 2):
|
166
|
+
|
167
|
+
key_format %{path}/events/ts=%{time_slice}/events_%{index}_%{thread_id}.%{file_extension}
|
168
|
+
time_slice_format %Y%m%d-%H
|
169
|
+
path fluent-oss/logs
|
170
|
+
|
171
|
+
You get:
|
172
|
+
|
173
|
+
fluent-oss/logs/events/ts=20190410-10/events_0_69997953090220.gz
|
174
|
+
fluent-oss/logs/events/ts=20190410-10/events_0_69997953090620.gz
|
175
|
+
fluent-oss/logs/events/ts=20190410-10/events_1_69997953090220.gz
|
176
|
+
fluent-oss/logs/events/ts=20190410-10/events_1_69997953090620.gz
|
177
|
+
fluent-oss/logs/events/ts=20190410-10/events_2_69997953090220.gz
|
178
|
+
fluent-oss/logs/events/ts=20190410-10/events_2_69997953090620.gz
|
179
|
+
|
180
|
+
This plugin also supports adding the hostname to the final object keys, with the configuration:
|
181
|
+
|
182
|
+
**Note:** You should wrap the value of `key_format` in double quotes if you use this feature
|
183
|
+
|
184
|
+
key_format "%{path}/events/ts=%{time_slice}/#{Socket.gethostname}/events_%{index}_%{thread_id}.%{file_extension}"
|
185
|
+
time_slice_format %Y%m%d-%H
|
186
|
+
path fluent-oss/logs
|
187
|
+
|
188
|
+
You get(flush_thread_count is 1):
|
189
|
+
|
190
|
+
fluent-oss/logs/events/ts=20190410-10/master/events_0_70186087552680.gz
|
191
|
+
fluent-oss/logs/events/ts=20190410-10/master/events_1_70186087552680.gz
|
192
|
+
fluent-oss/logs/events/ts=20190410-10/master/events_2_70186087552680.gz
|
193
|
+
|
194
|
+
**store_as**
|
195
|
+
|
196
|
+
Archive format on OSS. You can use several formats:
|
197
|
+
* gzip (default)
|
198
|
+
* json
|
199
|
+
* text
|
200
|
+
* lzo (Need lzop command)
|
201
|
+
* lzma2 (Need xz command)
|
202
|
+
* gzip_command (Need gzip command)
|
203
|
+
* This compressor uses an external gzip command, hence would result in
|
204
|
+
utilizing CPU cores well compared with `gzip`
|
205
|
+
|
206
|
+
**auto_create_bucket**
|
207
|
+
|
208
|
+
Create OSS bucket if it does not exist. Default is true.
|
209
|
+
|
210
|
+
**check_bucket**
|
211
|
+
|
212
|
+
Check configured bucket if it exists or not. Default is true.
|
213
|
+
When it is false, fluentd will not check the existence of the configured bucket.
|
214
|
+
This is the case where bucket will be pre-created before running fluentd.
|
215
|
+
|
216
|
+
**check_object**
|
217
|
+
|
218
|
+
Check object before creation if it exists or not. Default is true.
|
219
|
+
|
220
|
+
When it is false, key_format will be %{path}/%{time_slice}\_%{hms_slice}\_%{thread_id}.%{file_extension} by default where,
|
221
|
+
hms_slice will be time-slice in hhmmss format. With hms_slice and thread_id, each object is unique.
|
222
|
+
Example object name, assuming it is created on 2019/04/10 10:30:54 AM 20190410_103054_70186087552260.txt (extension can be anything as per user's choice)
|
223
|
+
|
224
|
+
**path**
|
225
|
+
|
226
|
+
Path prefix of the files on OSS. Default is "fluent-oss/logs".
|
227
|
+
|
228
|
+
**time_slice_format**
|
229
|
+
|
230
|
+
Format of the time used as the file name. Default is '%Y%m%d'. Use
|
231
|
+
'%Y%m%d%H' to split files hourly.
|
232
|
+
|
233
|
+
**utc**
|
234
|
+
|
235
|
+
Use UTC instead of local time.
|
236
|
+
|
237
|
+
**hex_random_length**
|
238
|
+
|
239
|
+
The length of `%{hex_random}` placeholder. Default is 4.
|
240
|
+
|
241
|
+
**index_format**
|
242
|
+
|
243
|
+
`%{index}` is formatted by [sprintf](http://ruby-doc.org/core-2.2.0/Kernel.html#method-i-sprintf) using this format_string. Default is '%d'. Zero padding is supported e.g. `%04d` to ensure minimum length four digits. `%{index}` can be in lowercase or uppercase hex using '%x' or '%X'
|
244
|
+
|
245
|
+
**overwrite**
|
246
|
+
|
247
|
+
Overwrite already existing path. Default is false, which raises an error
|
248
|
+
if an OSS object of the same path already exists, or increment the
|
249
|
+
`%{index}` placeholder until finding an absent path.
|
250
|
+
|
251
|
+
**warn_for_delay**
|
252
|
+
|
253
|
+
Set a threshold to treat events as delay, output warning logs if delayed events were put into OSS.
|
254
|
+
|
255
|
+
## Configuration: Input Plugin
|
256
|
+
|
257
|
+
|Configuration|Type|Required|Comments|Default|
|
258
|
+
|:---:|:---:|:---:|:---:|:---|
|
259
|
+
|endpoint|string|Yes|OSS endpoint to connect|
|
260
|
+
|bucket|string|Yes|Your OSS bucket name|
|
261
|
+
|access_key_id|string|Yes|Your access key id|
|
262
|
+
|access_key_secret|string|Yes|Your access secret key|
|
263
|
+
|oss_sdk_log_dir|string|No|OSS SDK log directory|/var/log/td-agent|
|
264
|
+
|upload_crc_enable|bool|No|Enable upload crc check|true|
|
265
|
+
|download_crc_enable|bool|No|Enable download crc check|true|
|
266
|
+
|open_timeout|integer|No|Timeout seconds for open connections|10|
|
267
|
+
|read_timeout|integer|No|Timeout seconds for read response|120|
|
268
|
+
|store_as|string|No|Archive format on OSS|gzip|
|
269
|
+
|flush_batch_lines|integer|No|Flush to downstream every `flush_batch_lines` lines.|1000|
|
270
|
+
|flush_pause_milliseconds|integer|No|Sleep interval between two flushes to downstream.|1|
|
271
|
+
|store_local|bool|No|Store OSS Objects to local or memory before parsing(Used for objects with `text`/`json`/`gzip` formats)|true|
|
272
|
+
|mns|configuration section|Yes|MNS configurations|
|
273
|
+
|
274
|
+
### Usage
|
275
|
+
This is an example of fluent config.
|
276
|
+
|
277
|
+
<source>
|
278
|
+
@type oss
|
279
|
+
endpoint <OSS endpoint to connect to>
|
280
|
+
bucket <Your Bucket>
|
281
|
+
access_key_id <Your Access Key>
|
282
|
+
access_key_secret <Your Secret Key>
|
283
|
+
flush_batch_lines 1000
|
284
|
+
<mns>
|
285
|
+
endpoint <MNS endpoint to connect to, E.g.,{account-id}.mns.cn-zhangjiakou-internal.aliyuncs.com>
|
286
|
+
queue <MNS queue>
|
287
|
+
wait_seconds 10
|
288
|
+
poll_interval_seconds 10
|
289
|
+
</mns>
|
290
|
+
</source>
|
291
|
+
|
292
|
+
### Some configuration details
|
293
|
+
|
294
|
+
**store_as**
|
295
|
+
Archive format on OSS. You can use several formats:
|
296
|
+
* gzip (default)
|
297
|
+
* json
|
298
|
+
* text
|
299
|
+
* lzo (Need lzop command)
|
300
|
+
* lzma2 (Need xz command)
|
301
|
+
* gzip_command (Need gzip command)
|
302
|
+
* This compressor uses an external gzip command, hence would result in
|
303
|
+
utilizing CPU cores well compared with `gzip`
|
304
|
+
|
305
|
+
**flush_batch_lines**
|
306
|
+
|
307
|
+
Flush to down streams every `flush_batch_lines` lines.
|
308
|
+
|
309
|
+
**flush_pause_milliseconds**
|
310
|
+
|
311
|
+
Sleep interval between two flushes to downstream. Default is 1ms; the plugin will not sleep if `flush_pause_milliseconds` is less than or equal to 0.
|
312
|
+
|
313
|
+
**store_local(default is true)**
|
314
|
+
|
315
|
+
Store OSS Objects to local or memory before parsing(Used for objects with `text`/`json`/`gzip` formats).
|
316
|
+
|
317
|
+
Objects with `lzo`/`lzma2`/`gzip_command` formats are always stored to local directory before parsing.
|
318
|
+
|
319
|
+
**format**
|
320
|
+
|
321
|
+
Parse a line as this format in the OSS object. Supported formats are "apache_error", "apache2", "syslog", "json", "tsv", "ltsv", "csv", "nginx" and "none".
|
322
|
+
|
323
|
+
**mns**
|
324
|
+
|
325
|
+
[MNS consume messages](https://help.aliyun.com/document_detail/35136.html)
|
326
|
+
|
327
|
+
* endpoint
|
328
|
+
* queue
|
329
|
+
* wait_seconds
|
330
|
+
* poll_interval_seconds Poll messages interval from MNS
|
331
|
+
|
332
|
+
For more details about mns configurations, please view MNS documentation in the link above.
|
333
|
+
|
334
|
+
## Website, license, et al.
|
335
|
+
|
336
|
+
| Web site | http://fluentd.org/ |
|
337
|
+
|-------------------|-------------------------------------------|
|
338
|
+
| Documents | http://docs.fluentd.org/ |
|
339
|
+
| Source repository | http://github.com/aliyun/fluent-plugin-oss |
|
340
|
+
| Discussion | http://groups.google.com/group/fluentd |
|
341
|
+
| Author | Jinhu Wu |
|
342
|
+
| License | Apache License, Version 2.0 |
|
data/Rakefile
ADDED
File without changes
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Gem specification for the Aliyun OSS plugin for Fluentd.
$LOAD_PATH.push File.expand_path('lib', __dir__)

Gem::Specification.new do |gem|
  gem.name        = 'fluent-plugin-aliyun-oss'
  gem.description = 'Aliyun OSS plugin for Fluentd event collector'
  gem.license     = 'Apache-2.0'
  gem.homepage    = 'https://github.com/aliyun/fluent-plugin-oss'
  gem.summary     = gem.description
  # Resolve VERSION next to this gemspec so the spec also loads when the
  # current working directory is not the project root.
  gem.version     = File.read(File.expand_path('VERSION', __dir__)).strip
  gem.authors     = ['Jinhu Wu']
  gem.email       = 'jinhu.wu.nju@gmail.com'
  gem.files       = `git ls-files`.split("\n")
  gem.test_files  = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  gem.require_paths = ['lib']

  gem.add_dependency 'aliyun-sdk', ['0.7.0']
  gem.add_dependency 'fluentd', ['>= 0.14.22', '< 2']
  gem.add_development_dependency 'rake', '~> 0.9', '>= 0.9.2'
  gem.add_development_dependency 'test-unit', '~> 3.0', '>= 3.0.8'
  gem.add_development_dependency 'test-unit-rr', '~> 1.0', '>= 1.0.3'
end
|
@@ -0,0 +1,348 @@
|
|
1
|
+
require 'fluent/plugin/input'
|
2
|
+
require 'aliyun/oss'
|
3
|
+
require 'aliyun/sts'
|
4
|
+
require 'base64'
|
5
|
+
require 'fluent/plugin/mns/request'
|
6
|
+
require 'fluent/plugin/mns/message'
|
7
|
+
require 'json'
|
8
|
+
require 'zlib'
|
9
|
+
|
10
|
+
# This is Fluent OSS Input Plugin
|
11
|
+
module Fluent
|
12
|
+
# Fluent OSS Plugin
|
13
|
+
module Plugin
|
14
|
+
# OSSInput class implementation
|
15
|
+
class OSSInput < Input
|
16
|
+
Fluent::Plugin.register_input('oss', self)

helpers :compat_parameters, :parser, :thread

# Parser type used when the <parse> section does not name one.
DEFAULT_PARSE_TYPE = 'none'.freeze

desc 'OSS endpoint to connect to'
config_param :endpoint, :string
desc 'Your bucket name'
config_param :bucket, :string
desc 'Your access key id'
config_param :access_key_id, :string
desc 'Your access secret key'
# secret: true keeps the credential masked when Fluentd dumps the configuration.
config_param :access_key_secret, :string, secret: true
desc 'Enable upload crc check'
config_param :upload_crc_enable, :bool, default: true
desc 'Enable download crc check'
config_param :download_crc_enable, :bool, default: true
desc 'Timeout for open connections'
config_param :open_timeout, :integer, default: 10
desc 'Timeout for read response'
config_param :read_timeout, :integer, default: 120

desc 'OSS SDK log directory'
config_param :oss_sdk_log_dir, :string, default: '/var/log/td-agent'

desc 'Archive format on OSS'
config_param :store_as, :string, default: 'gzip'

desc 'Flush to down streams every `flush_batch_lines` lines'
config_param :flush_batch_lines, :integer, default: 1000

desc 'Sleep interval between two flushes to downstream'
config_param :flush_pause_milliseconds, :integer, default: 1

desc 'Store OSS Objects to local or memory before parsing'
config_param :store_local, :bool, default: true

# Mandatory <mns> section describing the queue that delivers OSS events.
config_section :mns, required: true, multi: false do
  desc 'MNS endpoint to connect to'
  config_param :endpoint, :string
  desc 'MNS queue to poll messages'
  config_param :queue, :string
  desc 'MNS max waiting time to receive messages'
  config_param :wait_seconds, :integer, default: nil
  desc 'Poll messages interval from MNS'
  config_param :poll_interval_seconds, :integer, default: 30
end
|
62
|
+
|
63
|
+
# Creates the plugin instance; the decompressor is assigned later in #configure.
def initialize
  super()
  @decompressor = nil
end
|
67
|
+
|
68
|
+
# Tag used for every event stream this input emits.
desc 'Tag string'
config_param :tag, :string, default: 'input.oss'

# <parse> section; its @type defaults to DEFAULT_PARSE_TYPE.
config_section(:parse) { config_set_default :@type, DEFAULT_PARSE_TYPE }
|
74
|
+
|
75
|
+
# Validates the endpoints and queue, then builds the decompressor selected by
# `store_as` and the line parser from the first <parse> section.
#
# @param conf the plugin configuration handed over by Fluentd
# @raise [Fluent::ConfigError] when the OSS endpoint, MNS endpoint or MNS queue is nil
def configure(conf)
  super

  raise Fluent::ConfigError, 'Invalid oss endpoint' if @endpoint.nil?
  raise Fluent::ConfigError, 'Invalid mns endpoint' if @mns.endpoint.nil?
  raise Fluent::ConfigError, 'Invalid mns queue' if @mns.queue.nil?

  decompressor_class = DECOMPRESSOR_REGISTRY.lookup(@store_as)
  @decompressor = decompressor_class.new(log: log)
  @decompressor.configure(conf)

  parse_section = conf.elements('parse').first
  @parser = parser_create(conf: parse_section, default_type: DEFAULT_PARSE_TYPE)

  # From here on the value is in seconds, which is what `sleep` is given in #process.
  @flush_pause_milliseconds = @flush_pause_milliseconds * 0.001
end
|
92
|
+
|
93
|
+
# Always answers true.
#
# @return [Boolean]
def multi_workers_ready?
  true
end
|
96
|
+
|
97
|
+
# Points the OSS SDK log at `oss_sdk_log_dir`, makes sure a client and bucket
# handle exist, and starts the background polling thread.
def start
  @oss_sdk_log_dir = "#{@oss_sdk_log_dir}/" unless @oss_sdk_log_dir.end_with?('/')
  sdk_log_file = @oss_sdk_log_dir + Aliyun::Common::Logging::DEFAULT_LOG_FILE
  Aliyun::Common::Logging.set_log_file(sdk_log_file)

  # A client may already have been assigned; only build one when missing.
  create_oss_client unless @oss
  check_bucket
  super

  @running = true
  thread_create(:in_oss, &method(:run))
end
|
108
|
+
|
109
|
+
# Verifies that the configured bucket exists and keeps a handle to it.
#
# @raise [RuntimeError] when the bucket does not exist
def check_bucket
  raise "The specified bucket does not exist: bucket = #{@bucket}" unless @oss.bucket_exist?(@bucket)

  @bucket_handler = @oss.get_bucket(@bucket)
end
|
116
|
+
|
117
|
+
# Builds the Aliyun OSS client from the configured endpoint, credentials,
# CRC switches and timeouts, and stores it in @oss.
def create_oss_client
  client_options = {
    endpoint: @endpoint,
    access_key_id: @access_key_id,
    access_key_secret: @access_key_secret,
    download_crc_enable: @download_crc_enable,
    upload_crc_enable: @upload_crc_enable,
    open_timeout: @open_timeout,
    read_timeout: @read_timeout
  }
  @oss = Aliyun::OSS::Client.new(**client_options)
end
|
128
|
+
|
129
|
+
# Clears the flag checked by the polling loop in #run, then continues the
# regular shutdown.
def shutdown
  @running = false
  super()
end
|
133
|
+
|
134
|
+
private
|
135
|
+
|
136
|
+
# Polling loop executed in the background thread: receives one message from
# the MNS queue, processes it when present, then sleeps for
# `poll_interval_seconds`. The loop ends once @running becomes false.
#
# A failure while polling or processing is logged and the loop continues, so
# one bad poll or message does not stop polling permanently.
def run
  while @running
    begin
      log.info "start to poll message from MNS queue #{@mns.queue}"
      message = receive_message(@mns.queue, @mns.wait_seconds)
      process(Fluent::Plugin::MNS::Message.new(@mns.queue, message)) unless message.nil?
    rescue StandardError => e
      log.warn "in_oss: failed to poll or process message from MNS queue #{@mns.queue}: " \
               "#{e.class}: #{e.message}"
    end
    sleep(@mns.poll_interval_seconds)
  end
end
|
144
|
+
|
145
|
+
# Issues a GET on the queue's messages path through MNS::Request.
#
# @param queue [String] MNS queue name
# @param wait_seconds [Integer, nil] sent as `waitseconds` when truthy
# @return whatever MNS::Request#execute returns
def receive_message(queue, wait_seconds)
  query = wait_seconds ? { waitseconds: wait_seconds } : {}
  connection = {
    log: log,
    method: 'GET',
    endpoint: @mns.endpoint,
    path: "/queues/#{queue}/messages",
    access_key_id: @access_key_id,
    access_key_secret: @access_key_secret
  }
  Fluent::Plugin::MNS::Request.new(connection, {}, query).execute
end
|
158
|
+
|
159
|
+
# Handles one MNS message: every OSS object it references is downloaded,
# decompressed, parsed line by line and emitted; afterwards the message is
# deleted from the queue.
#
# Objects that do not exist or fail to decompress are logged and skipped.
#
# @param message the MNS message wrapper built in #run
def process(message)
  get_objects(message).each do |object|
    key = object.key
    log.info "read object #{key}, size #{object.size} from OSS"

    if @bucket_handler.object_exists?(key)
      process_object(key)
    else
      log.warn "in_oss: object #{key} does not exist!"
    end
  end
  delete_message(@mns.queue, message)
end

# Downloads, decompresses and emits a single object. The download buffer is
# released on every exit path, including decompression and emit failures.
def process_object(key)
  io = download_object(key)
  begin
    content = @decompressor.decompress(io)
  rescue StandardError => ex
    log.warn "#{ex}, skip object #{key}"
    return
  end
  emit_lines(content)
ensure
  release_io(io)
end

# Streams the object into a Tempfile (when the decompressor asks for local
# storage) or an in-memory StringIO, and returns the rewound buffer.
def download_object(key)
  # Loaded here because this file's require list does not include them.
  require 'stringio'
  require 'tempfile'

  io = if @decompressor.save_to_local?
         Tempfile.new('chunk-' + @store_as + '-in-').tap(&:binmode)
       else
         StringIO.new
       end
  @bucket_handler.get_object(key) { |chunk| io.write(chunk) }
  io.rewind
  io
rescue StandardError
  # Do not leave a temporary file behind when the download fails.
  release_io(io)
  raise
end

# Parses each line and emits the events in batches of `flush_batch_lines`,
# pausing between batches when a positive pause is configured.
def emit_lines(content)
  es = Fluent::MultiEventStream.new
  content.each_line do |line|
    @parser.parse(line) { |time, record| es.add(time, record) }
    next if es.size < @flush_batch_lines

    router.emit_stream(@tag, es)
    es = Fluent::MultiEventStream.new
    sleep(@flush_pause_milliseconds) if @flush_pause_milliseconds > 0
  end
  router.emit_stream(@tag, es)
end

# Closes and unlinks a Tempfile buffer; other buffers (or nil) need no cleanup.
def release_io(io)
  io.close(true) if io.is_a?(Tempfile)
rescue StandardError => ex
  log.warn "in_oss: failed to remove temporary file: #{ex}"
end
|
210
|
+
|
211
|
+
# Decodes the Base64/JSON body of an MNS message and turns each entry of its
# 'events' list into an OSSObject bound to the configured bucket.
#
# @param message an object responding to #body
# @return [Array<OSSObject>]
def get_objects(message)
  payload = JSON.parse(Base64.decode64(message.body))
  payload['events'].map do |event|
    object_info = event['oss']['object']
    OSSObject.new(event['eventName'],
                  @bucket,
                  object_info['key'],
                  object_info['size'],
                  object_info['eTag'])
  end
end
|
223
|
+
|
224
|
+
# Issues a DELETE on the queue's messages path, identifying the message by
# its receipt handle.
#
# @param queue [String] MNS queue name
# @param message an object responding to #receipt_handle
# @return whatever MNS::Request#execute returns
def delete_message(queue, message)
  connection = {
    log: log,
    method: 'DELETE',
    endpoint: @mns.endpoint,
    path: "/queues/#{queue}/messages",
    access_key_id: @access_key_id,
    access_key_secret: @access_key_secret
  }
  query = { ReceiptHandle: message.receipt_handle }
  Fluent::Plugin::MNS::Request.new(connection, {}, query).execute
end
|
236
|
+
|
237
|
+
# Read-only record of one object referenced by an MNS event: the event name,
# the bucket, and the object's key, size and ETag.
class OSSObject
  attr_reader :event_name, :bucket, :key, :size, :etag

  def initialize(event_name, bucket, key, size, etag)
    @event_name, @bucket = event_name, bucket
    @key, @size, @etag = key, size, etag
  end
end
|
248
|
+
|
249
|
+
# Base class for decompressors. Subclasses override #ext, #content_type and
# #decompress; the versions here return nil.
class Decompressor
  include Fluent::Configurable

  attr_reader :log

  # @param opts [Hash] only the :log entry is read
  def initialize(opts = {})
    super()
    @log = opts[:log]
  end

  def ext; end

  def content_type; end

  def decompress(io); end

  # Whether the object should be written to a local file before parsing.
  def save_to_local?
    true
  end

  private

  # Runs "<command> -V" to check that an external utility can be executed.
  #
  # @param command [String] executable name
  # @param encode [String, nil] label for the error message; defaults to the command
  # @raise [Fluent::ConfigError] when the command cannot be found
  def check_command(command, encode = nil)
    require 'open3'

    encode = command if encode.nil?
    Open3.capture3("#{command} -V")
  rescue Errno::ENOENT
    raise Fluent::ConfigError,
          "'#{command}' utility must be in PATH for #{encode} decompression"
  end
end
|
284
|
+
|
285
|
+
# Gzip decompression backed by Zlib::GzipReader.
class GZipDecompressor < Decompressor
  # Declared as a typed parameter so the value is a real boolean with a default
  # of true. Reading config['store_local'] gave the raw config string (or nil),
  # which made an unset option falsy and the string "false" truthy.
  desc 'Store OSS Objects to local or memory before parsing'
  config_param :store_local, :bool, default: true

  def ext
    'gz'.freeze
  end

  def content_type
    'application/x-gzip'.freeze
  end

  # @param io [IO] gzip-compressed input
  # @return [Zlib::GzipReader] reader over the decompressed content
  def decompress(io)
    Zlib::GzipReader.wrap(io)
  end

  # @return [Boolean] the configured `store_local` value
  def save_to_local?
    @store_local
  end
end
|
303
|
+
|
304
|
+
# Pass-through "decompression" for plain text objects.
class TextDecompressor < Decompressor
  # Declared as a typed parameter so the value is a real boolean with a default
  # of true. Reading config['store_local'] gave the raw config string (or nil),
  # which made an unset option falsy and the string "false" truthy.
  desc 'Store OSS Objects to local or memory before parsing'
  config_param :store_local, :bool, default: true

  def ext
    'txt'.freeze
  end

  def content_type
    'text/plain'.freeze
  end

  # Plain text needs no decoding; the input is returned unchanged.
  def decompress(io)
    io
  end

  # @return [Boolean] the configured `store_local` value
  def save_to_local?
    @store_local
  end
end
|
322
|
+
|
323
|
+
# JSON objects are handled like plain text; only the extension and content
# type differ from TextDecompressor.
class JsonDecompressor < TextDecompressor
  def content_type
    'application/json'.freeze
  end

  def ext
    'json'.freeze
  end
end
|
333
|
+
|
334
|
+
# Registry consulted by #configure to map `store_as` to a decompressor class.
# NOTE(review): only gzip, text and json are registered here; the other formats
# named in the README are presumably resolved through the
# 'fluent/plugin/oss_decompressor_' prefix - confirm.
DECOMPRESSOR_REGISTRY = Fluent::Registry.new(:oss_decompressor_type, 'fluent/plugin/oss_decompressor_')
DECOMPRESSOR_REGISTRY.register('gzip', GZipDecompressor)
DECOMPRESSOR_REGISTRY.register('text', TextDecompressor)
DECOMPRESSOR_REGISTRY.register('json', JsonDecompressor)
|
342
|
+
|
343
|
+
# Registers a decompressor class under +name+ in DECOMPRESSOR_REGISTRY.
#
# @param name [String] value of `store_as` that selects the decompressor
# @param decompressor [Class] decompressor class to register
def self.register_decompressor(name, decompressor)
  DECOMPRESSOR_REGISTRY.register(name, decompressor)
end
|
346
|
+
end
|
347
|
+
end
|
348
|
+
end
|