fluent-plugin-aliyun-oss 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/AUTHORS +1 -0
- data/ChangeLog +2 -0
- data/Gemfile +6 -0
- data/README.md +342 -0
- data/Rakefile +0 -0
- data/VERSION +1 -0
- data/fluent-plugin-oss.gemspec +24 -0
- data/lib/fluent/plugin/in_oss.rb +348 -0
- data/lib/fluent/plugin/mns/message.rb +52 -0
- data/lib/fluent/plugin/mns/request.rb +81 -0
- data/lib/fluent/plugin/oss_compressor_gzip_command.rb +55 -0
- data/lib/fluent/plugin/oss_compressor_lzma2.rb +45 -0
- data/lib/fluent/plugin/oss_compressor_lzo.rb +45 -0
- data/lib/fluent/plugin/oss_decompressor_gzip_command.rb +41 -0
- data/lib/fluent/plugin/oss_decompressor_lzma2.rb +36 -0
- data/lib/fluent/plugin/oss_decompressor_lzo.rb +36 -0
- data/lib/fluent/plugin/out_oss.rb +423 -0
- data/test/plugin/test_in_oss.rb +166 -0
- data/test/plugin/test_out_oss.rb +175 -0
- metadata +159 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA256:
  metadata.gz: e97dcc54cfbae27991f9c36fb3cfba27eea1b34dbb4e042c59373581dff3dcd9
  data.tar.gz: c20c73e48b3acd11e18f8f37b5f07617fbe7839b46e9293f5a373c54cf1778fc
SHA512:
  metadata.gz: 13b8298032dc9c39fe28cbc7e79f56868df3b022166cf1b3a304c36ea815a86343e73eb4bfbac3bf1604d28a9cde11e197e83e8cb62b0ac8d3469f528c5ee9bc
  data.tar.gz: 4597eeabd1865a7291c60cc5caffd9125cb1d215b697696649fc5613329d57c5d4bb6389a208bf9e688f92ab0c34aff650a8edc23c5f9df8791a222b02db2bef
data/.gitignore
ADDED
data/AUTHORS
ADDED
@@ -0,0 +1 @@
Jinhu Wu <jinhu.wu.nju _at_ gmail.com>
data/ChangeLog
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,342 @@
# Aliyun OSS plugin for [Fluentd](http://github.com/fluent/fluentd)

It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want.

## Overview
**Fluent OSS output plugin** buffers event logs in local files and uploads them to OSS periodically in background threads.

This plugin splits events by the timestamp of event logs. For example, if a log '2019-04-09 message Hello' arrives and then another log '2019-04-10 message World' arrives in this order, the former is stored in a "20190409.gz" file and the latter in a "20190410.gz" file.

**Fluent OSS input plugin** reads data from OSS periodically.

This plugin uses MNS in the same region as the OSS bucket. You must set up MNS and OSS event notification before using this plugin.

[This document](https://help.aliyun.com/document_detail/52656.html) shows how to set up MNS and OSS event notification.

This plugin polls events from the MNS queue, extracts object keys from these events, and then reads those objects from OSS.

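As a point of reference, here is a minimal, hypothetical Ruby sketch of the message body shape that the bundled `in_oss.rb` (shown later in this diff) decodes in `get_objects`: a Base64-encoded JSON document carrying an `events` array. The event name, object key, size and eTag values below are made up for illustration.

```ruby
# Hypothetical MNS notification body as decoded by in_oss.rb's get_objects:
# Base64-encoded JSON with an 'events' array. All concrete values are made up.
require 'base64'
require 'json'

body = Base64.encode64(JSON.generate(
  'events' => [
    {
      'eventName' => 'ObjectCreated:PutObject',
      'oss' => {
        'object' => {
          'key'  => 'fluent-oss/logs/20190410-10_0_69928273148640.gz',
          'size' => 1024,
          'eTag' => '5B3C1A2E053D763E1B002CC607C5A0FE'
        }
      }
    }
  ]
))

# The input plugin then extracts the object keys roughly like this:
events = JSON.parse(Base64.decode64(body))['events']
events.each { |event| puts event['oss']['object']['key'] }
```
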
## Installation

Simply use RubyGems (run the command in the td-agent installation directory):
```bash
[root@master td-agent]# ./embedded/bin/fluent-gem install fluent-plugin-oss
```
Then, you can check the installed plugin:
```bash
[root@master td-agent]# ./embedded/bin/fluent-gem list fluent-plugin-oss

*** LOCAL GEMS ***

fluent-plugin-oss (0.0.1)
```

## Development

### 1. Plugin Development and Testing

#### Code
- Install dependencies

```sh
bundle install
```

#### Test

- Update your dependencies

```sh
bundle install
```

- Run tests
You should set environment variables as below:

test_out_oss.rb
```sh
STORE_AS="" OSS_ENDPOINT="" ACCESS_KEY_ID="" ACCESS_KEY_SECRET="" OSS_BUCKET="" OSS_PATH="" bundle exec rspec test/plugin/test_out_oss.rb
```

test_in_oss.rb
```sh
STORE_AS="" OSS_ENDPOINT="" ACCESS_KEY_ID="" ACCESS_KEY_SECRET="" OSS_BUCKET="" MNS_ENDPOINT="" MNS_QUEUE="" bundle exec rspec test/plugin/test_in_oss.rb

```

## Usage
This is an example Fluentd configuration.

It reads data posted over HTTP and buffers it in a local directory before writing to OSS.
You can try it by running this curl command:
```bash
[root@master td-agent]# while [[ 1 ]]; do curl -X POST -d 'json={"json":"message"}' http://localhost:8888/debug.test; done
```
<match debug.*>
@type oss
endpoint <OSS endpoint to connect to>
bucket <Your Bucket>
access_key_id <Your Access Key>
access_key_secret <Your Secret Key>
path fluent-oss/logs
auto_create_bucket true
key_format %{path}/%{time_slice}_%{index}_%{thread_id}.%{file_extension}
store_as gzip
<buffer tag,time>
@type file
path /var/log/fluent/oss
timekey 60 # 1 min partition
timekey_wait 20s
#timekey_use_utc true
</buffer>
<format>
@type json
</format>
</match>

# HTTP input
# POST http://localhost:8888/<tag>?json=<json>
# POST http://localhost:8888/td.myapp.login?json={"user"%3A"me"}
# @see http://docs.fluentd.org/articles/in_http
<source>
@type http
@id input_http
port 8888
</source>

## Configuration: Output Plugin
This plugin supports the following configuration options:

|Configuration|Type|Required|Comments|Default|
|:---:|:---:|:---:|:---:|:---|
|endpoint|string|Yes|OSS endpoint to connect to||
|bucket|string|Yes|Your OSS bucket name||
|access_key_id|string|Yes|Your access key id||
|access_key_secret|string|Yes|Your access secret key||
|path|string|No|Prefix added to the generated file name|fluent/logs|
|oss_sdk_log_dir|string|No|OSS SDK log directory|/var/log/td-agent|
|upload_crc_enable|bool|No|Enable upload CRC check|true|
|download_crc_enable|bool|No|Enable download CRC check|true|
|open_timeout|integer|No|Timeout seconds for opening connections|10|
|read_timeout|integer|No|Timeout seconds for reading responses|120|
|key_format|string|No|The format of OSS object keys|%{path}/%{time_slice}\_%{index}\_%{thread_id}.%{file_extension}|
|store_as|string|No|Archive format on OSS|gzip|
|auto_create_bucket|bool|No|Create the OSS bucket if it does not exist|true|
|overwrite|bool|No|Overwrite an already existing OSS path|false|
|check_bucket|bool|No|Check whether the bucket exists|true|
|check_object|bool|No|Check whether the object exists before creation|true|
|hex_random_length|integer|No|The length of the `%{hex_random}` placeholder (4-16)|4|
|index_format|string|No|`sprintf` format for `%{index}`|%d|
|warn_for_delay|time|No|Latency threshold; warning logs are output if delayed events were put into OSS|nil|

### Some configuration details
**key_format**

The format of OSS object keys. You can use the following built-in variables to generate keys dynamically:
* %{path}
* %{time_slice}
* %{index}
* %{file_extension}
* %{hex_random}
* %{uuid_flush}
* %{thread_id}

* %{path} is exactly the value of **path** configured in the configuration file.
E.g., "fluent-oss/logs" in the example configuration above.
* %{time_slice} is the time slice in text, formatted with **time_slice_format**.
* %{index} is a sequential number starting from 0, incremented when multiple files are uploaded to OSS in the same time slice.
* %{file_extension} depends on the **store_as** parameter.
* %{thread_id} is the unique id of the flush thread (the number of flush threads is defined by `flush_thread_count`). You can use %{thread_id} with other built-in variables to make unique object names.
* %{uuid_flush} is a UUID that is renewed every time the buffer is flushed. If you want to use this placeholder, install the `uuidtools` gem first.
* %{hex_random} is a random hex string that is renewed for each buffer chunk and is not
guaranteed to be unique. This is used for performance tuning, as described in
[OSS performance best practice](https://help.aliyun.com/document_detail/64945.html).
You can configure the length of the string with the
`hex_random_length` parameter (default is 4).

The default format is `%{path}/%{time_slice}_%{index}_%{thread_id}.%{file_extension}`.
For instance, using the example configuration above, actual object keys on OSS
will be something like (flush_thread_count is 1):

"fluent-oss/logs_20190410-10_15_0_69928273148640.gz"
"fluent-oss/logs_20190410-10_16_0_69928273148640.gz"
"fluent-oss/logs_20190410-10_17_0_69928273148640.gz"

With the configuration (flush_thread_count is 2):

key_format %{path}/events/ts=%{time_slice}/events_%{index}_%{thread_id}.%{file_extension}
time_slice_format %Y%m%d-%H
path fluent-oss/logs

You get:

fluent-oss/logs/events/ts=20190410-10/events_0_69997953090220.gz
fluent-oss/logs/events/ts=20190410-10/events_0_69997953090620.gz
fluent-oss/logs/events/ts=20190410-10/events_1_69997953090220.gz
fluent-oss/logs/events/ts=20190410-10/events_1_69997953090620.gz
fluent-oss/logs/events/ts=20190410-10/events_2_69997953090220.gz
fluent-oss/logs/events/ts=20190410-10/events_2_69997953090620.gz

This plugin also supports adding the hostname to the final object keys, with the following configuration:

**Note:** You should add double quotes to the value of `key_format` if you use this feature.

key_format "%{path}/events/ts=%{time_slice}/#{Socket.gethostname}/events_%{index}_%{thread_id}.%{file_extension}"
time_slice_format %Y%m%d-%H
path fluent-oss/logs

You get (flush_thread_count is 1):

fluent-oss/logs/events/ts=20190410-10/master/events_0_70186087552680.gz
fluent-oss/logs/events/ts=20190410-10/master/events_1_70186087552680.gz
fluent-oss/logs/events/ts=20190410-10/master/events_2_70186087552680.gz

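To make the substitution concrete, here is a minimal Ruby sketch of how these placeholders could be expanded into an object key. It is illustrative only, not the plugin's actual implementation, and all sample values are made up.

```ruby
# Illustrative sketch only, not the plugin's implementation: expanding
# key_format placeholders into an object key. All sample values are made up.
key_format = '%{path}/%{time_slice}_%{index}_%{thread_id}.%{file_extension}'

values = {
  'path'           => 'fluent-oss/logs',
  'time_slice'     => Time.now.strftime('%Y%m%d-%H'),
  'index'          => 0,
  'thread_id'      => Thread.current.object_id,
  'file_extension' => 'gz'
}

key = key_format.gsub(/%\{([^}]+)\}/) { values[Regexp.last_match(1)].to_s }
puts key # => e.g. "fluent-oss/logs/20190410-10_0_69928273148640.gz"
```
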
**store_as**

Archive format on OSS. You can use several formats:
* gzip (default)
* json
* text
* lzo (Needs the lzop command)
* lzma2 (Needs the xz command)
* gzip_command (Needs the gzip command)
  * This compressor uses an external gzip command, hence it utilizes CPU cores better than `gzip`.

**auto_create_bucket**

Create the OSS bucket if it does not exist. Default is true.

**check_bucket**

Check whether the configured bucket exists. Default is true.
When it is false, fluentd will not check the existence of the configured bucket.
This is for the case where the bucket is pre-created before running fluentd.

**check_object**

Check whether the object exists before creation. Default is true.

When it is false, key_format will be %{path}/%{time_slice}\_%{hms_slice}\_%{thread_id}.%{file_extension} by default, where
hms_slice is the time slice in hhmmss format. With hms_slice and thread_id, each object is unique.
Example object name, assuming it is created on 2019/04/10 10:30:54 AM: 20190410_103054_70186087552260.txt (the extension can be anything, as per the user's choice).

**path**

Path prefix of the files on OSS. Default is "fluent-oss/logs".

**time_slice_format**

Format of the time used in the file name. Default is '%Y%m%d'. Use
'%Y%m%d%H' to split files hourly.

**utc**

Use UTC instead of local time.

**hex_random_length**

The length of the `%{hex_random}` placeholder. Default is 4.

**index_format**

`%{index}` is formatted by [sprintf](http://ruby-doc.org/core-2.2.0/Kernel.html#method-i-sprintf) using this format string. Default is '%d'. Zero padding is supported, e.g. `%04d` to ensure a minimum length of four digits. `%{index}` can be in lowercase or uppercase hex using '%x' or '%X'.

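For example, with Ruby's `sprintf`:

```ruby
# Kernel#sprintf behaviour for the index_format values mentioned above.
sprintf('%d', 7)    # => "7"
sprintf('%04d', 7)  # => "0007"  (zero-padded to four digits)
sprintf('%x', 255)  # => "ff"    (lowercase hex)
sprintf('%X', 255)  # => "FF"    (uppercase hex)
```
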
**overwrite**

Overwrite an already existing path. Default is false, which raises an error
if an OSS object at the same path already exists, or increments the
`%{index}` placeholder until an absent path is found.

**warn_for_delay**

Set a threshold to treat events as delayed; warning logs are output if delayed events were put into OSS.

## Configuration: Input Plugin

|Configuration|Type|Required|Comments|Default|
|:---:|:---:|:---:|:---:|:---|
|endpoint|string|Yes|OSS endpoint to connect to||
|bucket|string|Yes|Your OSS bucket name||
|access_key_id|string|Yes|Your access key id||
|access_key_secret|string|Yes|Your access secret key||
|oss_sdk_log_dir|string|No|OSS SDK log directory|/var/log/td-agent|
|upload_crc_enable|bool|No|Enable upload CRC check|true|
|download_crc_enable|bool|No|Enable download CRC check|true|
|open_timeout|integer|No|Timeout seconds for opening connections|10|
|read_timeout|integer|No|Timeout seconds for reading responses|120|
|store_as|string|No|Archive format on OSS|gzip|
|flush_batch_lines|integer|No|Flush to downstream every `flush_batch_lines` lines|10000|
|flush_pause_milliseconds|integer|No|Sleep interval between two flushes to downstream|1|
|store_local|bool|No|Store OSS objects to local files or memory before parsing (used for objects with `text`/`json`/`gzip` formats)|true|
|mns|configuration section|Yes|MNS configuration||

### Usage
This is an example Fluentd configuration.

<source>
@type oss
endpoint <OSS endpoint to connect to>
bucket <Your Bucket>
access_key_id <Your Access Key>
access_key_secret <Your Secret Key>
flush_batch_lines 1000
<mns>
endpoint <MNS endpoint to connect to, e.g., {account-id}.mns.cn-zhangjiakou-internal.aliyuncs.com>
queue <MNS queue>
wait_seconds 10
poll_interval_seconds 10
</mns>
</source>

### Some configuration details

**store_as**
Archive format on OSS. You can use several formats:
* gzip (default)
* json
* text
* lzo (Needs the lzop command)
* lzma2 (Needs the xz command)
* gzip_command (Needs the gzip command)
  * This compressor uses an external gzip command, hence it utilizes CPU cores better than `gzip`.

**flush_batch_lines**

Flush to downstream every `flush_batch_lines` lines.

**flush_pause_milliseconds**

Sleep interval between two flushes to downstream. Default is 1 ms; it will not sleep if `flush_pause_milliseconds` is less than or equal to 0.

**store_local (default is true)**

Store OSS objects to local files or memory before parsing (used for objects with `text`/`json`/`gzip` formats).

Objects with `lzo`/`lzma2`/`gzip_command` formats are always stored to a local directory before parsing.

**format**

Parse each line of the OSS object with this format. Supported formats are "apache_error", "apache2", "syslog", "json", "tsv", "ltsv", "csv", "nginx" and "none".

**mns**

[MNS consume messages](https://help.aliyun.com/document_detail/35136.html)

* endpoint
* queue
* wait_seconds
* poll_interval_seconds: interval for polling messages from MNS

For more details about MNS configuration, please see the MNS documentation linked above.

## Website, license, et al.

| Web site          | http://fluentd.org/                        |
|-------------------|--------------------------------------------|
| Documents         | http://docs.fluentd.org/                   |
| Source repository | http://github.com/aliyun/fluent-plugin-oss |
| Discussion        | http://groups.google.com/group/fluentd     |
| Author            | Jinhu Wu                                   |
| License           | Apache License, Version 2.0                |
data/Rakefile
ADDED
File without changes
data/VERSION
ADDED
@@ -0,0 +1 @@
0.0.1
data/fluent-plugin-oss.gemspec
ADDED
@@ -0,0 +1,24 @@
$LOAD_PATH.push File.expand_path('lib', __dir__)

Gem::Specification.new do |gem|
  gem.name = 'fluent-plugin-aliyun-oss'
  gem.description = 'Aliyun OSS plugin for Fluentd event collector'
  gem.license = 'Apache-2.0'
  gem.homepage = 'https://github.com/aliyun/fluent-plugin-oss'
  gem.summary = gem.description
  gem.version = File.read('VERSION').strip
  gem.authors = ['Jinhu Wu']
  gem.email = 'jinhu.wu.nju@gmail.com'
  gem.files = `git ls-files`.split("\n")
  gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.executables = `git ls-files -- bin/*`.split("\n").map do |f|
    File.basename(f)
  end
  gem.require_paths = ['lib']

  gem.add_dependency 'aliyun-sdk', ['0.7.0']
  gem.add_dependency 'fluentd', ['>= 0.14.22', '< 2']
  gem.add_development_dependency 'rake', '~> 0.9', '>= 0.9.2'
  gem.add_development_dependency 'test-unit', '~> 3.0', '>= 3.0.8'
  gem.add_development_dependency 'test-unit-rr', '~> 1.0', '>= 1.0.3'
end
data/lib/fluent/plugin/in_oss.rb
ADDED
@@ -0,0 +1,348 @@
require 'fluent/plugin/input'
require 'aliyun/oss'
require 'aliyun/sts'
require 'base64'
require 'fluent/plugin/mns/request'
require 'fluent/plugin/mns/message'
require 'json'
require 'zlib'

# This is Fluent OSS Input Plugin
module Fluent
  # Fluent OSS Plugin
  module Plugin
    # OSSInput class implementation
    class OSSInput < Input
      Fluent::Plugin.register_input('oss', self)

      helpers :compat_parameters, :parser, :thread

      DEFAULT_PARSE_TYPE = 'none'.freeze

      desc 'OSS endpoint to connect to'
      config_param :endpoint, :string
      desc 'Your bucket name'
      config_param :bucket, :string
      desc 'Your access key id'
      config_param :access_key_id, :string
      desc 'Your access secret key'
      config_param :access_key_secret, :string
      config_param :upload_crc_enable, :bool, default: true
      config_param :download_crc_enable, :bool, default: true
      desc 'Timeout for open connections'
      config_param :open_timeout, :integer, default: 10
      desc 'Timeout for read response'
      config_param :read_timeout, :integer, default: 120

      desc 'OSS SDK log directory'
      config_param :oss_sdk_log_dir, :string, default: '/var/log/td-agent'

      desc 'Archive format on OSS'
      config_param :store_as, :string, default: 'gzip'

      desc 'Flush to down streams every `flush_batch_lines` lines'
      config_param :flush_batch_lines, :integer, default: 1000

      desc 'Sleep interval between two flushes to downstream'
      config_param :flush_pause_milliseconds, :integer, default: 1

      desc 'Store OSS Objects to local or memory before parsing'
      config_param :store_local, :bool, default: true

      config_section :mns, required: true, multi: false do
        desc 'MNS endpoint to connect to'
        config_param :endpoint, :string
        desc 'MNS queue to poll messages'
        config_param :queue, :string
        desc 'MNS max waiting time to receive messages'
        config_param :wait_seconds, :integer, default: nil
        desc 'Poll messages interval from MNS'
        config_param :poll_interval_seconds, :integer, default: 30
      end

      def initialize
        super
        @decompressor = nil
      end

      desc 'Tag string'
      config_param :tag, :string, default: 'input.oss'

      config_section :parse do
        config_set_default :@type, DEFAULT_PARSE_TYPE
      end

      def configure(conf)
        super

        raise Fluent::ConfigError, 'Invalid oss endpoint' if @endpoint.nil?

        raise Fluent::ConfigError, 'Invalid mns endpoint' if @mns.endpoint.nil?

        raise Fluent::ConfigError, 'Invalid mns queue' if @mns.queue.nil?

        @decompressor = DECOMPRESSOR_REGISTRY.lookup(@store_as).new(log: log)
        @decompressor.configure(conf)

        parser_config = conf.elements('parse').first
        @parser = parser_create(conf: parser_config, default_type: DEFAULT_PARSE_TYPE)

        @flush_pause_milliseconds *= 0.001
      end

      def multi_workers_ready?
        true
      end

      def start
        @oss_sdk_log_dir += '/' unless @oss_sdk_log_dir.end_with?('/')
        Aliyun::Common::Logging.set_log_file(@oss_sdk_log_dir + Aliyun::Common::Logging::DEFAULT_LOG_FILE)
        create_oss_client unless @oss

        check_bucket
        super

        @running = true
        thread_create(:in_oss, &method(:run))
      end

      def check_bucket
        unless @oss.bucket_exist?(@bucket)
          raise "The specified bucket does not exist: bucket = #{@bucket}"
        end

        @bucket_handler = @oss.get_bucket(@bucket)
      end

      def create_oss_client
        @oss = Aliyun::OSS::Client.new(
          endpoint: @endpoint,
          access_key_id: @access_key_id,
          access_key_secret: @access_key_secret,
          download_crc_enable: @download_crc_enable,
          upload_crc_enable: @upload_crc_enable,
          open_timeout: @open_timeout,
          read_timeout: @read_timeout
        )
      end

      def shutdown
        @running = false
        super
      end

      private

      def run
        while @running
          log.info "start to poll message from MNS queue #{@mns.queue}"
          message = receive_message(@mns.queue, @mns.wait_seconds)
          process(Fluent::Plugin::MNS::Message.new(@mns.queue, message)) unless message.nil?
          sleep(@mns.poll_interval_seconds)
        end
      end

      def receive_message(queue, wait_seconds)
        request_opts = {}
        request_opts = { waitseconds: wait_seconds } if wait_seconds
        opts = {
          log: log,
          method: 'GET',
          endpoint: @mns.endpoint,
          path: "/queues/#{queue}/messages",
          access_key_id: @access_key_id,
          access_key_secret: @access_key_secret
        }
        Fluent::Plugin::MNS::Request.new(opts, {}, request_opts).execute
      end

      def process(message)
        objects = get_objects(message)
        objects.each do |object|
          key = object.key
          log.info "read object #{key}, size #{object.size} from OSS"

          if @bucket_handler.object_exists?(key)
            if @decompressor.save_to_local?
              io = Tempfile.new('chunk-' + @store_as + '-in-')
              io.binmode
              @bucket_handler.get_object(key) do |chunk|
                io.write(chunk)
              end
            else
              io = StringIO.new
              @bucket_handler.get_object(key) do |chunk|
                io << chunk
              end
            end

            io.rewind

            begin
              content = @decompressor.decompress(io)
            rescue StandardError => ex
              log.warn "#{ex}, skip object #{key}"
              next
            end

            es = Fluent::MultiEventStream.new
            content.each_line do |line|
              @parser.parse(line) do |time, record|
                es.add(time, record)
              end

              if es.size >= @flush_batch_lines
                router.emit_stream(@tag, es)
                es = Fluent::MultiEventStream.new
                if @flush_pause_milliseconds > 0
                  sleep(@flush_pause_milliseconds)
                end
              end
            end
            router.emit_stream(@tag, es)
            io.close(true) rescue nil if @decompressor.save_to_local?
          else
            log.warn "in_oss: object #{key} does not exist!"
          end
        end
        delete_message(@mns.queue, message)
      end

      def get_objects(message)
        objects = []
        events = JSON.parse(Base64.decode64(message.body))['events']
        events.each do |event|
          objects.push(OSSObject.new(event['eventName'],
                                     @bucket,
                                     event['oss']['object']['key'],
                                     event['oss']['object']['size'],
                                     event['oss']['object']['eTag']))
        end
        objects
      end

      def delete_message(queue, message)
        request_opts = { ReceiptHandle: message.receipt_handle }
        opts = {
          log: log,
          method: 'DELETE',
          endpoint: @mns.endpoint,
          path: "/queues/#{queue}/messages",
          access_key_id: @access_key_id,
          access_key_secret: @access_key_secret
        }
        Fluent::Plugin::MNS::Request.new(opts, {}, request_opts).execute
      end

      # OSS Object class from MNS events
      class OSSObject
        attr_reader :event_name, :bucket, :key, :size, :etag
        def initialize(event_name, bucket, key, size, etag)
          @event_name = event_name
          @bucket = bucket
          @key = key
          @size = size
          @etag = etag
        end
      end

      # Decompression base class.
      class Decompressor
        include Fluent::Configurable

        attr_reader :log

        def initialize(opts = {})
          super()
          @log = opts[:log]
        end

        def ext; end

        def save_to_local?
          true
        end

        def content_type; end

        def decompress(io); end

        private

        def check_command(command, encode = nil)
          require 'open3'

          encode = command if encode.nil?
          begin
            Open3.capture3("#{command} -V")
          rescue Errno::ENOENT
            raise Fluent::ConfigError,
                  "'#{command}' utility must be in PATH for #{encode} decompression"
          end
        end
      end

      # Gzip decompression.
      class GZipDecompressor < Decompressor
        def ext
          'gz'.freeze
        end

        def content_type
          'application/x-gzip'.freeze
        end

        def decompress(io)
          Zlib::GzipReader.wrap(io)
        end

        def save_to_local?
          config['store_local']
        end
      end

      # Text decompression.
      class TextDecompressor < Decompressor
        def ext
          'txt'.freeze
        end

        def content_type
          'text/plain'.freeze
        end

        def decompress(io)
          io
        end

        def save_to_local?
          config['store_local']
        end
      end

      # Json decompression.
      class JsonDecompressor < TextDecompressor
        def ext
          'json'.freeze
        end

        def content_type
          'application/json'.freeze
        end
      end

      DECOMPRESSOR_REGISTRY = Fluent::Registry.new(:oss_decompressor_type, 'fluent/plugin/oss_decompressor_')
      {
        'gzip' => GZipDecompressor,
        'text' => TextDecompressor,
        'json' => JsonDecompressor
      }.each do |name, decompressor|
        DECOMPRESSOR_REGISTRY.register(name, decompressor)
      end

      def self.register_decompressor(name, decompressor)
        DECOMPRESSOR_REGISTRY.register(name, decompressor)
      end
    end
  end
end
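The decompressor registry above searches the `fluent/plugin/oss_decompressor_` load path and also exposes `register_decompressor`, which is how the separate lzo/lzma2/gzip_command decompressor files in this package hook in. A minimal, hypothetical sketch of registering an additional command-based decompressor could look like the following; only `Decompressor` and `register_decompressor` come from the plugin code above, while the `zstd` name, the `ZstdCommandDecompressor` class, and the CLI invocation are assumptions.

```ruby
# Hypothetical sketch only: registering an extra decompressor for the OSS
# input plugin. Only Decompressor and register_decompressor come from the
# plugin code above; the 'zstd' name and CLI usage are assumptions.
require 'open3'
require 'fluent/plugin/in_oss'

module Fluent
  module Plugin
    class OSSInput < Input
      class ZstdCommandDecompressor < Decompressor
        def ext
          'zst'.freeze
        end

        def content_type
          'application/zstd'.freeze
        end

        def decompress(io)
          # The base class keeps downloaded objects in a local Tempfile by
          # default, so io.path can be handed to the external zstd command.
          stdout, status = Open3.capture2('zstd', '-d', '-c', io.path)
          raise "zstd -d failed for #{io.path}" unless status.success?
          stdout
        end
      end

      register_decompressor('zstd', ZstdCommandDecompressor)
    end
  end
end
```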