fluent-plugin-gcs 0.4.5 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +2 -0
- data/README.md +211 -148
- data/fluent-plugin-gcs.gemspec +12 -7
- data/lib/fluent/plugin/gcs/object_creator.rb +138 -35
- data/lib/fluent/plugin/gcs/version.rb +1 -1
- data/lib/fluent/plugin/out_gcs.rb +13 -15
- metadata +13 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 832c3cf74b29f669e37a3f6e5e4e720e1e588ec01608169a9dad2d0861b936b4
|
|
4
|
+
data.tar.gz: 2ef5f2d43dfdedb8516dfbbfd91799d1782a9c9922aa792d2ff45713a17bfb01
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: be563af89ac3b24afe7114ec12969d993efb4ac44b6911ad2056977679d223eb696afe6e7bce29b301821d3d3ab34421a8bcd08c7d1d433badd395ccaedb32f6
|
|
7
|
+
data.tar.gz: 8786fbff16b1a96fc733aaad166aa2d16ca33b529ae03fafadb54526824e76025aeff01c9a4ecd56bd5dbfa1164d1ae0bb327a7b85d6095a3c398a4573c949fc
|
data/.github/workflows/test.yml
CHANGED
data/README.md
CHANGED
|
@@ -3,30 +3,38 @@
|
|
|
3
3
|
[](https://github.com/daichirata/fluent-plugin-gcs/actions/workflows/test.yml)
|
|
4
4
|
[](https://badge.fury.io/rb/fluent-plugin-gcs)
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
A [Fluentd](https://www.fluentd.org/) output plugin that buffers events and uploads them to [Google Cloud Storage](https://cloud.google.com/storage).
|
|
7
|
+
|
|
8
|
+
## Features
|
|
9
|
+
|
|
10
|
+
- **Multiple formats** — store objects as gzip, lzo, xz (lzma2), zstd, plain text, or JSON.
|
|
11
|
+
- **Fast compression** — optionally shell out to the external `gzip` binary, with automatic fallback to the pure-Ruby compressor.
|
|
12
|
+
- **Flexible object keys** — build paths from time slices, tags, hostnames, random tokens, and UUIDs.
|
|
13
|
+
- **Server-side controls** — set ACLs, storage class, customer-supplied encryption keys, and custom object metadata.
|
|
14
|
+
- **Flexible auth** — explicit credentials or Application Default Credentials on GCE / GKE / Cloud Run.
|
|
7
15
|
|
|
8
16
|
## Table of contents
|
|
9
17
|
|
|
10
18
|
- [Requirements](#requirements)
|
|
11
19
|
- [Installation](#installation)
|
|
12
|
-
- [
|
|
20
|
+
- [Quick start](#quick-start)
|
|
13
21
|
- [Configuration](#configuration)
|
|
14
22
|
- [Authentication](#authentication)
|
|
15
|
-
- [
|
|
16
|
-
- [
|
|
17
|
-
- [
|
|
23
|
+
- [Object placement](#object-placement)
|
|
24
|
+
- [Format and compression](#format-and-compression)
|
|
25
|
+
- [GCS object settings](#gcs-object-settings)
|
|
26
|
+
- [Object key format](#object-key-format)
|
|
18
27
|
- [Object metadata](#object-metadata)
|
|
28
|
+
- [Examples](#examples)
|
|
19
29
|
- [Development](#development)
|
|
20
30
|
- [Author](#author)
|
|
21
31
|
- [License](#license)
|
|
22
32
|
|
|
23
33
|
## Requirements
|
|
24
34
|
|
|
25
|
-
| fluent-plugin-gcs | fluentd
|
|
26
|
-
|
|
27
|
-
| >= 0.
|
|
28
|
-
| >= 0.4.0 | >= 0.14.22 | >= 2.4 |
|
|
29
|
-
| < 0.4.0 | >= 0.12.0 | >= 1.9 |
|
|
35
|
+
| fluent-plugin-gcs | fluentd | ruby |
|
|
36
|
+
|-------------------|----------|--------|
|
|
37
|
+
| >= 0.5.0 | >= 1.0 | >= 3.3 |
|
|
30
38
|
|
|
31
39
|
## Installation
|
|
32
40
|
|
|
@@ -34,238 +42,293 @@ Google Cloud Storage output plugin for [Fluentd](https://github.com/fluent/fluen
|
|
|
34
42
|
gem install fluent-plugin-gcs
|
|
35
43
|
```
|
|
36
44
|
|
|
37
|
-
|
|
45
|
+
Using td-agent / fluent-package:
|
|
38
46
|
|
|
39
47
|
```shell
|
|
40
48
|
fluent-gem install fluent-plugin-gcs
|
|
41
49
|
```
|
|
42
50
|
|
|
43
|
-
##
|
|
51
|
+
## Quick start
|
|
52
|
+
|
|
53
|
+
The minimal configuration needs only a bucket. On GCE, GKE, or Cloud Run the credentials are picked up automatically from the environment.
|
|
44
54
|
|
|
45
55
|
```aconf
|
|
46
|
-
<match
|
|
56
|
+
<match your.tag>
|
|
47
57
|
@type gcs
|
|
48
58
|
|
|
49
|
-
project YOUR_PROJECT
|
|
50
|
-
keyfile YOUR_KEYFILE_PATH
|
|
51
59
|
bucket YOUR_GCS_BUCKET_NAME
|
|
52
|
-
|
|
53
|
-
path logs/${tag}/%Y/%m/%d/
|
|
60
|
+
path logs/
|
|
54
61
|
|
|
55
|
-
|
|
56
|
-
# specify the corresponding chunk keys in the <buffer> argument.
|
|
57
|
-
<buffer tag,time>
|
|
62
|
+
<buffer time>
|
|
58
63
|
@type file
|
|
59
64
|
path /var/log/fluent/gcs
|
|
60
|
-
timekey 1h
|
|
65
|
+
timekey 1h
|
|
61
66
|
timekey_wait 10m
|
|
62
67
|
timekey_use_utc true
|
|
63
68
|
</buffer>
|
|
64
|
-
|
|
65
|
-
<format>
|
|
66
|
-
@type json
|
|
67
|
-
</format>
|
|
68
69
|
</match>
|
|
69
70
|
```
|
|
70
71
|
|
|
72
|
+
This writes gzip-compressed objects such as `logs/2024010112_0.gz`, one (or more) per hourly time slice.
|
|
73
|
+
|
|
71
74
|
## Configuration
|
|
72
75
|
|
|
73
76
|
### Authentication
|
|
74
77
|
|
|
75
|
-
Provide
|
|
76
|
-
Application Default Credentials when running on Google Compute Engine,
|
|
77
|
-
GKE, or Cloud Run.
|
|
78
|
+
Provide credentials explicitly, or rely on [Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials) when running on Google Cloud.
|
|
78
79
|
|
|
79
|
-
|
|
80
|
+
| Option | Type | Default | Description |
|
|
81
|
+
|--------------------|---------|---------|-------------|
|
|
82
|
+
| `project` | string | `nil` | GCS project identifier |
|
|
83
|
+
| `keyfile` | string | `nil` | Path to a service account credentials JSON file |
|
|
84
|
+
| `credentials_json` | hash | `nil` | Service account credentials inline as JSON. Takes precedence over `keyfile` |
|
|
85
|
+
| `client_retries` | integer | `nil` | Number of retries on server error |
|
|
86
|
+
| `client_timeout` | integer | `nil` | Request timeout in seconds |
|
|
80
87
|
|
|
81
|
-
|
|
88
|
+
`project` is resolved in the following order: the `project` option, then the `STORAGE_PROJECT` / `GOOGLE_CLOUD_PROJECT` / `GCLOUD_PROJECT` environment variables, then GCE metadata.
|
|
82
89
|
|
|
83
|
-
|
|
84
|
-
* Environment variables `STORAGE_PROJECT`, `GOOGLE_CLOUD_PROJECT`, `GCLOUD_PROJECT`
|
|
85
|
-
* GCE metadata
|
|
90
|
+
`keyfile` is resolved in the following order: the `keyfile` option, the `GOOGLE_CLOUD_KEYFILE` / `GCLOUD_KEYFILE` (path) or `GOOGLE_CLOUD_KEYFILE_JSON` / `GCLOUD_KEYFILE_JSON` (inline) environment variables, the Cloud SDK's well-known path, then GCE metadata.
|
|
86
91
|
|
|
87
|
-
|
|
92
|
+
### Object placement
|
|
88
93
|
|
|
89
|
-
|
|
94
|
+
| Option | Type | Default | Description |
|
|
95
|
+
|---------------------|---------|---------|-------------|
|
|
96
|
+
| `bucket` | string | — | **Required.** GCS bucket name |
|
|
97
|
+
| `path` | string | `""` | Path prefix for objects |
|
|
98
|
+
| `object_key_format` | string | `%{path}%{time_slice}_%{index}.%{file_extension}` | Template for object keys. See [Object key format](#object-key-format) |
|
|
99
|
+
| `hex_random_length` | integer | `4` | Length of the `%{hex_random}` placeholder (max 32) |
|
|
100
|
+
| `overwrite` | bool | `false` | Overwrite the existing object instead of incrementing `%{index}` |
|
|
101
|
+
| `blind_write` | bool | `false` | Skip the existence check before writing (see below) |
|
|
90
102
|
|
|
91
|
-
|
|
92
|
-
* Environment variables `GOOGLE_CLOUD_KEYFILE`, `GCLOUD_KEYFILE`
|
|
93
|
-
* Environment variables `GOOGLE_CLOUD_KEYFILE_JSON`, `GCLOUD_KEYFILE_JSON`
|
|
94
|
-
* The Cloud SDK's well-known credentials path
|
|
95
|
-
* GCE metadata
|
|
103
|
+
**Avoiding key collisions.** When `object_key_format` contains `%{index}` (the default), the plugin checks GCS for an existing object and increments `%{index}` until it finds an unused key, so existing objects are never overwritten. This existence check requires the `storage.objects.get` permission.
|
|
96
104
|
|
|
97
|
-
|
|
105
|
+
**`blind_write`** skips that existence check, so the `storage.objects.get` permission is no longer needed. The trade-off is that `%{index}` stops working (it always stays `0`), so you must keep keys unique another way, with `%{hex_random}` (unique per chunk) or `%{uuid_flush}` (unique per flush).
|
|
98
106
|
|
|
99
|
-
|
|
107
|
+
> [!WARNING]
|
|
108
|
+
> If a key collides with an existing object (which can happen with `blind_write true`, or with `overwrite true`), uploading it overwrites the existing object, and GCS requires the `storage.objects.delete` permission to do so. Without that permission the flush fails repeatedly and the buffer chunk is eventually lost. With `blind_write true`, include `%{hex_random}` or `%{uuid_flush}` in `object_key_format` to avoid collisions.
|
|
100
109
|
|
|
101
|
-
|
|
102
|
-
credentials_json {"type": "service_account", "project_id": "...", ...}
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
#### client_retries
|
|
110
|
+
### Format and compression
|
|
106
111
|
|
|
107
|
-
|
|
112
|
+
| Option | Type | Default | Description |
|
|
113
|
+
|---------------------|--------|--------------|-------------|
|
|
114
|
+
| `store_as` | enum | `gzip` | Object format. See the table below |
|
|
115
|
+
| `command_parameter` | string | (per format) | Override the default arguments for the compression command (`gzip_command` / `lzo` / `lzma2` / `zstd`) |
|
|
116
|
+
| `transcoding` | bool | `false` | Enable [decompressive transcoding](https://cloud.google.com/storage/docs/transcoding) (gzip only) |
|
|
108
117
|
|
|
109
|
-
|
|
118
|
+
| `store_as` | Compression | Requires | Default args | Extension | content_type |
|
|
119
|
+
|----------------|-------------|----------|--------------|-----------|--------------|
|
|
120
|
+
| `gzip` | Ruby's built-in `Zlib::GzipWriter` | (none) | — | `gz` | `application/gzip` |
|
|
121
|
+
| `gzip_command` | External `gzip`. Faster for large chunks, falls back to `Zlib::GzipWriter` on failure | `gzip` command | (none) | `gz` | `application/gzip` |
|
|
122
|
+
| `lzo` | External `lzop` | `lzop` command | `-qf1` | `lzo` | `application/x-lzop` |
|
|
123
|
+
| `lzma2` | External `xz` | `xz` command | `-qf0` | `xz` | `application/x-xz` |
|
|
124
|
+
| `zstd` | External `zstd` | `zstd` command | (none) | `zst` | `application/x-zst` |
|
|
125
|
+
| `json` | None (upload as JSON) | (none) | — | `json` | `application/json` |
|
|
126
|
+
| `text` | None (upload as text) | (none) | — | `txt` | `text/plain` |
|
|
110
127
|
|
|
111
|
-
|
|
128
|
+
The command-based formats (`gzip_command`, `lzo`, `lzma2`, `zstd`) stream the chunk through the command's stdin (no intermediate temp file). Each has a sensible default argument set; override it with `command_parameter`. Multiple arguments are separated by spaces; the value is parsed with `shellsplit`, so it is **not** evaluated by a shell:
|
|
112
129
|
|
|
113
|
-
|
|
130
|
+
```aconf
|
|
131
|
+
store_as gzip_command
|
|
132
|
+
command_parameter -1 # single argument
|
|
133
|
+
```
|
|
114
134
|
|
|
115
|
-
|
|
135
|
+
```aconf
|
|
136
|
+
store_as zstd
|
|
137
|
+
command_parameter -19 --long # multiple arguments, split on spaces
|
|
138
|
+
```
|
|
116
139
|
|
|
117
|
-
|
|
140
|
+
Quote a value that itself contains a space, the same way you would in a shell (`command_parameter -o "with space"`).
|
|
118
141
|
|
|
119
|
-
|
|
142
|
+
`gzip_command` falls back to `Zlib::GzipWriter` if the `gzip` command fails. `lzo` / `lzma2` / `zstd` have no fallback, so the command must be installed (checked at startup), and they are not compatible with `transcoding`, which is gzip-specific.
|
|
120
143
|
|
|
121
|
-
|
|
144
|
+
> [!NOTE]
|
|
145
|
+
> `gzip_command_parameter` is a deprecated alias of `command_parameter`, kept for backward compatibility with v0.4.x configs. New configs should use `command_parameter`.
|
|
122
146
|
|
|
123
|
-
|
|
147
|
+
The per-line format is configured with a `<format>` section (default `out_file`):
|
|
124
148
|
|
|
125
|
-
|
|
149
|
+
```aconf
|
|
150
|
+
<format>
|
|
151
|
+
@type json
|
|
152
|
+
</format>
|
|
153
|
+
```
|
|
126
154
|
|
|
127
|
-
|
|
155
|
+
See the [Formatter documentation](https://docs.fluentd.org/formatter) for available types (`out_file`, `json`, `ltsv`, `single_value`, ...).
|
|
128
156
|
|
|
129
|
-
|
|
130
|
-
|--------------------|-------------|
|
|
131
|
-
| `%{path}` | The value of `path` |
|
|
132
|
-
| `%{time_slice}` | The time slice text formatted based on the `<buffer>` `timekey` |
|
|
133
|
-
| `%{index}` | Sequential number starting from 0, increments when multiple files are uploaded in the same time slice |
|
|
134
|
-
| `%{file_extension}` | Inferred from `store_as` (`gz` for gzip / gzip_command, `json` for json, `txt` for text) |
|
|
135
|
-
| `%{uuid_flush}` | A UUID generated each time the buffer is flushed |
|
|
136
|
-
| `%{hex_random}` | A random hex string generated for each buffer chunk. Configurable via `hex_random_length` (default: 4) |
|
|
137
|
-
| `%{hostname}` | The hostname of the running server |
|
|
157
|
+
### GCS object settings
|
|
138
158
|
|
|
139
|
-
|
|
159
|
+
| Option | Type | Default | Description |
|
|
160
|
+
|----------------------|--------|---------|-------------|
|
|
161
|
+
| `auto_create_bucket` | bool | `true` | Create the bucket if it does not exist |
|
|
162
|
+
| `acl` | enum | `nil` | Predefined ACL for uploaded objects (see below) |
|
|
163
|
+
| `storage_class` | enum | `nil` | Storage class for uploaded objects (see below) |
|
|
164
|
+
| `encryption_key` | string | `nil` | Customer-supplied AES-256 key for server-side encryption |
|
|
140
165
|
|
|
141
|
-
|
|
166
|
+
**`acl`** accepts one of `auth_read`, `owner_full`, `owner_read`, `private`, `project_private`, `public_read`. Defaults to the bucket's default object ACL. See the [access control documentation](https://cloud.google.com/storage/docs/access-control/lists).
|
|
142
167
|
|
|
143
|
-
|
|
168
|
+
**`storage_class`** accepts one of `dra`, `nearline`, `coldline`, `multi_regional`, `regional`, `standard`. See the [storage classes documentation](https://cloud.google.com/storage/docs/storage-classes).
|
|
144
169
|
|
|
145
|
-
|
|
170
|
+
**`encryption_key`** enables [customer-supplied encryption](https://cloud.google.com/storage/docs/encryption#customer-supplied); the `encryption_key_sha256` is computed automatically.
|
|
146
171
|
|
|
147
|
-
|
|
172
|
+
### Object key format
|
|
148
173
|
|
|
149
|
-
|
|
174
|
+
`object_key_format` supports the following placeholders:
|
|
150
175
|
|
|
151
|
-
|
|
176
|
+
| Placeholder | Description |
|
|
177
|
+
|---------------------|-------------|
|
|
178
|
+
| `%{path}` | The value of the `path` option |
|
|
179
|
+
| `%{time_slice}` | Time slice text derived from the `<buffer>` `timekey` |
|
|
180
|
+
| `%{index}` | Sequential number (from 0) within the same time slice |
|
|
181
|
+
| `%{file_extension}` | Inferred from `store_as` (`gz` / `lzo` / `xz` / `zst` / `json` / `txt`) |
|
|
182
|
+
| `%{uuid_flush}` | A UUID generated on every buffer flush |
|
|
183
|
+
| `%{hex_random}` | A random hex string per chunk, length set by `hex_random_length` |
|
|
184
|
+
| `%{hostname}` | The hostname of the running server |
|
|
152
185
|
|
|
153
|
-
|
|
154
|
-
> If the object already exists and `storage.objects.delete` is not granted either, you get an unrecoverable error. Use `%{hex_random}` or `%{uuid_flush}` to keep object keys unique.
|
|
186
|
+
The default is `%{path}%{time_slice}_%{index}.%{file_extension}`.
|
|
155
187
|
|
|
156
|
-
###
|
|
188
|
+
### Object metadata
|
|
157
189
|
|
|
158
|
-
|
|
190
|
+
Attach arbitrary `x-goog-meta-*` headers to uploaded objects with one or more `<object_metadata>` sections:
|
|
159
191
|
|
|
160
|
-
|
|
192
|
+
```aconf
|
|
193
|
+
<object_metadata>
|
|
194
|
+
key KEY_1
|
|
195
|
+
value VALUE_1
|
|
196
|
+
</object_metadata>
|
|
197
|
+
|
|
198
|
+
<object_metadata>
|
|
199
|
+
key KEY_2
|
|
200
|
+
value VALUE_2
|
|
201
|
+
</object_metadata>
|
|
202
|
+
```
|
|
161
203
|
|
|
162
|
-
|
|
163
|
-
|-----------------|-------------|
|
|
164
|
-
| `gzip` | Compress with the Ruby built-in `Zlib::GzipWriter` |
|
|
165
|
-
| `gzip_command` | Compress with an external `gzip` command. Faster for large chunks. Falls back to `Zlib::GzipWriter` if the command fails |
|
|
166
|
-
| `json` | Upload as `application/json` |
|
|
167
|
-
| `text` | Upload as `text/plain` |
|
|
204
|
+
## Examples
|
|
168
205
|
|
|
169
|
-
|
|
206
|
+
### Partition by tag and date
|
|
170
207
|
|
|
171
|
-
|
|
208
|
+
```aconf
|
|
209
|
+
<match app.**>
|
|
210
|
+
@type gcs
|
|
172
211
|
|
|
173
|
-
|
|
212
|
+
project YOUR_PROJECT
|
|
213
|
+
bucket YOUR_GCS_BUCKET_NAME
|
|
214
|
+
object_key_format %{path}%{time_slice}/%{hostname}_%{index}.%{file_extension}
|
|
215
|
+
path logs/${tag}/
|
|
174
216
|
|
|
175
|
-
|
|
176
|
-
|
|
217
|
+
<buffer tag,time>
|
|
218
|
+
@type file
|
|
219
|
+
path /var/log/fluent/gcs
|
|
220
|
+
timekey 1d
|
|
221
|
+
timekey_wait 10m
|
|
222
|
+
timekey_use_utc true
|
|
223
|
+
</buffer>
|
|
177
224
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
store_as gzip_command
|
|
182
|
-
gzip_command_parameter -1
|
|
183
|
-
...
|
|
225
|
+
<format>
|
|
226
|
+
@type json
|
|
227
|
+
</format>
|
|
184
228
|
</match>
|
|
185
229
|
```
|
|
186
230
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
Enable the decompressive form of transcoding. See [Transcoding of gzip-compressed files](https://cloud.google.com/storage/docs/transcoding).
|
|
190
|
-
|
|
191
|
-
#### format
|
|
231
|
+
For the tag `app.web` on host `web1`, this writes objects such as `logs/app.web/20240101/web1_0.gz`.
|
|
192
232
|
|
|
193
|
-
|
|
233
|
+
### Fine-grained 1-minute partitions
|
|
194
234
|
|
|
195
|
-
|
|
196
|
-
* `json`
|
|
197
|
-
* `ltsv`
|
|
198
|
-
* `single_value`
|
|
235
|
+
When `timekey` is at least one minute but under an hour, `%{time_slice}` automatically resolves to minute granularity (`%Y%m%d%H%M`).
|
|
199
236
|
|
|
200
|
-
|
|
237
|
+
```aconf
|
|
238
|
+
<match app.**>
|
|
239
|
+
@type gcs
|
|
201
240
|
|
|
202
|
-
|
|
241
|
+
bucket YOUR_GCS_BUCKET_NAME
|
|
242
|
+
path logs/
|
|
203
243
|
|
|
204
|
-
|
|
244
|
+
<buffer time>
|
|
245
|
+
@type file
|
|
246
|
+
path /var/log/fluent/gcs
|
|
247
|
+
timekey 1m # 1 minute partition
|
|
248
|
+
timekey_wait 10s # short wait for late events
|
|
249
|
+
timekey_use_utc true
|
|
250
|
+
</buffer>
|
|
251
|
+
</match>
|
|
252
|
+
```
|
|
205
253
|
|
|
206
|
-
|
|
254
|
+
This writes objects such as `logs/202401011230_0.gz`, one (or more) per minute.
|
|
207
255
|
|
|
208
|
-
|
|
256
|
+
### Fast compression with the external gzip
|
|
209
257
|
|
|
210
|
-
|
|
258
|
+
```aconf
|
|
259
|
+
<match app.**>
|
|
260
|
+
@type gcs
|
|
211
261
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
| `owner_read` | File owner gets OWNER access, and project team owners get READER access |
|
|
217
|
-
| `private` | File owner gets OWNER access |
|
|
218
|
-
| `project_private` | File owner gets OWNER access, and project team members get access according to their roles |
|
|
219
|
-
| `public_read` | File owner gets OWNER access, and allUsers get READER access |
|
|
262
|
+
bucket YOUR_GCS_BUCKET_NAME
|
|
263
|
+
path logs/
|
|
264
|
+
store_as gzip_command
|
|
265
|
+
command_parameter -1
|
|
220
266
|
|
|
221
|
-
|
|
267
|
+
<buffer time>
|
|
268
|
+
@type file
|
|
269
|
+
path /var/log/fluent/gcs
|
|
270
|
+
timekey 1h
|
|
271
|
+
timekey_wait 10m
|
|
272
|
+
</buffer>
|
|
273
|
+
</match>
|
|
274
|
+
```
|
|
222
275
|
|
|
223
|
-
|
|
276
|
+
Using the default `object_key_format`, this writes objects such as `logs/2024010112_0.gz`, one per hourly slice.
|
|
224
277
|
|
|
225
|
-
|
|
278
|
+
### Cost-optimized cold storage
|
|
226
279
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
| `nearline` | Nearline Storage |
|
|
231
|
-
| `coldline` | Coldline Storage |
|
|
232
|
-
| `multi_regional` | Multi-Regional Storage |
|
|
233
|
-
| `regional` | Regional Storage |
|
|
234
|
-
| `standard` | Standard Storage |
|
|
280
|
+
```aconf
|
|
281
|
+
<match archive.**>
|
|
282
|
+
@type gcs
|
|
235
283
|
|
|
236
|
-
|
|
284
|
+
bucket YOUR_GCS_BUCKET_NAME
|
|
285
|
+
path archive/
|
|
286
|
+
storage_class coldline
|
|
287
|
+
acl project_private
|
|
237
288
|
|
|
238
|
-
|
|
289
|
+
<buffer time>
|
|
290
|
+
@type file
|
|
291
|
+
path /var/log/fluent/gcs-archive
|
|
292
|
+
timekey 1d
|
|
293
|
+
timekey_wait 1h
|
|
294
|
+
</buffer>
|
|
295
|
+
</match>
|
|
296
|
+
```
|
|
239
297
|
|
|
240
|
-
|
|
298
|
+
Using the default `object_key_format`, this writes objects such as `archive/20240101_0.gz`, one per day, stored in the Coldline class.
|
|
241
299
|
|
|
242
|
-
###
|
|
300
|
+
### Write without the get permission (blind_write)
|
|
243
301
|
|
|
244
|
-
|
|
302
|
+
`blind_write true` skips the existence check, so the `storage.objects.get` permission is not required. Because `%{index}` does not work in this mode, include `%{hex_random}` or `%{uuid_flush}` to keep keys unique.
|
|
245
303
|
|
|
246
304
|
```aconf
|
|
247
|
-
<match
|
|
305
|
+
<match app.**>
|
|
248
306
|
@type gcs
|
|
249
307
|
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
308
|
+
bucket YOUR_GCS_BUCKET_NAME
|
|
309
|
+
path logs/
|
|
310
|
+
object_key_format %{path}%{time_slice}_%{hex_random}.%{file_extension}
|
|
311
|
+
blind_write true
|
|
254
312
|
|
|
255
|
-
<
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
313
|
+
<buffer time>
|
|
314
|
+
@type file
|
|
315
|
+
path /var/log/fluent/gcs
|
|
316
|
+
timekey 1h
|
|
317
|
+
timekey_wait 10m
|
|
318
|
+
timekey_use_utc true
|
|
319
|
+
</buffer>
|
|
259
320
|
</match>
|
|
260
321
|
```
|
|
261
322
|
|
|
323
|
+
This writes objects such as `logs/2024010112_a1b2.gz`, with a per-chunk random suffix instead of an incrementing index.
|
|
324
|
+
|
|
262
325
|
## Development
|
|
263
326
|
|
|
264
327
|
```shell
|
|
265
328
|
bundle install
|
|
266
|
-
bundle exec rake test
|
|
267
|
-
bundle exec bundler-audit check --update
|
|
268
|
-
gem build fluent-plugin-gcs.gemspec
|
|
329
|
+
bundle exec rake test # run the test suite
|
|
330
|
+
bundle exec bundler-audit check --update # audit dependencies
|
|
331
|
+
gem build fluent-plugin-gcs.gemspec # build the gem
|
|
269
332
|
```
|
|
270
333
|
|
|
271
334
|
## Author
|
data/fluent-plugin-gcs.gemspec
CHANGED
|
@@ -1,7 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
|
3
|
-
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
|
-
require 'fluent/plugin/gcs/version'
|
|
1
|
+
require_relative "lib/fluent/plugin/gcs/version"
|
|
5
2
|
|
|
6
3
|
Gem::Specification.new do |spec|
|
|
7
4
|
spec.name = "fluent-plugin-gcs"
|
|
@@ -9,17 +6,25 @@ Gem::Specification.new do |spec|
|
|
|
9
6
|
spec.authors = ["Daichi HIRATA"]
|
|
10
7
|
spec.email = ["hirata.daichi@gmail.com"]
|
|
11
8
|
spec.summary = "Google Cloud Storage output plugin for Fluentd"
|
|
12
|
-
spec.description = "Google Cloud Storage
|
|
9
|
+
spec.description = "Fluentd output plugin that buffers events and uploads them to Google Cloud Storage as gzip, json, or text objects."
|
|
13
10
|
spec.homepage = "https://github.com/daichirata/fluent-plugin-gcs"
|
|
14
11
|
spec.license = "Apache-2.0"
|
|
15
12
|
|
|
16
|
-
spec.
|
|
13
|
+
spec.required_ruby_version = ">= 3.3"
|
|
14
|
+
|
|
15
|
+
spec.metadata = {
|
|
16
|
+
"source_code_uri" => spec.homepage,
|
|
17
|
+
"bug_tracker_uri" => "#{spec.homepage}/issues",
|
|
18
|
+
"rubygems_mfa_required" => "true",
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
|
17
22
|
f.match(%r{^(test|spec|features)/})
|
|
18
23
|
end
|
|
19
24
|
spec.bindir = "exe"
|
|
20
25
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
21
26
|
spec.require_paths = ["lib"]
|
|
22
27
|
|
|
23
|
-
spec.add_runtime_dependency "fluentd",
|
|
28
|
+
spec.add_runtime_dependency "fluentd", ">= 1.0", "< 3"
|
|
24
29
|
spec.add_runtime_dependency "google-cloud-storage", "~> 1.1"
|
|
25
30
|
end
|
|
@@ -15,6 +15,12 @@ module Fluent
|
|
|
15
15
|
command_parameter: command_parameter,
|
|
16
16
|
log: log
|
|
17
17
|
)
|
|
18
|
+
when :lzo
|
|
19
|
+
Fluent::GCS::LZOObjectCreator.new(command_parameter: command_parameter, log: log)
|
|
20
|
+
when :lzma2
|
|
21
|
+
Fluent::GCS::LZMA2ObjectCreator.new(command_parameter: command_parameter, log: log)
|
|
22
|
+
when :zstd
|
|
23
|
+
Fluent::GCS::ZstdObjectCreator.new(command_parameter: command_parameter, log: log)
|
|
18
24
|
when :json
|
|
19
25
|
Fluent::GCS::JSONObjectCreator.new
|
|
20
26
|
when :text
|
|
@@ -49,6 +55,30 @@ module Fluent
|
|
|
49
55
|
end
|
|
50
56
|
end
|
|
51
57
|
|
|
58
|
+
class TextObjectCreator < ObjectCreator
|
|
59
|
+
def content_type
|
|
60
|
+
"text/plain"
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def file_extension
|
|
64
|
+
"txt"
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def write(chunk, io)
|
|
68
|
+
chunk.write_to(io)
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
class JSONObjectCreator < TextObjectCreator
|
|
73
|
+
def content_type
|
|
74
|
+
"application/json"
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def file_extension
|
|
78
|
+
"json"
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
|
|
52
82
|
class GZipObjectCreator < ObjectCreator
|
|
53
83
|
def initialize(transcoding)
|
|
54
84
|
@transcoding = transcoding
|
|
@@ -73,12 +103,54 @@ module Fluent
|
|
|
73
103
|
end
|
|
74
104
|
end
|
|
75
105
|
|
|
76
|
-
class
|
|
77
|
-
def initialize(
|
|
78
|
-
@
|
|
79
|
-
@command_parameter = command_parameter || ""
|
|
106
|
+
class CommandObjectCreator < ObjectCreator
|
|
107
|
+
def initialize(command_parameter: nil, log: nil)
|
|
108
|
+
@command_parameter = command_parameter
|
|
80
109
|
@log = log
|
|
81
|
-
|
|
110
|
+
check_command
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def write(chunk, io)
|
|
114
|
+
parameter = @command_parameter.nil? || @command_parameter.empty? ? default_parameter : @command_parameter
|
|
115
|
+
cmd = [command, *parameter.shellsplit, "-c"]
|
|
116
|
+
status = Open3.pipeline_w(cmd, out: io.path) do |stdin, wait_thrs|
|
|
117
|
+
chunk.write_to(stdin)
|
|
118
|
+
stdin.close
|
|
119
|
+
wait_thrs.last.value
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
handle_failure(chunk, io, status) unless status.success?
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
private
|
|
126
|
+
|
|
127
|
+
def command
|
|
128
|
+
raise NotImplementedError
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
def store_as
|
|
132
|
+
raise NotImplementedError
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def default_parameter
|
|
136
|
+
""
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
def handle_failure(chunk, io, status)
|
|
140
|
+
raise "failed to execute #{command} command. status = #{status}"
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def check_command
|
|
144
|
+
Open3.capture3(command, "--version")
|
|
145
|
+
rescue Errno::ENOENT
|
|
146
|
+
raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{store_as} compression"
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
class GZipCommandObjectCreator < CommandObjectCreator
|
|
151
|
+
def initialize(transcoding: nil, command_parameter: nil, log: nil)
|
|
152
|
+
@transcoding = transcoding
|
|
153
|
+
super(command_parameter: command_parameter, log: log)
|
|
82
154
|
end
|
|
83
155
|
|
|
84
156
|
def content_type
|
|
@@ -93,60 +165,91 @@ module Fluent
|
|
|
93
165
|
"gz"
|
|
94
166
|
end
|
|
95
167
|
|
|
96
|
-
|
|
97
|
-
cmd = ["gzip", *@command_parameter.shellsplit, "-c"]
|
|
98
|
-
status = Open3.pipeline_w(cmd, out: io.path) do |stdin, wait_thrs|
|
|
99
|
-
chunk.write_to(stdin)
|
|
100
|
-
stdin.close
|
|
101
|
-
wait_thrs.last.value
|
|
102
|
-
end
|
|
168
|
+
private
|
|
103
169
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
io.truncate(0)
|
|
107
|
-
io.rewind
|
|
108
|
-
fallback_to_gzip_writer(chunk, io)
|
|
109
|
-
end
|
|
170
|
+
def command
|
|
171
|
+
"gzip"
|
|
110
172
|
end
|
|
111
173
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def check_gzip_command
|
|
115
|
-
begin
|
|
116
|
-
Open3.capture3("gzip -V")
|
|
117
|
-
rescue Errno::ENOENT
|
|
118
|
-
raise Fluent::ConfigError, "'gzip' utility must be in PATH for gzip_command compression"
|
|
119
|
-
end
|
|
174
|
+
def store_as
|
|
175
|
+
"gzip_command"
|
|
120
176
|
end
|
|
121
177
|
|
|
122
|
-
def
|
|
178
|
+
def handle_failure(chunk, io, status)
|
|
179
|
+
@log&.warn("failed to execute gzip command. Fallback to GzipWriter. status = #{status}")
|
|
180
|
+
io.truncate(0)
|
|
181
|
+
io.rewind
|
|
123
182
|
writer = Zlib::GzipWriter.new(io)
|
|
124
183
|
chunk.write_to(writer)
|
|
125
184
|
writer.finish
|
|
126
185
|
end
|
|
127
186
|
end
|
|
128
187
|
|
|
129
|
-
class
|
|
188
|
+
class LZOObjectCreator < CommandObjectCreator
|
|
130
189
|
def content_type
|
|
131
|
-
"
|
|
190
|
+
"application/x-lzop"
|
|
132
191
|
end
|
|
133
192
|
|
|
134
193
|
def file_extension
|
|
135
|
-
"
|
|
194
|
+
"lzo"
|
|
136
195
|
end
|
|
137
196
|
|
|
138
|
-
|
|
139
|
-
|
|
197
|
+
private
|
|
198
|
+
|
|
199
|
+
def command
|
|
200
|
+
"lzop"
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
def default_parameter
|
|
204
|
+
"-qf1"
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
def store_as
|
|
208
|
+
"lzo"
|
|
140
209
|
end
|
|
141
210
|
end
|
|
142
211
|
|
|
143
|
-
class
|
|
212
|
+
class LZMA2ObjectCreator < CommandObjectCreator
|
|
144
213
|
def content_type
|
|
145
|
-
"application/
|
|
214
|
+
"application/x-xz"
|
|
146
215
|
end
|
|
147
216
|
|
|
148
217
|
def file_extension
|
|
149
|
-
"
|
|
218
|
+
"xz"
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
private
|
|
222
|
+
|
|
223
|
+
def command
|
|
224
|
+
"xz"
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
def default_parameter
|
|
228
|
+
"-qf0"
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
def store_as
|
|
232
|
+
"lzma2"
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
class ZstdObjectCreator < CommandObjectCreator
|
|
237
|
+
def content_type
|
|
238
|
+
"application/x-zst"
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
def file_extension
|
|
242
|
+
"zst"
|
|
243
|
+
end
|
|
244
|
+
|
|
245
|
+
private
|
|
246
|
+
|
|
247
|
+
def command
|
|
248
|
+
"zstd"
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
def store_as
|
|
252
|
+
"zstd"
|
|
150
253
|
end
|
|
151
254
|
end
|
|
152
255
|
end
|
|
@@ -10,7 +10,7 @@ module Fluent::Plugin
|
|
|
10
10
|
class GCSOutput < Output
|
|
11
11
|
Fluent::Plugin.register_output("gcs", self)
|
|
12
12
|
|
|
13
|
-
helpers :
|
|
13
|
+
helpers :formatter, :inject
|
|
14
14
|
|
|
15
15
|
def initialize
|
|
16
16
|
super
|
|
@@ -34,20 +34,20 @@ module Fluent::Plugin
|
|
|
34
34
|
desc: "Format of GCS object keys"
|
|
35
35
|
config_param :path, :string, default: "",
|
|
36
36
|
desc: "Path prefix of the files on GCS"
|
|
37
|
-
config_param :store_as, :enum, list: %i(gzip gzip_command json text), default: :gzip,
|
|
37
|
+
config_param :store_as, :enum, list: %i(gzip gzip_command lzo lzma2 zstd json text), default: :gzip,
|
|
38
38
|
desc: "Archive format on GCS"
|
|
39
39
|
config_param :transcoding, :bool, default: false,
|
|
40
40
|
desc: "Enable the decompressive form of transcoding"
|
|
41
|
-
config_param :gzip_command_parameter, :string, default: "",
|
|
42
|
-
desc: "
|
|
41
|
+
config_param :gzip_command_parameter, :string, default: nil, deprecated: "Use command_parameter instead.",
|
|
42
|
+
desc: "Deprecated alias of command_parameter for the gzip_command compressor"
|
|
43
|
+
config_param :command_parameter, :string, default: nil,
|
|
44
|
+
desc: "Override the default arguments for the gzip_command / lzo / lzma2 / zstd compression command"
|
|
43
45
|
config_param :auto_create_bucket, :bool, default: true,
|
|
44
46
|
desc: "Create GCS bucket if it does not exists"
|
|
45
47
|
config_param :hex_random_length, :integer, default: 4,
|
|
46
48
|
desc: "Max length of `%{hex_random}` placeholder(4-16)"
|
|
47
49
|
config_param :overwrite, :bool, default: false,
|
|
48
50
|
desc: "Overwrite already existing path"
|
|
49
|
-
config_param :format, :string, default: "out_file",
|
|
50
|
-
desc: "Change one line format in the GCS object"
|
|
51
51
|
config_param :acl, :enum, list: %i(auth_read owner_full owner_read private project_private public_read), default: nil,
|
|
52
52
|
desc: "Permission for the object in GCS"
|
|
53
53
|
config_param :storage_class, :enum, list: %i(dra nearline coldline multi_regional regional standard), default: nil,
|
|
@@ -61,10 +61,8 @@ module Fluent::Plugin
|
|
|
61
61
|
config_param :value, :string, default: ""
|
|
62
62
|
end
|
|
63
63
|
|
|
64
|
-
DEFAULT_FORMAT_TYPE = "out_file"
|
|
65
|
-
|
|
66
64
|
config_section :format do
|
|
67
|
-
config_set_default :@type,
|
|
65
|
+
config_set_default :@type, "out_file"
|
|
68
66
|
end
|
|
69
67
|
|
|
70
68
|
config_section :buffer do
|
|
@@ -75,7 +73,6 @@ module Fluent::Plugin
|
|
|
75
73
|
MAX_HEX_RANDOM_LENGTH = 32
|
|
76
74
|
|
|
77
75
|
def configure(conf)
|
|
78
|
-
compat_parameters_convert(conf, :buffer, :formatter, :inject)
|
|
79
76
|
super
|
|
80
77
|
|
|
81
78
|
if @hex_random_length > MAX_HEX_RANDOM_LENGTH
|
|
@@ -93,16 +90,17 @@ module Fluent::Plugin
|
|
|
93
90
|
|
|
94
91
|
@formatter = formatter_create
|
|
95
92
|
|
|
93
|
+
# gzip_command_parameter is a deprecated alias of command_parameter; the
|
|
94
|
+
# explicit command_parameter wins when both are set.
|
|
95
|
+
command_parameter = @command_parameter || @gzip_command_parameter
|
|
96
|
+
|
|
96
97
|
@object_creator = Fluent::GCS.discovered_object_creator(
|
|
97
98
|
@store_as,
|
|
98
99
|
transcoding: @transcoding,
|
|
99
|
-
command_parameter:
|
|
100
|
+
command_parameter: command_parameter,
|
|
100
101
|
log: log
|
|
101
102
|
)
|
|
102
|
-
|
|
103
|
-
# TODO: Remove time_slice_format when end of support compat_parameters
|
|
104
|
-
@configured_time_slice_format = conf['time_slice_format']
|
|
105
|
-
@time_slice_with_tz = Fluent::Timezone.formatter(@timekey_zone, @configured_time_slice_format || timekey_to_timeformat(@buffer_config['timekey']))
|
|
103
|
+
@time_slice_with_tz = Fluent::Timezone.formatter(@timekey_zone, timekey_to_timeformat(@buffer_config['timekey']))
|
|
106
104
|
|
|
107
105
|
if @credentials_json
|
|
108
106
|
@credentials = @credentials_json
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: fluent-plugin-gcs
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Daichi HIRATA
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: fluentd
|
|
@@ -16,20 +16,20 @@ dependencies:
|
|
|
16
16
|
requirements:
|
|
17
17
|
- - ">="
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 0
|
|
19
|
+
version: '1.0'
|
|
20
20
|
- - "<"
|
|
21
21
|
- !ruby/object:Gem::Version
|
|
22
|
-
version: '
|
|
22
|
+
version: '3'
|
|
23
23
|
type: :runtime
|
|
24
24
|
prerelease: false
|
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
|
26
26
|
requirements:
|
|
27
27
|
- - ">="
|
|
28
28
|
- !ruby/object:Gem::Version
|
|
29
|
-
version: 0
|
|
29
|
+
version: '1.0'
|
|
30
30
|
- - "<"
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
|
-
version: '
|
|
32
|
+
version: '3'
|
|
33
33
|
- !ruby/object:Gem::Dependency
|
|
34
34
|
name: google-cloud-storage
|
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -44,7 +44,8 @@ dependencies:
|
|
|
44
44
|
- - "~>"
|
|
45
45
|
- !ruby/object:Gem::Version
|
|
46
46
|
version: '1.1'
|
|
47
|
-
description:
|
|
47
|
+
description: Fluentd output plugin that buffers events and uploads them to Google
|
|
48
|
+
Cloud Storage as gzip, json, or text objects.
|
|
48
49
|
email:
|
|
49
50
|
- hirata.daichi@gmail.com
|
|
50
51
|
executables: []
|
|
@@ -67,7 +68,10 @@ files:
|
|
|
67
68
|
homepage: https://github.com/daichirata/fluent-plugin-gcs
|
|
68
69
|
licenses:
|
|
69
70
|
- Apache-2.0
|
|
70
|
-
metadata:
|
|
71
|
+
metadata:
|
|
72
|
+
source_code_uri: https://github.com/daichirata/fluent-plugin-gcs
|
|
73
|
+
bug_tracker_uri: https://github.com/daichirata/fluent-plugin-gcs/issues
|
|
74
|
+
rubygems_mfa_required: 'true'
|
|
71
75
|
post_install_message:
|
|
72
76
|
rdoc_options: []
|
|
73
77
|
require_paths:
|
|
@@ -76,7 +80,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
76
80
|
requirements:
|
|
77
81
|
- - ">="
|
|
78
82
|
- !ruby/object:Gem::Version
|
|
79
|
-
version: '
|
|
83
|
+
version: '3.3'
|
|
80
84
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
81
85
|
requirements:
|
|
82
86
|
- - ">="
|