fluent-plugin-azure-logs-ingestion 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +8 -0
- data/LICENSE +13 -0
- data/README.md +209 -0
- data/README_ja.md +209 -0
- data/Rakefile +10 -0
- data/lib/fluent/plugin/azure_logs_ingestion/auth.rb +184 -0
- data/lib/fluent/plugin/azure_logs_ingestion/client.rb +92 -0
- data/lib/fluent/plugin/azure_logs_ingestion/payload_builder.rb +112 -0
- data/lib/fluent/plugin/azure_logs_ingestion/version.rb +9 -0
- data/lib/fluent/plugin/out_azure_logs_ingestion.rb +99 -0
- data/test/helper.rb +6 -0
- data/test/support/fake_azure_server.rb +100 -0
- data/test/support/helpers.rb +41 -0
- data/test/test_auth_msi.rb +76 -0
- data/test/test_auth_service_principal.rb +57 -0
- data/test/test_out_azure_logs_ingestion.rb +60 -0
- data/test/test_out_azure_logs_ingestion_write.rb +170 -0
- data/test/test_payload_builder.rb +114 -0
- metadata +108 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: e5482480b26a287f04f3869ea12ddbe976dac865aa507f43538a99fcfc060c51
|
|
4
|
+
data.tar.gz: 171a565a9d11e5a020e254682540a85adafa8b90eb46e2732819b1a63f67dab6
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 26314140847581088a5b2fa71e0fb4abeff7de0fae64ce31afcab42e40c5298d93df43def1809b1f0faf442075e57ebb4e796bd78a95989409550ead49da0037
|
|
7
|
+
data.tar.gz: 5006e5ddbeff5b09a2a2f6d7e79a68e78d1a7912ccf8e390e6aa997c71f97f151a6e0fefeec6b295daae0bbf014f3c361d6bc4c901c3886be0da33a049700da5
|
data/Gemfile
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Copyright 2026- fukasawah
|
|
2
|
+
|
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
you may not use this file except in compliance with the License.
|
|
5
|
+
You may obtain a copy of the License at
|
|
6
|
+
|
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
|
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
See the License for the specific language governing permissions and
|
|
13
|
+
limitations under the License.
|
data/README.md
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# fluent-plugin-azure-logs-ingestion
|
|
2
|
+
|
|
3
|
+
Fluentd output plugin that sends records to Log Analytics Workspace tables by using the Azure Monitor Logs Ingestion API.
|
|
4
|
+
|
|
5
|
+
> [!WARNING]
|
|
6
|
+
> This plugin is experimental and has not yet been sufficiently proven in serious production workloads.
|
|
7
|
+
|
|
8
|
+
## Installation
|
|
9
|
+
|
|
10
|
+
### RubyGems
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
fluent-gem install fluent-plugin-azure-logs-ingestion
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
If you use `td-agent`, use `td-agent-gem` instead of `fluent-gem`.
|
|
17
|
+
|
|
18
|
+
### Bundler
|
|
19
|
+
|
|
20
|
+
Add the following line to your Gemfile.
|
|
21
|
+
|
|
22
|
+
```ruby
|
|
23
|
+
gem 'fluent-plugin-azure-logs-ingestion'
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Then run `bundle install`.
|
|
27
|
+
|
|
28
|
+
### Install From GitHub With Bundler
|
|
29
|
+
|
|
30
|
+
Bundler can point directly at the GitHub repository. Specify `ref` when you want to pin a specific revision.
|
|
31
|
+
|
|
32
|
+
```ruby
|
|
33
|
+
gem 'fluent-plugin-azure-logs-ingestion', git: 'https://github.com/fukasawah/fluent-plugin-azure-logs-ingestion.git', ref: 'abda3b5370ccd61282c8b234ca05042049e09d15'
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Then run `bundle install`.
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
## Configuration Example
|
|
40
|
+
|
|
41
|
+
```conf
|
|
42
|
+
|
|
43
|
+
<match azure.logs>
|
|
44
|
+
@type azure_logs_ingestion
|
|
45
|
+
endpoint https://example.japaneast-1.ingest.monitor.azure.com
|
|
46
|
+
dcr_immutable_id dcr-000a00a000a00000a000000aa000a0aa
|
|
47
|
+
stream_name Custom-MyTable
|
|
48
|
+
|
|
49
|
+
tenant_id YOUR_TENANT_ID
|
|
50
|
+
client_id YOUR_CLIENT_ID
|
|
51
|
+
client_secret YOUR_CLIENT_SECRET
|
|
52
|
+
|
|
53
|
+
<buffer>
|
|
54
|
+
@type file
|
|
55
|
+
path /var/log/fluent/azure-logs-ingestion-buffer.*.buf
|
|
56
|
+
chunk_limit_size 900KB
|
|
57
|
+
</buffer>
|
|
58
|
+
</match>
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Configuration
|
|
62
|
+
|
|
63
|
+
### Parameters
|
|
64
|
+
|
|
65
|
+
| Parameter | Required | Default | Description |
|
|
66
|
+
| --- | --- | --- | --- |
|
|
67
|
+
| `endpoint` | yes | none | Logs Ingestion endpoint or DCE endpoint |
|
|
68
|
+
| `dcr_immutable_id` | yes | none | Immutable DCR ID in `dcr-...` format |
|
|
69
|
+
| `stream_name` | yes | none | DCR input stream name specified in the request URI |
|
|
70
|
+
| `gzip` | no | `false` | Send the HTTP request body compressed with gzip |
|
|
71
|
+
| `use_msi` | no | `false` | Use Managed Identity instead of a service principal |
|
|
72
|
+
| `tenant_id` | no | `ENV['AZURE_TENANT_ID']` | Tenant ID used for service principal authentication |
|
|
73
|
+
| `client_id` | no | `ENV['AZURE_CLIENT_ID']` | Service principal client ID, or user-assigned managed identity client ID |
|
|
74
|
+
| `client_secret` | no | `ENV['AZURE_CLIENT_SECRET']` | Service principal client secret |
|
|
75
|
+
| `authority_host` | no | `https://login.microsoftonline.com` | OAuth token endpoint base URL |
|
|
76
|
+
| `logs_ingestion_scope` | no | `https://monitor.azure.com/.default` | OAuth scope for the Logs Ingestion API |
|
|
77
|
+
| `token_refresh_skew` | no | `300s` | How many seconds before expiry to refresh the Azure access token |
|
|
78
|
+
|
|
79
|
+
### Buffer Parameters
|
|
80
|
+
|
|
81
|
+
This plugin changes only the buffer defaults needed for a production-friendly file buffer and a chunk size that is likely to fit within the Logs Ingestion API request size limit.
|
|
82
|
+
|
|
83
|
+
| Buffer parameter | Default | Description |
|
|
84
|
+
| --- | --- | --- |
|
|
85
|
+
| `@type` | `file` | Use a file buffer by default |
|
|
86
|
+
| `chunk_limit_size` | `900KB` | Chunk size with headroom against the Logs Ingestion API 1 MB request size limit |
|
|
87
|
+
|
|
88
|
+
### Authentication
|
|
89
|
+
|
|
90
|
+
Service principal credentials can be written directly in the Fluentd configuration or read from environment variables.
|
|
91
|
+
|
|
92
|
+
Available environment variables:
|
|
93
|
+
|
|
94
|
+
- `AZURE_TENANT_ID`
|
|
95
|
+
- `AZURE_CLIENT_ID`
|
|
96
|
+
- `AZURE_CLIENT_SECRET`
|
|
97
|
+
|
|
98
|
+
When using Managed Identity, specify `use_msi true` and omit `tenant_id` and `client_secret`.
|
|
99
|
+
When using User-assigned Managed Identity, specify the User-assigned Managed Identity client ID in `client_id`.
|
|
100
|
+
|
|
101
|
+
### Managed Identity Example
|
|
102
|
+
|
|
103
|
+
```conf
|
|
104
|
+
<match azure.logs>
|
|
105
|
+
@type azure_logs_ingestion
|
|
106
|
+
endpoint https://example.japaneast-1.ingest.monitor.azure.com
|
|
107
|
+
dcr_immutable_id dcr-000a00a000a00000a000000aa000a0aa
|
|
108
|
+
stream_name Custom-MyTable
|
|
109
|
+
use_msi true
|
|
110
|
+
client_id YOUR_USER_ASSIGNED_MANAGED_IDENTITY_CLIENT_ID
|
|
111
|
+
|
|
112
|
+
<buffer>
|
|
113
|
+
@type file
|
|
114
|
+
path /var/log/fluent/azure-logs-ingestion-buffer.*.buf
|
|
115
|
+
</buffer>
|
|
116
|
+
</match>
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Buffer Configuration Notes
|
|
120
|
+
|
|
121
|
+
- `chunk_limit_size 900KB`: The Logs Ingestion API request size limit is 1 MB. One chunk should fit in one request, and starting around 900 KB is safer because JSON serialization can increase the API request size.
|
|
122
|
+
- `flush_mode` and `flush_interval` use Fluentd defaults. If you need lower delivery latency, specify them explicitly as normal Fluentd buffer settings.
|
|
123
|
+
|
|
124
|
+
### 30 Minute Limit On Auxiliary Tier
|
|
125
|
+
|
|
126
|
+
When sending to the Log Analytics Workspace Auxiliary tier without converting `TimeGenerated` in a DCR transformation, the range of `TimeGenerated` values in one request must be less than 30 minutes. To satisfy this limit, treat the original log timestamp as the Fluentd event time, then split chunks by time with `<buffer time>` and `timekey`.
|
|
127
|
+
|
|
128
|
+
For example, if the record field `created_at` is an ISO8601 string, convert it to event time with the input parser. Specify `keep_time_key true` when you also want to send `created_at` to Azure.
|
|
129
|
+
|
|
130
|
+
```conf
|
|
131
|
+
<source>
|
|
132
|
+
@type tail
|
|
133
|
+
path /var/log/myapp/app.log
|
|
134
|
+
tag azure.logs
|
|
135
|
+
|
|
136
|
+
<parse>
|
|
137
|
+
@type json
|
|
138
|
+
time_key created_at
|
|
139
|
+
time_format %iso8601
|
|
140
|
+
keep_time_key true
|
|
141
|
+
</parse>
|
|
142
|
+
</source>
|
|
143
|
+
|
|
144
|
+
<match azure.logs>
|
|
145
|
+
@type azure_logs_ingestion
|
|
146
|
+
# ...
|
|
147
|
+
<buffer time>
|
|
148
|
+
@type file
|
|
149
|
+
# ...
|
|
150
|
+
timekey 20m
|
|
151
|
+
</buffer>
|
|
152
|
+
</match>
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
If you need to replace the event time after a record has already been ingested, you can use `renew_time_key` in a filter. The field specified in `renew_time_key` must be a Unix timestamp.
|
|
156
|
+
|
|
157
|
+
```conf
|
|
158
|
+
<filter azure.logs>
|
|
159
|
+
@type record_transformer
|
|
160
|
+
renew_time_key created_at
|
|
161
|
+
</filter>
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
<match azure.logs>
|
|
165
|
+
@type azure_logs_ingestion
|
|
166
|
+
# ...
|
|
167
|
+
<buffer time>
|
|
168
|
+
@type file
|
|
169
|
+
# ...
|
|
170
|
+
timekey 20m
|
|
171
|
+
</buffer>
|
|
172
|
+
</match>
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
The `time` in `<buffer time>` is the Fluentd event time, not a `time` field inside the record. Merely leaving `created_at` or `TimeGenerated` in the payload does not make it available for time-based chunking.
|
|
176
|
+
|
|
177
|
+
### Plugin Behavior
|
|
178
|
+
|
|
179
|
+
- This plugin does not rewrite `TimeGenerated`. If the payload has an original timestamp field such as `time`, prefer deriving `TimeGenerated` from that field in the DCR transformation, for example `extend TimeGenerated = todatetime(['time'])`.
|
|
180
|
+
- HTTP `400`, `401`, `403`, and `413` are treated as unrecoverable. `429` and `5xx` are retried by Fluentd.
|
|
181
|
+
|
|
182
|
+
## Memo: Log Analytics Workspace / DCR / Logs Ingestion API Behavior
|
|
183
|
+
|
|
184
|
+
- Currently, when the Log Analytics Workspace SKU is Auxiliary tier and the DCR transformation is not used, the range of `TimeGenerated` values in one request must be less than 30 minutes.
|
|
185
|
+
- > This limit only applies when ingesting to Auxiliary log tables. If the source entries for TimeGenerated are ingested without being transformed, the range of entries must be less than 30 minutes.
|
|
186
|
+
>
|
|
187
|
+
> https://learn.microsoft.com/en-us/azure/azure-monitor/fundamentals/service-limits#logs-ingestion-api
|
|
188
|
+
- Logs Ingestion API request size must be kept to 1 MB or less.
|
|
189
|
+
- > Maximum size of API call | 1 MB
|
|
190
|
+
>
|
|
191
|
+
> https://learn.microsoft.com/en-us/azure/azure-monitor/fundamentals/service-limits#logs-ingestion-api
|
|
192
|
+
- Log Analytics Workspace has no deduplication mechanism. If Azure accepts a request but Fluentd cannot confirm the response successfully, retrying can create duplicate records.
|
|
193
|
+
|
|
194
|
+
## References
|
|
195
|
+
|
|
196
|
+
- Azure Monitor Logs Ingestion API overview: https://learn.microsoft.com/azure/azure-monitor/logs/logs-ingestion-api-overview
|
|
197
|
+
- Create data collection rules (DCRs) using JSON: https://learn.microsoft.com/azure/azure-monitor/data-collection/data-collection-rule-create-edit
|
|
198
|
+
- Azure DCR structure: https://learn.microsoft.com/azure/azure-monitor/data-collection/data-collection-rule-structure
|
|
199
|
+
- Azure custom tables and `_CL` suffix: https://learn.microsoft.com/azure/azure-monitor/logs/create-custom-table
|
|
200
|
+
- Managed identity on Azure VM: https://learn.microsoft.com/entra/identity/managed-identities-azure-resources/how-to-use-vm-token
|
|
201
|
+
- Managed identity on App Service / Functions: https://learn.microsoft.com/azure/app-service/overview-managed-identity
|
|
202
|
+
- Fluentd output plugin API: https://docs.fluentd.org/plugin-development/api-plugin-output
|
|
203
|
+
|
|
204
|
+
## Development
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
bundle install
|
|
208
|
+
bundle exec rake test
|
|
209
|
+
```
|
data/README_ja.md
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# fluent-plugin-azure-logs-ingestion
|
|
2
|
+
|
|
3
|
+
Azure Monitor Logs Ingestion API を使い、Log Analytics Workspace のテーブルへ出力する Fluentd output plugin です。
|
|
4
|
+
|
|
5
|
+
> [!WARNING]
|
|
6
|
+
> この plugin は試験的な実装であり、本格的な production workload での動作実績はまだ十分ではありません。
|
|
7
|
+
|
|
8
|
+
## インストール
|
|
9
|
+
|
|
10
|
+
### RubyGems
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
fluent-gem install fluent-plugin-azure-logs-ingestion
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
`td-agent` を使う場合は `fluent-gem` の代わりに `td-agent-gem` を使ってください。
|
|
17
|
+
|
|
18
|
+
### Bundler
|
|
19
|
+
|
|
20
|
+
Gemfile に次の行を追加してください。
|
|
21
|
+
|
|
22
|
+
```ruby
|
|
23
|
+
gem 'fluent-plugin-azure-logs-ingestion'
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
その後、`bundle install` を実行してください。
|
|
27
|
+
|
|
28
|
+
### GitHub から Bundler でインストール
|
|
29
|
+
|
|
30
|
+
Bundler で GitHub repository を直接指定でき、特定の revision に固定したい場合は、`ref` を指定します。
|
|
31
|
+
|
|
32
|
+
```ruby
|
|
33
|
+
gem 'fluent-plugin-azure-logs-ingestion', git: 'https://github.com/fukasawah/fluent-plugin-azure-logs-ingestion.git', ref: 'abda3b5370ccd61282c8b234ca05042049e09d15'
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
その後、`bundle install` を実行してください。
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
## 設定例
|
|
40
|
+
|
|
41
|
+
```conf
|
|
42
|
+
|
|
43
|
+
<match azure.logs>
|
|
44
|
+
@type azure_logs_ingestion
|
|
45
|
+
endpoint https://example.japaneast-1.ingest.monitor.azure.com
|
|
46
|
+
dcr_immutable_id dcr-000a00a000a00000a000000aa000a0aa
|
|
47
|
+
stream_name Custom-MyTable
|
|
48
|
+
|
|
49
|
+
tenant_id YOUR_TENANT_ID
|
|
50
|
+
client_id YOUR_CLIENT_ID
|
|
51
|
+
client_secret YOUR_CLIENT_SECRET
|
|
52
|
+
|
|
53
|
+
<buffer>
|
|
54
|
+
@type file
|
|
55
|
+
path /var/log/fluent/azure-logs-ingestion-buffer.*.buf
|
|
56
|
+
chunk_limit_size 900KB
|
|
57
|
+
</buffer>
|
|
58
|
+
</match>
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## 設定
|
|
62
|
+
|
|
63
|
+
### パラメータ
|
|
64
|
+
|
|
65
|
+
| Parameter | Required | Default | Description |
|
|
66
|
+
| --- | --- | --- | --- |
|
|
67
|
+
| `endpoint` | yes | none | Logs Ingestion endpoint または DCE endpoint |
|
|
68
|
+
| `dcr_immutable_id` | yes | none | `dcr-...` 形式の immutable DCR ID |
|
|
69
|
+
| `stream_name` | yes | none | request URI に指定する DCR の input stream 名 |
|
|
70
|
+
| `gzip` | no | `false` | HTTP request body を gzip 圧縮して送信 |
|
|
71
|
+
| `use_msi` | no | `false` | service principal ではなく Managed Identity を使う |
|
|
72
|
+
| `tenant_id` | no | `ENV['AZURE_TENANT_ID']` | service principal 認証で使う tenant ID |
|
|
73
|
+
| `client_id` | no | `ENV['AZURE_CLIENT_ID']` | service principal の client ID、または user-assigned managed identity の client ID |
|
|
74
|
+
| `client_secret` | no | `ENV['AZURE_CLIENT_SECRET']` | service principal の client secret |
|
|
75
|
+
| `authority_host` | no | `https://login.microsoftonline.com` | OAuth token endpoint の base URL |
|
|
76
|
+
| `logs_ingestion_scope` | no | `https://monitor.azure.com/.default` | Logs Ingestion API 用の OAuth scope |
|
|
77
|
+
| `token_refresh_skew` | no | `300s` | Azure のアクセストークンを期限の何秒前に再取得するか |
|
|
78
|
+
|
|
79
|
+
### Buffer パラメータ
|
|
80
|
+
|
|
81
|
+
この plugin は、production workload で使いやすい file buffer と、Logs Ingestion API の request size 上限に収まりやすい chunk size だけを buffer default として変更しています。
|
|
82
|
+
|
|
83
|
+
| Buffer parameter | Default | Description |
|
|
84
|
+
| --- | --- | --- |
|
|
85
|
+
| `@type` | `file` | デフォルトで file buffer を使う |
|
|
86
|
+
| `chunk_limit_size` | `900KB` | Logs Ingestion API の 1 MB request size 上限に対して余裕を持たせた chunk size |
|
|
87
|
+
|
|
88
|
+
### 認証
|
|
89
|
+
|
|
90
|
+
service principal の認証情報は Fluentd 設定に直接書くことも、環境変数から読むこともできます。
|
|
91
|
+
|
|
92
|
+
利用できる環境変数:
|
|
93
|
+
|
|
94
|
+
- `AZURE_TENANT_ID`
|
|
95
|
+
- `AZURE_CLIENT_ID`
|
|
96
|
+
- `AZURE_CLIENT_SECRET`
|
|
97
|
+
|
|
98
|
+
Managed Identity を使う場合は `use_msi true` を指定し、`tenant_id` と `client_secret` は省略してください。
|
|
99
|
+
User-assigned Managed Identity を使う場合は `client_id` に User-assigned Managed Identity の client ID を指定します。
|
|
100
|
+
|
|
101
|
+
### Managed Identity の例
|
|
102
|
+
|
|
103
|
+
```conf
|
|
104
|
+
<match azure.logs>
|
|
105
|
+
@type azure_logs_ingestion
|
|
106
|
+
endpoint https://example.japaneast-1.ingest.monitor.azure.com
|
|
107
|
+
dcr_immutable_id dcr-000a00a000a00000a000000aa000a0aa
|
|
108
|
+
stream_name Custom-MyTable
|
|
109
|
+
use_msi true
|
|
110
|
+
client_id YOUR_USER_ASSIGNED_MANAGED_IDENTITY_CLIENT_ID
|
|
111
|
+
|
|
112
|
+
<buffer>
|
|
113
|
+
@type file
|
|
114
|
+
path /var/log/fluent/azure-logs-ingestion-buffer.*.buf
|
|
115
|
+
</buffer>
|
|
116
|
+
</match>
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Buffer 設定の考え方
|
|
120
|
+
|
|
121
|
+
- `chunk_limit_size 900KB`: Logs Ingestion API の request size 上限は 1 MB です。1 chunk を 1 request に収めるほうがよいですが、APIリクエストの際の JSON 化によるサイズ増加の余裕を見て 900KB 前後から始めるのが安全です。
|
|
122
|
+
- `flush_mode` や `flush_interval` は Fluentd の default を使います。より短い遅延で送信したい場合は、通常の Fluentd buffer 設定として明示してください。
|
|
123
|
+
|
|
124
|
+
### Auxiliary tier での 30 分制限
|
|
125
|
+
|
|
126
|
+
Log Analytics Workspace の Auxiliary tier へ送信し、DCR transformation で `TimeGenerated` を変換しない場合、1 request 内の `TimeGenerated` の範囲は 30 分未満にする必要があります。この制限に対応するには、元ログの時刻を Fluentd の event time として扱い、`<buffer time>` と `timekey` で chunk を時間分割してください。
|
|
127
|
+
|
|
128
|
+
たとえば record の `created_at` が ISO8601 文字列の場合、入力時の parser で event time に変換します。Azure 側にも `created_at` を送る場合は `keep_time_key true` を指定します。
|
|
129
|
+
|
|
130
|
+
```conf
|
|
131
|
+
<source>
|
|
132
|
+
@type tail
|
|
133
|
+
path /var/log/myapp/app.log
|
|
134
|
+
tag azure.logs
|
|
135
|
+
|
|
136
|
+
<parse>
|
|
137
|
+
@type json
|
|
138
|
+
time_key created_at
|
|
139
|
+
time_format %iso8601
|
|
140
|
+
keep_time_key true
|
|
141
|
+
</parse>
|
|
142
|
+
</source>
|
|
143
|
+
|
|
144
|
+
<match azure.logs>
|
|
145
|
+
@type azure_logs_ingestion
|
|
146
|
+
# ...
|
|
147
|
+
<buffer time>
|
|
148
|
+
@type file
|
|
149
|
+
# ...
|
|
150
|
+
timekey 20m
|
|
151
|
+
</buffer>
|
|
152
|
+
</match>
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
すでに record として取り込まれた後に event time を差し替える場合は、filter で `renew_time_key` を使えます。ただし `renew_time_key` に指定するフィールドは Unix timestamp である必要があります。
|
|
156
|
+
|
|
157
|
+
```conf
|
|
158
|
+
<filter azure.logs>
|
|
159
|
+
@type record_transformer
|
|
160
|
+
renew_time_key created_at
|
|
161
|
+
</filter>
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
<match azure.logs>
|
|
165
|
+
@type azure_logs_ingestion
|
|
166
|
+
# ...
|
|
167
|
+
<buffer time>
|
|
168
|
+
@type file
|
|
169
|
+
# ...
|
|
170
|
+
timekey 20m
|
|
171
|
+
</buffer>
|
|
172
|
+
</match>
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
`<buffer time>` の `time` は record 内の `time` フィールドではなく Fluentd の event time です。record の `created_at` や `TimeGenerated` を payload に残すだけでは chunk の時間分割には使われません。
|
|
176
|
+
|
|
177
|
+
### Plugin 仕様
|
|
178
|
+
|
|
179
|
+
- 本 plugin は `TimeGenerated` を書き換えません。payload に `time` のような元時刻フィールドがある場合は、DCR の transformation で `extend TimeGenerated = todatetime(['time'])` のように作成することをお勧めします。
|
|
180
|
+
- HTTP `400`, `401`, `403`, `413` は unrecoverable として扱い、`429` と `5xx` は Fluentd の retry 対象です。
|
|
181
|
+
|
|
182
|
+
## Memo: Log Analytics Workspace / DCR / Logs Ingestion API の仕様
|
|
183
|
+
|
|
184
|
+
- 現状、Log Analytics Workspace の SKU が Auxiliary tier で DCR の transformation を使わない場合、1 request 内の `TimeGenerated` は 30 分未満に収める必要があります。
|
|
185
|
+
- > This limit only applies when ingesting to Auxiliary log tables. If the source entries for TimeGenerated are ingested without being transformed, the range of entries must be less than 30 minutes.
|
|
186
|
+
>
|
|
187
|
+
> https://learn.microsoft.com/en-us/azure/azure-monitor/fundamentals/service-limits#logs-ingestion-api
|
|
188
|
+
- Logs Ingestion API の request size は 1 MB 以下に抑える必要があります。
|
|
189
|
+
- > Maximum size of API call | 1 MB
|
|
190
|
+
>
|
|
191
|
+
> https://learn.microsoft.com/en-us/azure/azure-monitor/fundamentals/service-limits#logs-ingestion-api
|
|
192
|
+
- Log Analytics Workspace には重複排除の仕組みがありません。Azure が受理した後で Fluentd が応答を正常に確認できなかった場合、再送により重複が発生します。
|
|
193
|
+
|
|
194
|
+
## 参考資料
|
|
195
|
+
|
|
196
|
+
- Azure Monitor Logs Ingestion API overview: https://learn.microsoft.com/azure/azure-monitor/logs/logs-ingestion-api-overview
|
|
197
|
+
- Create data collection rules (DCRs) using JSON: https://learn.microsoft.com/azure/azure-monitor/data-collection/data-collection-rule-create-edit
|
|
198
|
+
- Azure DCR structure: https://learn.microsoft.com/azure/azure-monitor/data-collection/data-collection-rule-structure
|
|
199
|
+
- Azure custom tables and `_CL` suffix: https://learn.microsoft.com/azure/azure-monitor/logs/create-custom-table
|
|
200
|
+
- Managed identity on Azure VM: https://learn.microsoft.com/entra/identity/managed-identities-azure-resources/how-to-use-vm-token
|
|
201
|
+
- Managed identity on App Service / Functions: https://learn.microsoft.com/azure/app-service/overview-managed-identity
|
|
202
|
+
- Fluentd output plugin API: https://docs.fluentd.org/plugin-development/api-plugin-output
|
|
203
|
+
|
|
204
|
+
## 開発
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
bundle install
|
|
208
|
+
bundle exec rake test
|
|
209
|
+
```
|
data/Rakefile
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'net/http'
|
|
5
|
+
require 'uri'
|
|
6
|
+
require 'time'
|
|
7
|
+
require 'openssl'
|
|
8
|
+
|
|
9
|
+
module Fluent
  module Plugin
    module AzureLogsIngestion
      # Obtains and caches an OAuth access token, either through the
      # client-credentials flow of a service principal or through a managed
      # identity endpoint (App Service style endpoint or IMDS).
      #
      # The cached token is guarded by a mutex, so #token may be called from
      # several threads; only one of them performs the HTTP request.
      class Auth
        # Access token string together with the time at which it expires.
        Token = Struct.new(:value, :expires_at, keyword_init: true)

        IMDS_API_VERSION = '2018-02-01'
        APP_SERVICE_API_VERSION = '2019-08-01'
        IMDS_ENDPOINT = 'http://169.254.169.254/metadata/identity/oauth2/token'

        # @param use_msi [Boolean] use a managed identity endpoint instead of
        #   the service principal client-credentials flow
        # @param tenant_id [String, nil] tenant used in the token URL
        #   (service principal mode)
        # @param client_id [String, nil] service principal client ID, or the
        #   client ID sent to the managed identity endpoint when non-empty
        # @param client_secret [String, nil] service principal secret
        # @param authority_host [String] base URL of the token authority
        # @param logs_ingestion_scope [String] OAuth scope to request
        # @param token_refresh_skew [Numeric] seconds before expiry at which
        #   the cached token is no longer reused
        # @param logger [#debug] logger receiving debug messages
        def initialize(use_msi:, tenant_id:, client_id:, client_secret:, authority_host:, logs_ingestion_scope:, token_refresh_skew:, logger:)
          @use_msi = use_msi
          @tenant_id = tenant_id
          @client_id = client_id
          @client_secret = client_secret
          @authority_host = authority_host
          @logs_ingestion_scope = logs_ingestion_scope
          @token_refresh_skew = token_refresh_skew
          @log = logger
          @token = nil
          @mutex = Mutex.new
        end

        # Returns a usable access token, fetching a new one when the cached
        # token is missing or within the refresh skew of its expiry.
        #
        # @return [String] the access token value
        # @raise [Fluent::UnrecoverableError] when the token endpoint rejects
        #   the request with a non-retryable 4xx status, or the response has
        #   no valid expiry
        # @raise [RuntimeError] on network failures, retryable HTTP statuses
        #   and unparsable responses
        def token
          @mutex.synchronize do
            if token_valid?(@token)
              # getutc returns a copy; Time#utc would mutate the cached value.
              @log.debug('reusing cached access token', expires_at: @token.expires_at.getutc.iso8601)
            else
              @token = @use_msi ? fetch_msi_token : fetch_service_principal_token
              @log.debug('fetched new access token', mode: @use_msi ? 'managed_identity' : 'service_principal', expires_at: @token.expires_at.getutc.iso8601)
            end

            @token.value
          end
        end

        private

        # True when +token+ exists and will still be valid after the
        # configured refresh skew has elapsed.
        def token_valid?(token)
          token && token.expires_at && (Time.now + @token_refresh_skew) < token.expires_at
        end

        # Requests a token with the client-credentials grant from
        # <authority_host>/<tenant_id>/oauth2/v2.0/token.
        #
        # @return [Token]
        def fetch_service_principal_token
          uri = URI.join(normalized_authority_host, "#{@tenant_id}/oauth2/v2.0/token")
          @log.debug('requesting service principal token', authority_host: normalized_authority_host, tenant_id: @tenant_id, scope: @logs_ingestion_scope)
          request = Net::HTTP::Post.new(uri)
          request.set_form_data(
            'grant_type' => 'client_credentials',
            'client_id' => @client_id,
            'client_secret' => @client_secret,
            'scope' => @logs_ingestion_scope
          )

          response = perform_request(uri, request)
          build_token_from_token_response(parse_json_body(response))
        rescue JSON::ParserError => error
          raise "failed to parse service principal token response: #{error.message}"
        end

        # Chooses the managed identity endpoint: the one named by
        # IDENTITY_ENDPOINT when it is set together with IDENTITY_HEADER or
        # MSI_SECRET, otherwise IMDS.
        #
        # @return [Token]
        def fetch_msi_token
          if ENV['IDENTITY_ENDPOINT'] && (ENV['IDENTITY_HEADER'] || ENV['MSI_SECRET'])
            fetch_app_service_token
          else
            fetch_imds_token
          end
        end

        # Requests a managed identity token from IMDS. The endpoint can be
        # overridden with AZURE_LOGS_INGESTION_IMDS_ENDPOINT.
        #
        # @return [Token]
        def fetch_imds_token
          uri = URI(ENV.fetch('AZURE_LOGS_INGESTION_IMDS_ENDPOINT', IMDS_ENDPOINT))
          params = managed_identity_params(IMDS_API_VERSION)
          uri.query = URI.encode_www_form(params)

          request = Net::HTTP::Get.new(uri)
          request['Metadata'] = 'true'
          @log.debug('requesting managed identity token via IMDS', endpoint: uri.to_s, resource: params['resource'], client_id: params['client_id'])

          response = perform_request(uri, request)
          build_token_from_token_response(parse_json_body(response))
        rescue JSON::ParserError => error
          raise "failed to parse IMDS token response: #{error.message}"
        end

        # Requests a managed identity token from the endpoint named by the
        # IDENTITY_ENDPOINT environment variable, keeping any query string
        # that endpoint already carries.
        #
        # @return [Token]
        # @raise [Fluent::ConfigError] when IDENTITY_ENDPOINT is not set
        def fetch_app_service_token
          uri = URI(app_service_identity_endpoint)
          params = managed_identity_params(APP_SERVICE_API_VERSION)
          existing = URI.decode_www_form(String(uri.query))
          uri.query = URI.encode_www_form(existing + params.to_a)

          request = Net::HTTP::Get.new(uri)
          request['X-IDENTITY-HEADER'] = ENV['IDENTITY_HEADER'] if ENV['IDENTITY_HEADER']
          request['Secret'] = ENV['MSI_SECRET'] if ENV['MSI_SECRET']
          @log.debug('requesting managed identity token via app service endpoint', endpoint: uri.to_s, resource: params['resource'], client_id: params['client_id'])

          response = perform_request(uri, request)
          build_token_from_token_response(parse_json_body(response))
        rescue JSON::ParserError => error
          raise "failed to parse App Service managed identity response: #{error.message}"
        end

        # Reads IDENTITY_ENDPOINT. The KeyError rescue is scoped to this
        # lookup only, so that a token response lacking 'access_token' is not
        # misreported as a configuration error.
        def app_service_identity_endpoint
          ENV.fetch('IDENTITY_ENDPOINT')
        rescue KeyError => error
          raise Fluent::ConfigError, error.message
        end

        # Query parameters shared by both managed identity endpoints.
        # 'client_id' is included only when a non-empty client ID was given.
        def managed_identity_params(api_version)
          params = {
            'api-version' => api_version,
            'resource' => scope_resource(@logs_ingestion_scope)
          }
          params['client_id'] = @client_id if @client_id && !@client_id.empty?
          params
        end

        # Sends +request+ to +uri+ and returns the response when it is a
        # success; otherwise raises through #handle_token_response_errors.
        #
        # @raise [RuntimeError] when the connection fails at the network or
        #   TLS level
        def perform_request(uri, request)
          http = Net::HTTP.new(uri.host, uri.port)
          http.use_ssl = uri.scheme == 'https'
          http.open_timeout = 10
          http.read_timeout = 30
          http.verify_mode = OpenSSL::SSL::VERIFY_PEER if http.use_ssl?

          response = http.request(request)
          handle_token_response_errors(response)
          response
        rescue Timeout::Error, IOError, SocketError, SystemCallError, OpenSSL::SSL::SSLError => error
          # Errno::* are SystemCallError subclasses and EOFError is an IOError,
          # so this list covers the connection-level failures by parent class.
          raise "token request failed: #{error.message}"
        end

        # Raises for every non-success response. Retryable statuses raise a
        # RuntimeError; any other 4xx raises Fluent::UnrecoverableError.
        def handle_token_response_errors(response)
          return if response.is_a?(Net::HTTPSuccess)

          message = "token endpoint returned #{response.code} #{response.message} #{String(response.body).strip}".strip
          status = response.code.to_i

          retryable = transient_status?(status) || (@use_msi && retryable_msi_status?(status))
          raise message if retryable
          raise Fluent::UnrecoverableError, message if status >= 400 && status < 500

          raise message
        end

        # Statuses that indicate a temporary condition regardless of the
        # authentication mode: request timeout, throttling and server errors.
        def transient_status?(status)
          status == 408 || status == 429 || status >= 500
        end

        # Statuses treated as retryable when talking to a managed identity
        # endpoint.
        def retryable_msi_status?(status)
          status == 404 || status == 410 || status == 429 || status >= 500
        end

        # Parses the response body as JSON; a nil body is parsed as ''.
        #
        # @raise [JSON::ParserError] when the body is not valid JSON
        def parse_json_body(response)
          JSON.parse(String(response.body))
        end

        # Builds a Token from a parsed token response.
        #
        # @raise [KeyError] when 'access_token' is absent
        # @raise [Fluent::UnrecoverableError] when no valid expiry is present
        def build_token_from_token_response(body)
          Token.new(value: body.fetch('access_token'), expires_at: parse_token_expiry(body))
        end

        # Derives the expiry time from 'expires_on' (absolute epoch seconds)
        # or, failing that, 'expires_in' (seconds from now).
        def parse_token_expiry(body)
          return Time.at(Integer(body['expires_on'])) if body['expires_on']
          return Time.now + Integer(body['expires_in']) if body['expires_in']

          raise Fluent::UnrecoverableError, 'token response did not include expires_on or expires_in'
        rescue ArgumentError, TypeError
          raise Fluent::UnrecoverableError, 'token response included an invalid expiration value'
        end

        # Authority host with exactly one trailing slash appended when it is
        # missing, so URI.join keeps any path component of the host.
        def normalized_authority_host
          @authority_host.end_with?('/') ? @authority_host : "#{@authority_host}/"
        end

        # Converts a '<resource>/.default' scope into the '<resource>/' form
        # sent as the 'resource' parameter; other scopes pass through as is.
        def scope_resource(scope)
          scope.sub(%r{/\.default\z}, '/')
        end
      end
    end
  end
end
|