fluent-plugin-kusto 0.0.1.beta → 0.0.3.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 90834d37833dc31d3a1aaa3f3de064ee260f9c619753857be060c2e48ba4a79c
4
- data.tar.gz: 854001eaf38c3065262d2fd4abdb20c14e95af65ba8f08418825ddf6ac56ad94
3
+ metadata.gz: 2f46e06c2f84df5ddae86b8bffc55bbd0e7a92ad636d4c02512b7c9fa909e563
4
+ data.tar.gz: ef350fc500e82cbf80a0b91c1247463b8a4a00487324bbbc39f04a4017436d90
5
5
  SHA512:
6
- metadata.gz: 66e9089d652413b6a405dc49fafa2fb3fe5399461dd18fdd87025293790dc8d92c9fcf4b8122937f2c06ec766ea43a48716dce499a61c6d298ad03f23299d5c0
7
- data.tar.gz: 93e5fb41d9e4a76a5a34bad266abf596161f24143b1763d27a3378cf7f22afb87601f0a23b80775bb5cb75dc52b9876f6077f35ba59ae6dd3f19182790e32e68
6
+ metadata.gz: 00634a1a008a0dcb07929181b4946ad42b5d0d2d8b4aa1099498f13f72fd4b5479cb64f347fb065937d930870ca933dd6320cac8c096d0e689415dcd79564b5b
7
+ data.tar.gz: db35058e072ed1bdabce57439db87c161a026a4654bc61afd40f9a87f305c75a146017fbbdf91f3c71e7017a24b80ffed48eb061167c44361e3792615baa6b84
data/Gemfile CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  source 'https://rubygems.org'
4
4
 
5
- gemspec
5
+ gemspec name: 'fluent-plugin-kusto'
6
6
  gem 'fiddle'
7
7
  gem 'mocha'
8
8
  gem 'ostruct'
data/README.md CHANGED
@@ -50,7 +50,7 @@ $ gem install fluent-plugin-kusto --pre
50
50
  Add the following line to your Gemfile:
51
51
 
52
52
  ```ruby
53
- gem "fluent-plugin-kusto", "~> 0.0.1.beta"
53
+ gem "fluent-plugin-kusto", "~> 0.0.2.beta"
54
54
  ```
55
55
 
56
56
  **Note:** This is a beta release. Use the `--pre` flag with gem install or specify the beta version in your Gemfile.
@@ -116,76 +116,271 @@ A table with the expected schema must exist in order for data to be ingested pro
116
116
  .create table <table_name> (tag:string, timestamp:datetime, record:dynamic)
117
117
  ```
118
118
 
119
- ## Configuration parameters
120
-
121
- | Key | Description | Default |
122
- | -------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------ |
123
- | `tenant_id` | The tenant/domain ID of the Azure Active Directory (AAD) registered application. Required if `managed_identity_client_id` isn't set. | _none_ |
124
- | `client_id` | The client ID of the AAD registered application. Required if `managed_identity_client_id` isn't set. | _none_ |
125
- | `client_secret` | The client secret of the AAD registered application ([App Secret](https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal#option-2-create-a-new-application-secret)). Required if `managed_identity_client_id` isn't set. | _none_ |
126
- | `managed_identity_client_id` | The managed identity ID to authenticate with. Set to `SYSTEM` for system-assigned managed identity, or set to the MI client ID (`GUID`) for user-assigned managed identity. Required if `tenant_id`, `client_id`, and `client_secret` aren't set. | _none_ |
127
- | `endpoint` | The cluster's endpoint, usually in the form `https://cluster_name.region.kusto.windows.net` | _none_ |
128
- | `database_name` | The database name. | _none_ |
129
- | `table_name` | The table name. | _none_ |
130
- | `compression_enabled` | If enabled, sends compressed HTTP payload (gzip) to Kusto. | `true` |
131
- | `workers` | The number of [workers](../../../administration/multithreading#outputs) to perform flush operations for this output. | `0` |
132
- | `buffered` | Enable buffering into disk before ingesting into Azure Kusto. If `buffered` is `true`, buffered mode is activated. If `false`, non-buffered mode is used. | `true` |
133
- | `delayed` | If `true`, enables delayed commit for buffer chunks. Only supported in buffered mode (`buffered` must be `true`). If `buffered` is `false`, delayed commit is not available. | `false` |
134
- | `azure_cloud` | Azure cloud environment. E.g., `AzureCloud`, `AzureChinaCloud`, `AzureUSGovernmentCloud`, `AzureGermanCloud`. | `AzureCloud` |
135
- | `chunk_keys` (buffer section) | Only in buffered mode. Keys to use for chunking the buffer. Possible values: `tag`, `time`, or a combination such as `["tag", "time"]`. Controls how data is grouped and flushed. | `["time"]` |
136
- | `timekey` (buffer section) | Only in buffered mode. Time interval for buffer chunking. Possible values: integer seconds (e.g., `60`, `3600`, `86400`). | `86400` (1 day) |
137
- | `timekey_wait` (buffer section) | Only in buffered mode. Wait time before flushing a timekey chunk after its time window closes. Possible values: duration string (e.g., `30s`, `5m`). | `30s` |
138
- | `timekey_use_utc` (buffer section) | Only in buffered mode. Use UTC for timekey chunking. Possible values: `true`, `false`. | `true` |
139
- | `flush_at_shutdown` (buffer section) | Only in buffered mode. Flush buffer at shutdown. Possible values: `true`, `false`. | `true` |
140
- | `retry_max_times` (buffer section) | Only in buffered mode. Maximum number of retry attempts for buffer flush. Possible values: integer (e.g., `5`, `10`). | `5` |
141
- | `retry_wait` (buffer section) | Only in buffered mode. Wait time between buffer flush retries. Possible values: duration string (e.g., `1s`, `10s`). | `1s` |
142
- | `overflow_action` (buffer section) | Only in buffered mode. Action to take when buffer overflows. Possible values: `block`, `drop_oldest_chunk`, `throw_exception`. | `block` |
143
- | `chunk_limit_size` (buffer section) | Only in buffered mode. Maximum size per buffer chunk. Possible values: size string (e.g., `256m`, `1g`). | `256m` |
144
- | `total_limit_size` (buffer section) | Only in buffered mode. Maximum total buffer size. Possible values: size string (e.g., `2g`, `10g`). | `2g` |
145
- | `flush_mode` (buffer section) | Only in buffered mode. Buffer flush mode. Possible values: `interval`, `immediate`, `lazy`. | `interval` |
146
- | `flush_interval` (buffer section) | Only in buffered mode. Interval for buffer flush. Possible values: duration string (e.g., `10s`, `1m`). | `10s` |
147
- | `logger_path` | Optional. File path for plugin log output. If not set, logs are written to stdout. | stdout(terminal) |
148
- | `auth_type` | The authentication type to use. Possible values: `aad`, `user_managed_identity`, `system_managed_identity`,`workload_identity`. | `aad` |
149
- | `workload_identity_client_id` | The client ID for Azure Workload Identity authentication. Required if using workload identity for authentication. | _none_ |
150
- | `workload_identity_tenant_id` | The tenant ID for Azure Workload Identity authentication. Required if using workload identity for authentication. | _none_ |
151
- | `workload_identity_token_file` | The file path to the token file for Azure Workload Identity authentication. Required if using workload identity for authentication. | `/var/run/secrets/azure/tokens/azure-identity-token` |
152
-
153
- ## Sample Configuration
119
+ ## Authentication Methods
154
120
 
121
+ This plugin supports four authentication methods for connecting to Azure Data Explorer:
122
+
123
+ ### 1. Azure AD Application (aad)
124
+ Traditional client credentials flow using Azure AD app registration. Best for CI/CD pipelines and traditional applications.
125
+
126
+ **Required Parameters:**
127
+ - `auth_type`: `aad`
128
+ - `tenant_id`: Your Azure AD tenant ID
129
+ - `client_id`: The Azure AD application client ID
130
+ - `client_secret`: The Azure AD application client secret
131
+
132
+ ### 2. System-Assigned Managed Identity (system_managed_identity)
133
+ Uses the system-assigned managed identity of Azure resources (VMs, App Services, AKS nodes). No secrets to manage.
134
+
135
+ **Required Parameters:**
136
+ - `auth_type`: `system_managed_identity`
137
+ - `managed_identity_client_id`: Set to `SYSTEM`
138
+
139
+ ### 3. User-Assigned Managed Identity (user_managed_identity)
140
+ Uses a user-assigned managed identity. Allows sharing the same identity across multiple Azure resources.
141
+
142
+ **Required Parameters:**
143
+ - `auth_type`: `user_managed_identity`
144
+ - `managed_identity_client_id`: The client ID (GUID) of the user-assigned managed identity
145
+
146
+ ### 4. Azure Workload Identity (workload_identity)
147
+ Modern approach for Kubernetes/AKS workloads. Uses OIDC federation and replaces the legacy Pod Identity system.
148
+
149
+ **Required Parameters:**
150
+ - `auth_type`: `workload_identity`
151
+ - `workload_identity_client_id`: The client ID for workload identity
152
+ - `workload_identity_tenant_id`: The tenant ID for workload identity
153
+ - `workload_identity_token_file_path`: Path to the workload identity token file (optional, defaults to `/var/run/secrets/azure/tokens/azure-identity-token`)
154
+
155
+ ## Data Schema and Ingestion Mapping
156
+
157
+ ### Fixed 3-Column Schema
158
+ The plugin uses a standardized 3-column schema for all ingested data:
159
+
160
+ | Column | Type | Description |
161
+ |--------|------|-------------|
162
+ | `tag` | string | The Fluentd event tag |
163
+ | `timestamp` | datetime | The event timestamp |
164
+ | `record` | dynamic | The actual event payload as JSON |
165
+
166
+ ### Ingestion Mapping Support
167
+ You can now use pre-defined ingestion mappings in Kusto to transform data during ingestion by setting the `ingestion_mapping_reference` parameter. This allows you to:
168
+
169
+ - Transform the default 3-column format into your desired schema
170
+ - Apply data transformations during ingestion for better performance
171
+ - Use Kusto's native ingestion mapping capabilities
172
+
173
+ **Example:**
174
+ ```conf
175
+ <match test.kusto>
176
+ @type kusto
177
+ # ... other configuration ...
178
+ ingestion_mapping_reference my_custom_mapping
179
+ </match>
180
+ ```
181
+
182
+ Then create the mapping in Kusto:
183
+ ```kql
184
+ .create table MyTable ingestion json mapping "my_custom_mapping"
185
+ @'[
186
+ {"column":"EventTime", "path":"$.timestamp", "datatype":"datetime"},
187
+ {"column":"Source", "path":"$.tag", "datatype":"string"},
188
+ {"column":"Level", "path":"$.record.level", "datatype":"string"},
189
+ {"column":"Message", "path":"$.record.message", "datatype":"string"}
190
+ ]'
191
+ ```
192
+
193
+ ### Alternative Pattern: Landing Table + Update Policy
194
+
195
+ If you prefer not to use ingestion mappings, you can still use this pattern for schema transformation:
196
+
197
+ ```kql
198
+ // 1. Create landing table (matches plugin output)
198
+ .create table RawLogs (tag:string, timestamp:datetime, record:dynamic)
199
+
200
+ // 2. Create your target table with desired schema
201
+ .create table ProcessedLogs (
202
+ EventTime: datetime,
203
+ Source: string,
204
+ Level: string,
205
+ Message: string,
206
+ UserId: string,
207
+ Properties: dynamic
208
+ )
209
+
210
+ // 3. Create update policy to transform data
212
+ .alter table ProcessedLogs policy update
213
+ @'[{
214
+ "IsEnabled": true,
215
+ "Source": "RawLogs",
216
+ "Query": "RawLogs | extend EventTime=timestamp, Source=tag, Level=tostring(record.level), Message=tostring(record.message), UserId=tostring(record.userId), Properties=record.properties | project EventTime, Source, Level, Message, UserId, Properties",
217
+ "IsTransactional": true,
218
+ "PropagateIngestionProperties": false
219
+ }]'
220
+ ```
221
+
222
+ This approach provides flexibility to transform the generic 3-column format into any schema you need.
223
+
224
+ ## Configuration Parameters
225
+
226
+ | Key | Description | Default |
227
+ | --- | ----------- | ------- |
228
+ | `auth_type` | Authentication method: `aad`, `system_managed_identity`, `user_managed_identity`, `workload_identity` | `aad` |
229
+ | `tenant_id` | Azure AD tenant ID. Required for `aad` authentication. | _none_ |
230
+ | `client_id` | Azure AD application client ID. Required for `aad` authentication. | _none_ |
231
+ | `client_secret` | Azure AD application client secret. Required for `aad` authentication. | _none_ |
232
+ | `managed_identity_client_id` | For managed identity: `SYSTEM` for system-assigned, or client ID (GUID) for user-assigned. | _none_ |
233
+ | `workload_identity_client_id` | Client ID for workload identity authentication. | _none_ |
234
+ | `workload_identity_tenant_id` | Tenant ID for workload identity authentication. | _none_ |
235
+ | `workload_identity_token_file_path` | Path to workload identity token file. | `/var/run/secrets/azure/tokens/azure-identity-token` |
236
+ | `endpoint` | Kusto cluster endpoint (e.g., `https://cluster.region.kusto.windows.net`) | _none_ |
237
+ | `database_name` | Target database name. | _none_ |
238
+ | `table_name` | Target table name. | _none_ |
239
+ | `compression_enabled` | Enable gzip compression for HTTP payload. | `true` |
240
+ | `buffered` | Enable disk buffering before ingestion. | `true` |
241
+ | `delayed` | Enable delayed commit for buffer chunks (requires `buffered` to be `true`). | `false` |
242
+ | `deferred_commit_timeout` | Max time (seconds) to wait for deferred commit verification. | `30` |
243
+ | `ingestion_mapping_reference` | Name of a pre-defined ingestion mapping in Kusto for data transformation during ingestion. | _none_ |
244
+ | `azure_cloud` | Azure cloud environment: `AzureCloud`, `AzureChinaCloud`, `AzureUSGovernmentCloud`, `AzureGermanCloud` | `AzureCloud` |
245
+ | `logger_path` | File path for plugin logs. If not set, logs to stdout. | stdout |
246
+
247
+ ### Buffer Configuration (buffered mode only)
248
+ | Key | Description | Default |
249
+ | --- | ----------- | ------- |
250
+ | `chunk_keys` | Buffer chunking keys: `tag`, `time`, or `["tag", "time"]` | `["time"]` |
251
+ | `timekey` | Time interval for buffer chunking (seconds) | `86400` (1 day) |
252
+ | `timekey_wait` | Wait time before flushing timekey chunk | `30s` |
253
+ | `timekey_use_utc` | Use UTC for timekey chunking | `true` |
254
+ | `flush_at_shutdown` | Flush buffer at shutdown | `true` |
255
+ | `retry_max_times` | Maximum retry attempts for buffer flush | `5` |
256
+ | `retry_wait` | Wait time between retries | `1s` |
257
+ | `overflow_action` | Action on buffer overflow: `block`, `drop_oldest_chunk`, `throw_exception` | `block` |
258
+ | `chunk_limit_size` | Maximum size per buffer chunk | `256m` |
259
+ | `total_limit_size` | Maximum total buffer size | `2g` |
260
+ | `flush_mode` | Buffer flush mode: `interval`, `immediate`, `lazy` | `interval` |
261
+ | `flush_interval` | Buffer flush interval | `10s` |
262
+
263
+ ## Sample Configurations
264
+
265
+ ### 1. Azure AD Authentication
155
266
  ```conf
156
- <system>
157
- workers 1
158
- </system>
159
267
  <match test.kusto>
160
268
  @type kusto
161
269
  @log_level debug
270
+
271
+ # Authentication - Azure AD
272
+ auth_type aad
273
+ tenant_id 12345678-1234-1234-1234-123456789abc
274
+ client_id 87654321-4321-4321-4321-abcdef123456
275
+ client_secret your-app-secret-here
276
+
277
+ # Kusto connection
278
+ endpoint https://mycluster.eastus.kusto.windows.net
279
+ database_name MyDatabase
280
+ table_name MyLogs
281
+
282
+ # Optional settings
283
+ azure_cloud AzureCloud
284
+ compression_enabled true
162
285
  buffered true
163
286
  delayed false
164
- endpoint https://yourcluster.region.kusto.windows.net
165
- database_name your-db
166
- table_name your-table
167
- tenant_id <your-tenant-id>
168
- client_id <your-client-id>
287
+
288
+ <buffer>
289
+ @type memory
290
+ timekey 1m
291
+ timekey_wait 30s
292
+ flush_interval 10s
293
+ </buffer>
294
+ </match>
295
+ ```
296
+
297
+ ### 2. System-Assigned Managed Identity
298
+ ```conf
299
+ <match test.kusto>
300
+ @type kusto
301
+ @log_level debug
302
+
303
+ # Authentication - System Managed Identity
304
+ auth_type system_managed_identity
169
305
  managed_identity_client_id SYSTEM
306
+
307
+ # Kusto connection
308
+ endpoint https://mycluster.eastus.kusto.windows.net
309
+ database_name MyDatabase
310
+ table_name MyLogs
311
+
312
+ # Optional settings
313
+ azure_cloud AzureCloud
314
+ compression_enabled true
315
+ buffered true
316
+ delayed false
317
+
318
+ <buffer>
319
+ @type memory
320
+ timekey 1m
321
+ timekey_wait 30s
322
+ flush_interval 10s
323
+ </buffer>
324
+ </match>
325
+ ```
326
+
327
+ ### 3. User-Assigned Managed Identity
328
+ ```conf
329
+ <match test.kusto>
330
+ @type kusto
331
+ @log_level debug
332
+
333
+ # Authentication - User Managed Identity
334
+ auth_type user_managed_identity
335
+ managed_identity_client_id 11111111-2222-3333-4444-555555555555
336
+
337
+ # Kusto connection
338
+ endpoint https://mycluster.eastus.kusto.windows.net
339
+ database_name MyDatabase
340
+ table_name MyLogs
341
+
342
+ # Optional settings
343
+ azure_cloud AzureCloud
170
344
  compression_enabled true
345
+ buffered true
346
+ delayed false
347
+
348
+ <buffer>
349
+ @type memory
350
+ timekey 1m
351
+ timekey_wait 30s
352
+ flush_interval 10s
353
+ </buffer>
354
+ </match>
355
+ ```
356
+
357
+ ### 4. Azure Workload Identity (Kubernetes/AKS)
358
+ ```conf
359
+ <match test.kusto>
360
+ @type kusto
361
+ @log_level debug
362
+
363
+ # Authentication - Workload Identity
364
+ auth_type workload_identity
365
+ workload_identity_client_id 99999999-8888-7777-6666-555555555555
366
+ workload_identity_tenant_id 12345678-1234-1234-1234-123456789abc
367
+ workload_identity_token_file_path /var/run/secrets/azure/tokens/azure-identity-token
368
+
369
+ # Kusto connection
370
+ endpoint https://mycluster.eastus.kusto.windows.net
371
+ database_name MyDatabase
372
+ table_name MyLogs
373
+
374
+ # Optional settings
171
375
  azure_cloud AzureCloud
172
- logger_path /var/log/azure-kusto-fluentd.log
376
+ compression_enabled true
377
+ buffered true
378
+ delayed false
379
+
173
380
  <buffer>
174
381
  @type memory
175
- # To chunk by tag only:
176
- # chunk_keys tag
177
- # To chunk by tag and time:
178
- # chunk_keys tag,time
179
382
  timekey 1m
180
383
  timekey_wait 30s
181
- timekey_use_utc true
182
- flush_at_shutdown true
183
- retry_max_times 5
184
- retry_wait 1s
185
- overflow_action block
186
- chunk_limit_size 256m
187
- total_limit_size 2g
188
- flush_mode interval
189
384
  flush_interval 10s
190
385
  </buffer>
191
386
  </match>
@@ -196,6 +391,14 @@ A table with the expected schema must exist in order for data to be ingested pro
196
391
 
197
392
  This diagram shows the main components and data flow for the plugin, including configuration, error handling, token management, and Azure resource interactions.
198
393
 
394
+ ## Release Notes
395
+
396
+ ### v0.0.2.beta (Latest)
397
+ - **Fixed critical authentication initialization bugs** - Resolved `NameError` in ManagedIdentityTokenProvider and WorkloadIdentityTokenProvider
398
+ - **Added comprehensive unit test coverage** - New test suites for authentication providers with 14 test cases and 45+ assertions
399
+ - **Improved E2E test reliability** - Enhanced timeout configurations to handle Azure Kusto ingestion delays (480s-600s timeouts)
400
+ - **Enhanced authentication stability** - All authentication methods now properly validated: AAD, System/User Managed Identity, Workload Identity, Azure CLI
401
+
199
402
  ## Copyright
200
403
 
201
404
  * License: Apache License, Version 2.0
@@ -52,7 +52,7 @@ class AadTokenProvider < AbstractTokenProvider
52
52
 
53
53
  def post_token_request
54
54
  headers = header
55
- max_retries = 10
55
+ max_retries = 3 # Reduced from 10 to prevent rate limiting cascade
56
56
  retries = 0
57
57
  uri = URI.parse(@token_request_uri)
58
58
  form_data = URI.encode_www_form(
@@ -63,8 +63,7 @@ class AadTokenProvider < AbstractTokenProvider
63
63
  )
64
64
  while retries < max_retries
65
65
  begin
66
- http = Net::HTTP.new(uri.host, uri.port)
67
- http.use_ssl = (uri.scheme == 'https')
66
+ http = create_http_client(uri)
68
67
  request = Net::HTTP::Post.new(uri.request_uri, headers)
69
68
  request.body = form_data
70
69
 
@@ -18,6 +18,7 @@ class ManagedIdentityTokenProvider < AbstractTokenProvider
18
18
 
19
19
  def initialize(outconfiguration)
20
20
  super(outconfiguration)
21
+ setup_config(outconfiguration)
21
22
  token_request_params_set(outconfiguration)
22
23
  end
23
24
 
@@ -39,7 +40,7 @@ class ManagedIdentityTokenProvider < AbstractTokenProvider
39
40
 
40
41
  def token_request_params_set(_outconfiguration)
41
42
  token_acquire_url = IMDS_TOKEN_ACQUIRE_URL.dup + '?' + append_header('resource',
42
- ERB::Util.url_encode(outconfiguration.kusto_endpoint)) + '&' + append_header(
43
+ ERB::Util.url_encode(@resource)) + '&' + append_header(
43
44
  'api-version', '2018-02-01'
44
45
  )
45
46
  unless @object_id.nil?
@@ -50,11 +51,11 @@ class ManagedIdentityTokenProvider < AbstractTokenProvider
50
51
  token_acquire_url = (token_acquire_url + '&' + append_header('msi_res_id',
51
52
  ERB::Util.url_encode(@msi_res_id)))
52
53
  end
53
- URI.parse(token_acquire_url)
54
- return unless @use_user_assigned
55
-
56
- (token_acquire_url + '&' + append_header('client_id',
57
- ERB::Util.url_encode(@managed_identity_client_id)))
54
+ if @use_user_assigned
55
+ token_acquire_url = (token_acquire_url + '&' + append_header('client_id',
56
+ ERB::Util.url_encode(@managed_identity_client_id)))
57
+ end
58
+ @token_acquire_url = token_acquire_url
58
59
  end
59
60
 
60
61
  def fetch_token
@@ -72,7 +73,7 @@ class ManagedIdentityTokenProvider < AbstractTokenProvider
72
73
  uri = URI.parse(@token_acquire_url)
73
74
  while retries < max_retries
74
75
  begin
75
- http = Net::HTTP.new(uri.host, uri.port)
76
+ http = create_http_client(uri)
76
77
  request = Net::HTTP::Get.new(uri.request_uri, headers)
77
78
  response = http.request(request)
78
79
  return JSON.parse(response.body) if response.code.to_i == 200