fluent-plugin-kusto 0.0.1.beta → 0.0.2.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 90834d37833dc31d3a1aaa3f3de064ee260f9c619753857be060c2e48ba4a79c
4
- data.tar.gz: 854001eaf38c3065262d2fd4abdb20c14e95af65ba8f08418825ddf6ac56ad94
3
+ metadata.gz: cc350e3ff175a97e394fb513ed353bc305944c42474ce443030c2ebc855f9d22
4
+ data.tar.gz: 004040b7e123a5f713302fe094cf3ac1d4c8ddc25f7778acf43311084cd5d60e
5
5
  SHA512:
6
- metadata.gz: 66e9089d652413b6a405dc49fafa2fb3fe5399461dd18fdd87025293790dc8d92c9fcf4b8122937f2c06ec766ea43a48716dce499a61c6d298ad03f23299d5c0
7
- data.tar.gz: 93e5fb41d9e4a76a5a34bad266abf596161f24143b1763d27a3378cf7f22afb87601f0a23b80775bb5cb75dc52b9876f6077f35ba59ae6dd3f19182790e32e68
6
+ metadata.gz: 6609741147c05adf1b900a71eeed74f7301541bfb79e47320f3df797108ac0c3bc436df0c1e86862431274caa9cba94d9b27fa3e581aab5c17ea1a6a76f9cb15
7
+ data.tar.gz: 25bcc39f12c974a289a3f2ef8943e4419ac0d8b72d631e1843fc9ba67e351848652d6db51a8935b081182de9c0ba3fdb8e63c8e40da83d2489d06f60c06a7978
data/Gemfile CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  source 'https://rubygems.org'
4
4
 
5
- gemspec
5
+ gemspec name: 'fluent-plugin-kusto'
6
6
  gem 'fiddle'
7
7
  gem 'mocha'
8
8
  gem 'ostruct'
data/README.md CHANGED
@@ -50,7 +50,7 @@ $ gem install fluent-plugin-kusto --pre
50
50
  Add the following line to your Gemfile:
51
51
 
52
52
  ```ruby
53
- gem "fluent-plugin-kusto", "~> 0.0.1.beta"
53
+ gem "fluent-plugin-kusto", "~> 0.0.2.beta"
54
54
  ```
55
55
 
56
56
  **Note:** This is a beta release. Use the `--pre` flag with gem install or specify the beta version in your Gemfile.
@@ -116,76 +116,271 @@ A table with the expected schema must exist in order for data to be ingested pro
116
116
  .create table <table_name> (tag:string, timestamp:datetime, record:dynamic)
117
117
  ```
118
118
 
119
- ## Configuration parameters
120
-
121
- | Key | Description | Default |
122
- | -------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------ |
123
- | `tenant_id` | The tenant/domain ID of the Azure Active Directory (AAD) registered application. Required if `managed_identity_client_id` isn't set. | _none_ |
124
- | `client_id` | The client ID of the AAD registered application. Required if `managed_identity_client_id` isn't set. | _none_ |
125
- | `client_secret` | The client secret of the AAD registered application ([App Secret](https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal#option-2-create-a-new-application-secret)). Required if `managed_identity_client_id` isn't set. | _none_ |
126
- | `managed_identity_client_id` | The managed identity ID to authenticate with. Set to `SYSTEM` for system-assigned managed identity, or set to the MI client ID (`GUID`) for user-assigned managed identity. Required if `tenant_id`, `client_id`, and `client_secret` aren't set. | _none_ |
127
- | `endpoint` | The cluster's endpoint, usually in the form `https://cluster_name.region.kusto.windows.net` | _none_ |
128
- | `database_name` | The database name. | _none_ |
129
- | `table_name` | The table name. | _none_ |
130
- | `compression_enabled` | If enabled, sends compressed HTTP payload (gzip) to Kusto. | `true` |
131
- | `workers` | The number of [workers](../../../administration/multithreading#outputs) to perform flush operations for this output. | `0` |
132
- | `buffered` | Enable buffering into disk before ingesting into Azure Kusto. If `buffered` is `true`, buffered mode is activated. If `false`, non-buffered mode is used. | `true` |
133
- | `delayed` | If `true`, enables delayed commit for buffer chunks. Only supported in buffered mode (`buffered` must be `true`). If `buffered` is `false`, delayed commit is not available. | `false` |
134
- | `azure_cloud` | Azure cloud environment. E.g., `AzureCloud`, `AzureChinaCloud`, `AzureUSGovernmentCloud`, `AzureGermanCloud`. | `AzureCloud` |
135
- | `chunk_keys` (buffer section) | Only in buffered mode. Keys to use for chunking the buffer. Possible values: `tag`, `time`, or a combination such as `["tag", "time"]`. Controls how data is grouped and flushed. | `["time"]` |
136
- | `timekey` (buffer section) | Only in buffered mode. Time interval for buffer chunking. Possible values: integer seconds (e.g., `60`, `3600`, `86400`). | `86400` (1 day) |
137
- | `timekey_wait` (buffer section) | Only in buffered mode. Wait time before flushing a timekey chunk after its time window closes. Possible values: duration string (e.g., `30s`, `5m`). | `30s` |
138
- | `timekey_use_utc` (buffer section) | Only in buffered mode. Use UTC for timekey chunking. Possible values: `true`, `false`. | `true` |
139
- | `flush_at_shutdown` (buffer section) | Only in buffered mode. Flush buffer at shutdown. Possible values: `true`, `false`. | `true` |
140
- | `retry_max_times` (buffer section) | Only in buffered mode. Maximum number of retry attempts for buffer flush. Possible values: integer (e.g., `5`, `10`). | `5` |
141
- | `retry_wait` (buffer section) | Only in buffered mode. Wait time between buffer flush retries. Possible values: duration string (e.g., `1s`, `10s`). | `1s` |
142
- | `overflow_action` (buffer section) | Only in buffered mode. Action to take when buffer overflows. Possible values: `block`, `drop_oldest_chunk`, `throw_exception`. | `block` |
143
- | `chunk_limit_size` (buffer section) | Only in buffered mode. Maximum size per buffer chunk. Possible values: size string (e.g., `256m`, `1g`). | `256m` |
144
- | `total_limit_size` (buffer section) | Only in buffered mode. Maximum total buffer size. Possible values: size string (e.g., `2g`, `10g`). | `2g` |
145
- | `flush_mode` (buffer section) | Only in buffered mode. Buffer flush mode. Possible values: `interval`, `immediate`, `lazy`. | `interval` |
146
- | `flush_interval` (buffer section) | Only in buffered mode. Interval for buffer flush. Possible values: duration string (e.g., `10s`, `1m`). | `10s` |
147
- | `logger_path` | Optional. File path for plugin log output. If not set, logs are written to stdout. | stdout(terminal) |
148
- | `auth_type` | The authentication type to use. Possible values: `aad`, `user_managed_identity`, `system_managed_identity`,`workload_identity`. | `aad` |
149
- | `workload_identity_client_id` | The client ID for Azure Workload Identity authentication. Required if using workload identity for authentication. | _none_ |
150
- | `workload_identity_tenant_id` | The tenant ID for Azure Workload Identity authentication. Required if using workload identity for authentication. | _none_ |
151
- | `workload_identity_token_file` | The file path to the token file for Azure Workload Identity authentication. Required if using workload identity for authentication. | `/var/run/secrets/azure/tokens/azure-identity-token` |
152
-
153
- ## Sample Configuration
119
+ ## Authentication Methods
154
120
 
121
+ This plugin supports four authentication methods for connecting to Azure Data Explorer:
122
+
123
+ ### 1. Azure AD Application (aad)
124
+ Traditional client credentials flow using Azure AD app registration. Best for CI/CD pipelines and traditional applications.
125
+
126
+ **Required Parameters:**
127
+ - `auth_type`: `aad`
128
+ - `tenant_id`: Your Azure AD tenant ID
129
+ - `client_id`: The Azure AD application client ID
130
+ - `client_secret`: The Azure AD application client secret
131
+
132
+ ### 2. System-Assigned Managed Identity (system_managed_identity)
133
+ Uses the system-assigned managed identity of Azure resources (VMs, App Services, AKS nodes). No secrets to manage.
134
+
135
+ **Required Parameters:**
136
+ - `auth_type`: `system_managed_identity`
137
+ - `managed_identity_client_id`: Set to `SYSTEM`
138
+
139
+ ### 3. User-Assigned Managed Identity (user_managed_identity)
140
+ Uses a user-assigned managed identity. Allows sharing the same identity across multiple Azure resources.
141
+
142
+ **Required Parameters:**
143
+ - `auth_type`: `user_managed_identity`
144
+ - `managed_identity_client_id`: The client ID (GUID) of the user-assigned managed identity
145
+
146
+ ### 4. Azure Workload Identity (workload_identity)
147
+ Modern approach for Kubernetes/AKS workloads. Uses OIDC federation and replaces the legacy Pod Identity system.
148
+
149
+ **Required Parameters:**
150
+ - `auth_type`: `workload_identity`
151
+ - `workload_identity_client_id`: The client ID for workload identity
152
+ - `workload_identity_tenant_id`: The tenant ID for workload identity
153
+ - `workload_identity_token_file_path`: Path to the workload identity token file (optional, defaults to `/var/run/secrets/azure/tokens/azure-identity-token`)
154
+
155
+ ## Data Schema and Ingestion Mapping
156
+
157
+ ### Fixed 3-Column Schema
158
+ The plugin uses a standardized 3-column schema for all ingested data:
159
+
160
+ | Column | Type | Description |
161
+ |--------|------|-------------|
162
+ | `tag` | string | The Fluentd event tag |
163
+ | `timestamp` | datetime | The event timestamp |
164
+ | `record` | dynamic | The actual event payload as JSON |
165
+
166
+ ### Ingestion Mapping Support
167
+ You can now use pre-defined ingestion mappings in Kusto to transform data during ingestion by setting the `ingestion_mapping_reference` parameter. This allows you to:
168
+
169
+ - Transform the default 3-column format into your desired schema
170
+ - Apply data transformations during ingestion for better performance
171
+ - Use Kusto's native ingestion mapping capabilities
172
+
173
+ **Example:**
174
+ ```conf
175
+ <match test.kusto>
176
+ @type kusto
177
+ # ... other configuration ...
178
+ ingestion_mapping_reference my_custom_mapping
179
+ </match>
180
+ ```
181
+
182
+ The referenced mapping must already exist on the target table in Kusto before data is ingested. Create it with:
183
+ ```kql
184
+ .create table MyTable ingestion json mapping "my_custom_mapping"
185
+ @'[
186
+ {"column":"EventTime", "path":"$.timestamp", "datatype":"datetime"},
187
+ {"column":"Source", "path":"$.tag", "datatype":"string"},
188
+ {"column":"Level", "path":"$.record.level", "datatype":"string"},
189
+ {"column":"Message", "path":"$.record.message", "datatype":"string"}
190
+ ]'
191
+ ```
192
+
193
+ ### Alternative Pattern: Landing Table + Update Policy
194
+
195
+ If you prefer not to use ingestion mappings, you can still use this pattern for schema transformation:
196
+
197
+ ```kql
198
+ // 1. Create landing table (matches plugin output)
199
+ .create table RawLogs (tag:string, timestamp:datetime, record:dynamic)
200
+
201
+ // 2. Create your target table with the desired schema
202
+ .create table ProcessedLogs (
203
+ EventTime: datetime,
204
+ Source: string,
205
+ Level: string,
206
+ Message: string,
207
+ UserId: string,
208
+ Properties: dynamic
209
+ )
210
+
211
+ // 3. Create an update policy to transform data
212
+ .alter table ProcessedLogs policy update
213
+ @'[{
214
+ "IsEnabled": true,
215
+ "Source": "RawLogs",
216
+ "Query": "RawLogs | extend EventTime=timestamp, Source=tag, Level=tostring(record.level), Message=tostring(record.message), UserId=tostring(record.userId), Properties=record.properties | project EventTime, Source, Level, Message, UserId, Properties",
217
+ "IsTransactional": true,
218
+ "PropagateIngestionProperties": false
219
+ }]'
220
+ ```
221
+
222
+ This approach provides flexibility to transform the generic 3-column format into any schema you need.
223
+
224
+ ## Configuration Parameters
225
+
226
+ | Key | Description | Default |
227
+ | --- | ----------- | ------- |
228
+ | `auth_type` | Authentication method: `aad`, `system_managed_identity`, `user_managed_identity`, `workload_identity` | `aad` |
229
+ | `tenant_id` | Azure AD tenant ID. Required for `aad` authentication. | _none_ |
230
+ | `client_id` | Azure AD application client ID. Required for `aad` authentication. | _none_ |
231
+ | `client_secret` | Azure AD application client secret. Required for `aad` authentication. | _none_ |
232
+ | `managed_identity_client_id` | For managed identity: `SYSTEM` for system-assigned, or client ID (GUID) for user-assigned. | _none_ |
233
+ | `workload_identity_client_id` | Client ID for workload identity authentication. | _none_ |
234
+ | `workload_identity_tenant_id` | Tenant ID for workload identity authentication. | _none_ |
235
+ | `workload_identity_token_file_path` | Path to workload identity token file. | `/var/run/secrets/azure/tokens/azure-identity-token` |
236
+ | `endpoint` | Kusto cluster endpoint (e.g., `https://cluster.region.kusto.windows.net`) | _none_ |
237
+ | `database_name` | Target database name. | _none_ |
238
+ | `table_name` | Target table name. | _none_ |
239
+ | `compression_enabled` | Enable gzip compression for HTTP payload. | `true` |
240
+ | `buffered` | Enable disk buffering before ingestion. | `true` |
241
+ | `delayed` | Enable delayed commit for buffer chunks (requires `buffered` to be `true`). | `false` |
242
+ | `deferred_commit_timeout` | Max time (seconds) to wait for deferred commit verification. | `30` |
243
+ | `ingestion_mapping_reference` | Name of a pre-defined ingestion mapping in Kusto for data transformation during ingestion. | _none_ |
244
+ | `azure_cloud` | Azure cloud environment: `AzureCloud`, `AzureChinaCloud`, `AzureUSGovernmentCloud`, `AzureGermanCloud` | `AzureCloud` |
245
+ | `logger_path` | File path for plugin logs. If not set, logs to stdout. | stdout |
246
+
247
+ ### Buffer Configuration (buffered mode only)
248
+ | Key | Description | Default |
249
+ | --- | ----------- | ------- |
250
+ | `chunk_keys` | Buffer chunking keys: `tag`, `time`, or `["tag", "time"]` | `["time"]` |
251
+ | `timekey` | Time interval for buffer chunking (seconds) | `86400` (1 day) |
252
+ | `timekey_wait` | Wait time before flushing timekey chunk | `30s` |
253
+ | `timekey_use_utc` | Use UTC for timekey chunking | `true` |
254
+ | `flush_at_shutdown` | Flush buffer at shutdown | `true` |
255
+ | `retry_max_times` | Maximum retry attempts for buffer flush | `5` |
256
+ | `retry_wait` | Wait time between retries | `1s` |
257
+ | `overflow_action` | Action on buffer overflow: `block`, `drop_oldest_chunk`, `throw_exception` | `block` |
258
+ | `chunk_limit_size` | Maximum size per buffer chunk | `256m` |
259
+ | `total_limit_size` | Maximum total buffer size | `2g` |
260
+ | `flush_mode` | Buffer flush mode: `interval`, `immediate`, `lazy` | `interval` |
261
+ | `flush_interval` | Buffer flush interval | `10s` |
262
+
263
+ ## Sample Configurations
264
+
265
+ ### 1. Azure AD Authentication
155
266
  ```conf
156
- <system>
157
- workers 1
158
- </system>
159
267
  <match test.kusto>
160
268
  @type kusto
161
269
  @log_level debug
270
+
271
+ # Authentication - Azure AD
272
+ auth_type aad
273
+ tenant_id 12345678-1234-1234-1234-123456789abc
274
+ client_id 87654321-4321-4321-4321-abcdef123456
275
+ client_secret your-app-secret-here
276
+
277
+ # Kusto connection
278
+ endpoint https://mycluster.eastus.kusto.windows.net
279
+ database_name MyDatabase
280
+ table_name MyLogs
281
+
282
+ # Optional settings
283
+ azure_cloud AzureCloud
284
+ compression_enabled true
162
285
  buffered true
163
286
  delayed false
164
- endpoint https://yourcluster.region.kusto.windows.net
165
- database_name your-db
166
- table_name your-table
167
- tenant_id <your-tenant-id>
168
- client_id <your-client-id>
287
+
288
+ <buffer>
289
+ @type memory
290
+ timekey 1m
291
+ timekey_wait 30s
292
+ flush_interval 10s
293
+ </buffer>
294
+ </match>
295
+ ```
296
+
297
+ ### 2. System-Assigned Managed Identity
298
+ ```conf
299
+ <match test.kusto>
300
+ @type kusto
301
+ @log_level debug
302
+
303
+ # Authentication - System Managed Identity
304
+ auth_type system_managed_identity
169
305
  managed_identity_client_id SYSTEM
306
+
307
+ # Kusto connection
308
+ endpoint https://mycluster.eastus.kusto.windows.net
309
+ database_name MyDatabase
310
+ table_name MyLogs
311
+
312
+ # Optional settings
313
+ azure_cloud AzureCloud
314
+ compression_enabled true
315
+ buffered true
316
+ delayed false
317
+
318
+ <buffer>
319
+ @type memory
320
+ timekey 1m
321
+ timekey_wait 30s
322
+ flush_interval 10s
323
+ </buffer>
324
+ </match>
325
+ ```
326
+
327
+ ### 3. User-Assigned Managed Identity
328
+ ```conf
329
+ <match test.kusto>
330
+ @type kusto
331
+ @log_level debug
332
+
333
+ # Authentication - User Managed Identity
334
+ auth_type user_managed_identity
335
+ managed_identity_client_id 11111111-2222-3333-4444-555555555555
336
+
337
+ # Kusto connection
338
+ endpoint https://mycluster.eastus.kusto.windows.net
339
+ database_name MyDatabase
340
+ table_name MyLogs
341
+
342
+ # Optional settings
343
+ azure_cloud AzureCloud
170
344
  compression_enabled true
345
+ buffered true
346
+ delayed false
347
+
348
+ <buffer>
349
+ @type memory
350
+ timekey 1m
351
+ timekey_wait 30s
352
+ flush_interval 10s
353
+ </buffer>
354
+ </match>
355
+ ```
356
+
357
+ ### 4. Azure Workload Identity (Kubernetes/AKS)
358
+ ```conf
359
+ <match test.kusto>
360
+ @type kusto
361
+ @log_level debug
362
+
363
+ # Authentication - Workload Identity
364
+ auth_type workload_identity
365
+ workload_identity_client_id 99999999-8888-7777-6666-555555555555
366
+ workload_identity_tenant_id 12345678-1234-1234-1234-123456789abc
367
+ workload_identity_token_file_path /var/run/secrets/azure/tokens/azure-identity-token
368
+
369
+ # Kusto connection
370
+ endpoint https://mycluster.eastus.kusto.windows.net
371
+ database_name MyDatabase
372
+ table_name MyLogs
373
+
374
+ # Optional settings
171
375
  azure_cloud AzureCloud
172
- logger_path /var/log/azure-kusto-fluentd.log
376
+ compression_enabled true
377
+ buffered true
378
+ delayed false
379
+
173
380
  <buffer>
174
381
  @type memory
175
- # To chunk by tag only:
176
- # chunk_keys tag
177
- # To chunk by tag and time:
178
- # chunk_keys tag,time
179
382
  timekey 1m
180
383
  timekey_wait 30s
181
- timekey_use_utc true
182
- flush_at_shutdown true
183
- retry_max_times 5
184
- retry_wait 1s
185
- overflow_action block
186
- chunk_limit_size 256m
187
- total_limit_size 2g
188
- flush_mode interval
189
384
  flush_interval 10s
190
385
  </buffer>
191
386
  </match>
@@ -196,6 +391,14 @@ A table with the expected schema must exist in order for data to be ingested pro
196
391
 
197
392
  This diagram shows the main components and data flow for the plugin, including configuration, error handling, token management, and Azure resource interactions.
198
393
 
394
+ ## Release Notes
395
+
396
+ ### v0.0.2.beta (Latest)
397
+ - **Fixed critical authentication initialization bugs** - Resolved `NameError` in ManagedIdentityTokenProvider and WorkloadIdentityTokenProvider
398
+ - **Added comprehensive unit test coverage** - New test suites for authentication providers with 14 test cases and 45+ assertions
399
+ - **Improved E2E test reliability** - Enhanced timeout configurations to handle Azure Kusto ingestion delays (480s-600s timeouts)
400
+ - **Enhanced authentication stability** - All authentication methods now properly validated: AAD, System/User Managed Identity, Workload Identity, Azure CLI
401
+
199
402
  ## Copyright
200
403
 
201
404
  * License: Apache License, Version 2.0
@@ -18,6 +18,7 @@ class ManagedIdentityTokenProvider < AbstractTokenProvider
18
18
 
19
19
  def initialize(outconfiguration)
20
20
  super(outconfiguration)
21
+ setup_config(outconfiguration)
21
22
  token_request_params_set(outconfiguration)
22
23
  end
23
24
 
@@ -39,7 +40,7 @@ class ManagedIdentityTokenProvider < AbstractTokenProvider
39
40
 
40
41
  def token_request_params_set(_outconfiguration)
41
42
  token_acquire_url = IMDS_TOKEN_ACQUIRE_URL.dup + '?' + append_header('resource',
42
- ERB::Util.url_encode(outconfiguration.kusto_endpoint)) + '&' + append_header(
43
+ ERB::Util.url_encode(@resource)) + '&' + append_header(
43
44
  'api-version', '2018-02-01'
44
45
  )
45
46
  unless @object_id.nil?
@@ -50,11 +51,11 @@ class ManagedIdentityTokenProvider < AbstractTokenProvider
50
51
  token_acquire_url = (token_acquire_url + '&' + append_header('msi_res_id',
51
52
  ERB::Util.url_encode(@msi_res_id)))
52
53
  end
53
- URI.parse(token_acquire_url)
54
- return unless @use_user_assigned
55
-
56
- (token_acquire_url + '&' + append_header('client_id',
57
- ERB::Util.url_encode(@managed_identity_client_id)))
54
+ if @use_user_assigned
55
+ token_acquire_url = (token_acquire_url + '&' + append_header('client_id',
56
+ ERB::Util.url_encode(@managed_identity_client_id)))
57
+ end
58
+ @token_acquire_url = token_acquire_url
58
59
  end
59
60
 
60
61
  def fetch_token
@@ -9,6 +9,10 @@ class WorkloadIdentity < AbstractTokenProvider
9
9
  DEFAULT_TOKEN_FILE = '/var/run/secrets/azure/tokens/azure-identity-token'
10
10
  AZURE_OAUTH2_TOKEN_ENDPOINT = 'https://login.microsoftonline.com/%<tenant_id>s/oauth2/v2.0/token'
11
11
 
12
+ def initialize(outconfiguration)
13
+ super(outconfiguration)
14
+ end
15
+
12
16
  # Use get_token from base class for token retrieval
13
17
 
14
18
  private
@@ -118,7 +118,7 @@ class OutputConfiguration
118
118
  'database_name' => @database_name,
119
119
  'table_name' => @table_name
120
120
  }
121
- check_required_configs(required, %w[client_app_id tenant_id kusto_endpoint database_name table_name])
121
+ check_required_configs(required, %w[workload_identity_client_id workload_identity_tenant_id kusto_endpoint database_name table_name])
122
122
  end
123
123
 
124
124
  def aad_required_hash
@@ -77,13 +77,15 @@ class Ingester
77
77
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
78
78
 
79
79
  # rubocop:disable Metrics/MethodLength
80
- def prepare_ingestion_message2(db, table, data_uri, blob_size_bytes, identity_token, compression_enabled = true)
80
+ def prepare_ingestion_message2(db, table, data_uri, blob_size_bytes, identity_token, compression_enabled = true, mapping_reference = nil)
81
81
  # Prepare the ingestion message for Azure Queue
82
82
  additional_props = {
83
83
  'authorizationContext' => identity_token,
84
84
  'format' => 'multijson'
85
85
  }
86
86
  additional_props['CompressionType'] = 'gzip' if compression_enabled
87
+ additional_props['ingestionMappingReference'] = mapping_reference if mapping_reference && !mapping_reference.empty?
88
+
87
89
  {
88
90
  'Id' => SecureRandom.uuid,
89
91
  'BlobPath' => data_uri,
@@ -120,11 +122,11 @@ class Ingester
120
122
  end
121
123
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
122
124
 
123
- def upload_data_to_blob_and_queue(raw_data, blob_name, db, table_name, compression_enabled = true)
125
+ def upload_data_to_blob_and_queue(raw_data, blob_name, db, table_name, compression_enabled = true, mapping_reference = nil)
124
126
  # Upload data to blob and send ingestion message to queue
125
127
  blob_uri, blob_size_bytes = upload_to_blob(@resources[:blob_sas_uri], raw_data, blob_name)
126
128
  message = prepare_ingestion_message2(db, table_name, blob_uri, blob_size_bytes, @resources[:identity_token],
127
- compression_enabled)
129
+ compression_enabled, mapping_reference)
128
130
  post_message_to_queue_http(@resources[:queue_sas_uri], message)
129
131
  { blob_uri: blob_uri, blob_size_bytes: blob_size_bytes }
130
132
  end
@@ -33,6 +33,8 @@ module Fluent
33
33
  config_param :azure_cloud, :string, default: 'AzureCloud'
34
34
  config_param :compression_enabled, :bool, default: true
35
35
  config_param :logger_path, :string, default: nil
36
+ config_param :ingestion_mapping_reference, :string, default: nil,
37
+ desc: 'Name of a pre-defined ingestion mapping in Kusto for data transformation during ingestion.'
36
38
  config_param :auth_type, :string, default: 'aad',
37
39
  desc: 'Authentication type to use for Kusto. Options: "aad", "user_managed_identity", "system_managed_identity", "workload_identity".'
38
40
  config_param :workload_identity_client_id, :string, default: nil, secret: true,
@@ -164,7 +166,7 @@ module Fluent
164
166
  '_')
165
167
  blob_name = "fluentd_event_#{safe_tag}.json"
166
168
  @ingester.upload_data_to_blob_and_queue(formatted, blob_name, @database_name, @table_name,
167
- compression_enabled)
169
+ compression_enabled, @ingestion_mapping_reference)
168
170
  rescue StandardError => e
169
171
  @logger&.error("Failed to ingest event to Kusto: #{e}\nEvent skipped: #{record.inspect}\n#{e.backtrace.join("\n")}")
170
172
  next
@@ -184,7 +186,7 @@ module Fluent
184
186
  data_to_upload = compression_enabled ? compress_data(raw_data) : raw_data
185
187
  begin
186
188
  @ingester.upload_data_to_blob_and_queue(data_to_upload, blob_name, @database_name, @table_name,
187
- compression_enabled)
189
+ compression_enabled, @ingestion_mapping_reference)
188
190
  rescue StandardError => e
189
191
  handle_kusto_error(e, unique_id)
190
192
  end
@@ -224,7 +226,7 @@ module Fluent
224
226
  data_to_upload = compression_enabled ? compress_data(updated_raw_data) : updated_raw_data
225
227
  begin
226
228
  @ingester.upload_data_to_blob_and_queue(data_to_upload, blob_name, @database_name, @table_name,
227
- compression_enabled)
229
+ compression_enabled, @ingestion_mapping_reference)
228
230
  if @shutdown_called || !@delayed
229
231
  commit_write(chunk.unique_id)
230
232
  if @shutdown_called