fluent-plugin-kusto 0.0.2.beta → 0.0.3.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,862 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'test-unit'
4
- require 'fluent/test'
5
- require 'fluent/test/driver/output'
6
- require 'fluent/test/helpers'
7
- require 'fluent/plugin/out_kusto'
8
- require 'net/http'
9
- require 'uri'
10
- require 'json'
11
- require_relative '../../lib/fluent/plugin/kusto_query'
12
- require_relative '../../lib/fluent/plugin/ingester'
13
- require_relative '../../lib/fluent/plugin/conffile'
14
- require 'ostruct'
15
- require 'logger'
16
- require 'concurrent'
17
- require 'tempfile'
18
- require 'set'
19
-
20
- class KustoE2ETest < Test::Unit::TestCase
21
- include Fluent::Test::Helpers
22
-
23
# Runs before every test: initialises the Fluentd test harness, reads the
# cluster and credential settings from the environment, then builds the
# default driver and creates the per-run table.
def setup
  Fluent::Test.setup

  # Logger used by the helpers and injected into the plugin instance
  @logger = Logger.new($stdout)
  @logger.level = Logger::INFO

  # Target cluster and table; placeholders apply when a variable is unset
  @engine_url = ENV.fetch('CLUSTER', 'https://example.kusto.windows.net')
  @database = ENV.fetch('DB', 'testdb')
  @table = "FluentD_#{Time.now.to_i}"
  @columns = '(tag:string, timestamp:datetime, record:string)'

  # Credentials for the supported authentication modes
  @auth_type = ENV.fetch('AUTH_TYPE', 'aad').downcase
  @client_id = ENV.fetch('CLIENT_ID', '')
  @client_secret = ENV.fetch('CLIENT_SECRET', '')
  @tenant_id = ENV.fetch('TENANT_ID', '')
  @managed_identity_client_id = ENV.fetch('MANAGED_IDENTITY_CLIENT_ID', '')
  @wi_client_id = ENV.fetch('WI_CLIENT_ID', '')
  @wi_tenant_id = ENV.fetch('WI_TENANT_ID', '')
  @wi_token_file = ENV.fetch('WI_TOKEN_FILE', '')

  # Default (non-buffered) driver plus the table it writes to
  setup_auth_config
  configure_and_start_driver
  setup_test_table(@table)
end
48
-
49
# Runs after every test: drops the per-run table created by setup.
def teardown
  drop_command = ".drop table #{@table} ifexists"
  kusto_query(drop_command, :management)
end
52
-
53
# Obtains a bearer token through the plugin's own Ingester so the helper
# authenticates the same way the plugin does.
#
# Returns the access token taken from whatever the token provider hands
# back (a Hash, a String, or an object answering #access_token).
def get_access_token
  auth_opts =
    case @auth_type
    when 'azcli'
      { auth_type: 'azcli' }
    when 'workload_identity'
      {
        auth_type: 'workload_identity',
        workload_identity_client_id: @wi_client_id,
        workload_identity_tenant_id: @wi_tenant_id,
        workload_identity_token_file_path: @wi_token_file
      }
    when 'user_managed_identity', 'system_managed_identity'
      { auth_type: @auth_type, managed_identity_client_id: @managed_identity_client_id }
    else
      { auth_type: 'aad', client_app_id: @client_id, client_app_secret: @client_secret }
    end

  opts = {
    tenant_id: @tenant_id,
    kusto_endpoint: @engine_url,
    database_name: @database,
    table_name: @table,
    azure_cloud: 'AzureCloud'
  }.merge(auth_opts)

  ingester = Ingester.new(OutputConfiguration.new(opts))

  # The ingester keeps its client in @client; ask that client's provider directly
  token = ingester.instance_variable_get(:@client).token_provider.get_token

  # Normalise the provider's answer down to the token value
  if token.is_a?(Hash)
    token[:access_token] || token['access_token']
  elsif token.is_a?(String)
    token
  elsif token.respond_to?(:access_token)
    token.access_token
  else
    token.to_s
  end
end
96
-
97
# Sends a query or management command to the cluster's v1 REST API.
#
# query - the KQL text or control command to send.
# type  - :management posts to /v1/rest/mgmt; anything else posts to
#         /v1/rest/query.
#
# Returns the "Rows" array of the first entry under "Tables" in the
# response, or [] when the request fails, the body is not valid JSON, or no
# rows are present.
def kusto_query(query, type = :data)
  path = type == :management ? '/v1/rest/mgmt' : '/v1/rest/query'
  uri = URI("#{@engine_url}#{path}")
  token = get_access_token

  headers = {
    'Authorization' => "Bearer #{token}",
    'Content-Type' => 'application/json',
    'Accept' => 'application/json',
    'x-ms-client-version' => 'Kusto.FluentD:1.0.0'
  }

  payload = { csl: query }
  payload[:db] = @database if @database

  request = Net::HTTP::Post.new(uri.request_uri, headers)
  request.body = payload.to_json

  http = Net::HTTP.new(uri.host, uri.port)
  # Negotiate TLS only for https endpoints so a plain-http endpoint works too
  http.use_ssl = uri.scheme == 'https'

  response = http.request(request)
  unless response.code.to_i.between?(200, 299)
    @logger.error("Kusto query failed with status #{response.code}: #{response.body}")
    return []
  end

  begin
    tables = JSON.parse(response.body)['Tables']
    rows = tables && tables[0] && tables[0]['Rows']
    rows || []
  rescue JSON::ParserError => e
    @logger.error("Failed to parse JSON: #{e}")
    @logger.error(response.body)
    []
  end
end
136
-
137
# Builds the authentication directives of the plugin configuration for the
# selected @auth_type and stores them, one directive per line, in
# @auth_lines. Any unrecognised auth type falls back to the aad settings.
def setup_auth_config
  directives =
    case @auth_type
    when 'azcli'
      ['auth_type azcli']
    when 'workload_identity'
      [
        'auth_type workload_identity',
        "workload_identity_client_id #{@wi_client_id}",
        "workload_identity_tenant_id #{@wi_tenant_id}",
        "workload_identity_token_file_path #{@wi_token_file}"
      ]
    when 'user_managed_identity', 'system_managed_identity'
      [
        "auth_type #{@auth_type}",
        "managed_identity_client_id #{@managed_identity_client_id}"
      ]
    else
      [
        'auth_type aad',
        "tenant_id #{@tenant_id}",
        "client_id #{@client_id}",
        "client_secret #{@client_secret}"
      ]
    end

  # Every directive is newline-terminated, as in a heredoc
  @auth_lines = directives.map { |directive| "#{directive}\n" }.join
end
164
-
165
# Builds the plugin configuration, creates a fresh output test driver from
# it and starts the plugin instance. The configuration text is kept in
# @conf and the driver in @driver.
#
# options - overrides merged over the defaults. Keys read here:
#           :buffered, :delayed, :table_name, :compression_enabled,
#           :deferred_commit_timeout, :ingestion_mapping_reference, plus the
#           buffer keys documented on build_buffer_section.
#
# Returns whatever the plugin instance's #start returns.
def configure_and_start_driver(options = {})
  config_options = {
    buffered: false,
    delayed: false,
    table_name: @table,
    flush_interval: '5s',
    chunk_limit_size: '8k',
    timekey: 60,
    compression_enabled: true
  }.merge(options)

  buffer_config = config_options[:buffered] ? build_buffer_section(config_options) : ''

  # Optional directives collapse to an empty line when not requested
  timeout = config_options[:deferred_commit_timeout]
  timeout_config = timeout ? "deferred_commit_timeout #{timeout}" : ''
  mapping = config_options[:ingestion_mapping_reference]
  mapping_config = mapping ? "ingestion_mapping_reference #{mapping}" : ''

  @conf = <<~CONF
    @type kusto
    @log_level debug
    buffered #{config_options[:buffered]}
    delayed #{config_options[:delayed]}
    endpoint #{@engine_url}
    database_name #{@database}
    table_name #{config_options[:table_name]}
    compression_enabled #{config_options[:compression_enabled]}
    #{timeout_config}
    #{mapping_config}
    #{@auth_lines}
    #{buffer_config}
  CONF

  @driver = Fluent::Test::Driver::Output.new(Fluent::Plugin::KustoOutput).configure(@conf)
  @driver.instance.instance_variable_set(:@logger, @logger)
  @driver.instance.start
end

# Renders the <buffer> section for a buffered driver.
#
# config_options - merged option hash. Keys read: :buffer_type,
#                  :flush_mode, :chunk_limit_size, :timekey,
#                  :flush_interval, :flush_at_shutdown, :overflow_action,
#                  :retry_max_interval, :retry_forever,
#                  :flush_thread_count, :buffer_path, :total_limit_size,
#                  :chunk_limit_records.
#
# Returns the section as newline-terminated text.
def build_buffer_section(config_options)
  buffer_type = config_options[:buffer_type] || 'memory'
  flush_mode = config_options[:flush_mode] || 'interval'

  # Apply the default only when the option is absent, so an explicit
  # `false` is honoured instead of being replaced by the default.
  setting = lambda do |key, default|
    value = config_options[key]
    value.nil? ? default : value
  end

  lines = [
    '<buffer>',
    "@type #{buffer_type}",
    "chunk_limit_size #{config_options[:chunk_limit_size]}",
    "timekey #{config_options[:timekey]}"
  ]
  # flush_interval is only emitted when flush_mode is not 'immediate'
  lines << "flush_interval #{config_options[:flush_interval]}" unless flush_mode == 'immediate'
  lines << "flush_mode #{flush_mode}"
  lines << "flush_at_shutdown #{setting.call(:flush_at_shutdown, 'true')}"
  lines << "overflow_action #{config_options[:overflow_action] || 'throw_exception'}"
  lines << "retry_max_interval #{config_options[:retry_max_interval] || '30'}"
  lines << "retry_forever #{setting.call(:retry_forever, 'false')}"
  lines << "flush_thread_count #{config_options[:flush_thread_count] || '1'}"

  # File buffers additionally need a path
  lines << "path #{config_options[:buffer_path] || '/tmp/fluentd_test_buffer'}" if buffer_type == 'file'

  # Optional size limits
  lines << "total_limit_size #{config_options[:total_limit_size]}" if config_options[:total_limit_size]
  lines << "chunk_limit_records #{config_options[:chunk_limit_records]}" if config_options[:chunk_limit_records]

  lines << '</buffer>'
  "#{lines.join("\n")}\n"
end
244
-
245
# Recreates the given table from scratch with the @columns schema: drops
# any existing table of that name, then creates it.
def setup_test_table(table_name)
  drop_command = ".drop table #{table_name} ifexists"
  create_command = ".create table #{table_name} #{@columns}"

  kusto_query(drop_command, :management)
  kusto_query(create_command, :management)
end
249
-
250
# Polls the cluster until the query returns enough rows or the wait budget
# is used up.
#
# query          - KQL query handed to kusto_query.
# expected_count - minimum number of rows that ends the wait early.
# max_wait       - total seconds allowed to be spent sleeping between polls.
# interval       - seconds slept between two polls.
#
# The query always runs at least once and runs once more after the final
# sleep, so the returned rows are never older than the last wait.
#
# Returns the rows of the most recent poll (possibly fewer than expected).
def wait_for_ingestion(query, expected_count, max_wait = 240, interval = 5)
  waited = 0

  loop do
    rows = kusto_query(query)
    return rows if rows.size >= expected_count || waited >= max_wait

    sleep interval
    waited += interval
    @logger.debug("Waiting for ingestion: #{waited}s elapsed, #{rows.size}/#{expected_count} records found")
  end
end
265
-
266
# Builds a batch of synthetic events.
#
# count      - number of events to create.
# base_id    - id of the first event; later events count up from it.
# tag_suffix - label embedded in each event name and in data.test_type.
#
# Returns an Array of [epoch_seconds, record] pairs, one second apart
# starting at the current time. Each record carries 'id', 'name', an
# ISO 8601 UTC 'timestamp' and a nested 'data' hash.
def generate_test_events(count, base_id, tag_suffix = '')
  start = Time.now.to_i

  Array.new(count) do |i|
    event_time = start + i
    record = {
      'id' => base_id + i,
      'name' => "test_event_#{tag_suffix}_#{i + 1}",
      # strftime is core Ruby; for a UTC time this yields the same text as
      # Time#iso8601 without relying on the 'time' extension being loaded
      'timestamp' => Time.at(event_time).utc.strftime('%Y-%m-%dT%H:%M:%SZ'),
      'data' => {
        'index' => i,
        'batch_id' => base_id,
        'test_type' => tag_suffix
      }
    }
    [event_time, record]
  end
end
286
-
287
# Reserves a temporary file name for use as a buffer path.
#
# Returns the path of a newly created Tempfile whose name starts with
# 'fluentd_buffer' and ends in '.buf'; the handle is closed before
# returning.
def create_temp_buffer_file
  handle = Tempfile.new(%w[fluentd_buffer .buf])
  handle.path.tap { handle.close }
end
293
-
294
- # Before running this test, ensure your service principal has TableAdmin and Ingestor permissions on the test database.
295
test 'process function ingestion to Kusto' do
  test_table = "FluentD_process_#{Time.now.to_i}"
  configure_and_start_driver(table_name: test_table)
  setup_test_table(test_table)

  # Push a single event straight through the non-buffered #process path
  tag = 'e2e.test'
  record = { 'id' => 1, 'name' => 'test' }
  event_stream = Fluent::ArrayEventStream.new([[Time.now.to_i, record]])

  assert_nothing_raised { @driver.instance.process(tag, event_stream) }

  query = %(#{test_table} | extend r = parse_json(record) | where r.id == 1 and r.name == "test")
  rows = wait_for_ingestion(query, 1)

  assert(!rows.empty?, 'Data was not ingested into Kusto')

  # Index 3 is the column appended by `extend r` after the three table columns
  found = rows.any? do |row|
    parsed = begin
      row[3]
    rescue StandardError
      nil
    end
    parsed && parsed['id'] == 1 && parsed['name'] == 'test'
  end

  assert(found, 'Expected record with name == test not found in Kusto')
end
327
-
328
test 'write function ingests data to Kusto' do
  test_table = "FluentD_write_#{Time.now.to_i}"
  driver_options = { table_name: test_table, buffered: true }
  configure_and_start_driver(driver_options)
  setup_test_table(test_table)

  tag = 'e2e.write'
  base_time = Time.now.to_i
  # Five events, one second apart, all sharing id 2
  events = (1..5).map do |n|
    [base_time + n - 1, { 'id' => 2, 'name' => "write_test_#{n}" }]
  end

  @driver.run(default_tag: tag) do
    events.each { |event_time, record| @driver.feed(tag, event_time, record) }
    sleep 5 # Give the buffer time to flush
  end

  query = %(#{test_table} | extend r = parse_json(record) | where r.id == 2 and r.name startswith "write_test_")
  rows = wait_for_ingestion(query, 5)

  assert(rows.size >= 5, 'Not all events were ingested into Kusto by write')
end
358
-
359
test 'try_write function ingests data to Kusto' do
  test_table = "FluentD_trywrite_#{Time.now.to_i}"
  driver_options = { table_name: test_table, buffered: true, delayed: true }
  configure_and_start_driver(driver_options)
  setup_test_table(test_table)

  tag = 'e2e.try_write'
  base_time = Time.now.to_i
  # Five events, one second apart, all sharing id 3
  events = (1..5).map do |n|
    [base_time + n - 1, { 'id' => 3, 'name' => "try_write_test_#{n}" }]
  end

  @driver.run(default_tag: tag) do
    events.each { |event_time, record| @driver.feed(tag, event_time, record) }
    sleep 5 # Give the buffer time to flush
  end

  query = %(#{test_table} | extend r = parse_json(record) | where r.id == 3 and r.name startswith "try_write_test_")
  rows = wait_for_ingestion(query, 5)

  assert(rows.size >= 5, 'Not all events were ingested into Kusto by try_write')

  # Take the chunk_id from the first row's parsed record (column index 3)
  first_record = rows[0] && rows[0][3]
  chunk_id = first_record['chunk_id'] if first_record && first_record['chunk_id']
  assert(chunk_id, 'chunk_id not found in ingested records')

  # Every event of the batch should be present under that chunk_id
  query_chunk = "#{test_table} | extend r = parse_json(record) | where r.chunk_id == '#{chunk_id}'"
  chunk_rows = wait_for_ingestion(query_chunk, 5)

  assert(chunk_rows.size >= 5, 'Not all chunk records were committed in Kusto by try_write')
end
398
-
399
test 'try_write function ingests data to Kusto with parallel chunk commit' do
  test_table = "FluentD_trywrite_parallel_#{Time.now.to_i}"
  driver_options = {
    table_name: test_table,
    buffered: true,
    delayed: true,
    chunk_limit_size: '256'
  }
  configure_and_start_driver(driver_options)
  setup_test_table(test_table)

  tag = 'e2e.try_write_parallel'
  base_time = Time.now.to_i
  events = Array.new(10) do |i|
    [base_time + i, { 'id' => 4, 'name' => "try_write_parallel_test_#{i + 1}" }]
  end

  @driver.run(default_tag: tag) do
    events.each { |event_time, record| @driver.feed(tag, event_time, record) }
    sleep 5 # Give the buffer time to flush
  end

  query = %(#{test_table} | extend r = parse_json(record) | where r.id == 4 and r.name startswith "try_write_parallel_test_")
  rows = wait_for_ingestion(query, 10)

  assert(rows.size >= 10, 'Not all events were ingested into Kusto by try_write (parallel)')

  # Distinct chunk ids seen in the parsed record column (index 3)
  seen_ids = rows.map do |row|
    parsed = row[3]
    parsed['chunk_id'] if parsed && parsed['chunk_id']
  end
  chunk_ids = seen_ids.compact.uniq
  assert(chunk_ids.size >= 2, 'Less than 2 chunk_ids found, parallel chunking not verified')

  # Each chunk must be fully committed: re-query by chunk_id and compare counts
  chunk_ids.each do |cid|
    expected_count = rows.count { |row| row[3]['chunk_id'] == cid }
    query_chunk = "#{test_table} | extend r = parse_json(record) | where r.chunk_id == '#{cid}'"
    chunk_rows = wait_for_ingestion(query_chunk, expected_count)

    assert(chunk_rows.size == expected_count,
           "Not all chunk records were committed in Kusto for chunk_id #{cid} (expected #{expected_count}, got #{chunk_rows.size})")
  end
end
441
-
442
- # ESSENTIAL E2E BUFFERING TEST CASES - START
443
-
444
- # Test Case 1: Non-buffered mode with compression disabled
445
test 'non_buffered_compression_disabled' do
  table_name = "FluentD_non_buffered_no_compression_#{Time.now.to_i}"
  driver_options = {
    table_name: table_name,
    buffered: false,
    compression_enabled: false
  }
  configure_and_start_driver(driver_options)
  setup_test_table(table_name)

  tag = 'e2e.non_buffered.no_compression'

  # Each event goes through #process as its own single-event stream
  generate_test_events(3, 1000, 'no_comp').each do |event_time, record|
    single_event = Fluent::ArrayEventStream.new([[event_time, record]])
    assert_nothing_raised { @driver.instance.process(tag, single_event) }
  end

  query = "#{table_name} | extend r = parse_json(record) | where r.id >= 1000 and r.id <= 1002"
  rows = wait_for_ingestion(query, 3)

  assert(rows.size >= 3, "Expected 3 records, got #{rows.size} in non-buffered mode with compression disabled")
end
467
-
468
- # Test Case 2: Memory buffered mode with immediate flush
469
test 'memory_buffered_immediate_flush' do
  table_name = "FluentD_memory_buffered_immediate_#{Time.now.to_i}"
  driver_options = {
    table_name: table_name,
    buffered: true,
    buffer_type: 'memory',
    flush_mode: 'immediate'
  }
  configure_and_start_driver(driver_options)
  setup_test_table(table_name)

  tag = 'e2e.memory_buffered.immediate'
  events = generate_test_events(5, 2000, 'mem_imm')

  @driver.run(default_tag: tag) do
    events.each { |event_time, record| @driver.feed(tag, event_time, record) }
    sleep 3 # Short pause is enough for an immediate flush
  end

  query = "#{table_name} | extend r = parse_json(record) | where r.id >= 2000 and r.id <= 2004"
  rows = wait_for_ingestion(query, 5)

  assert(rows.size >= 5, "Expected 5 records, got #{rows.size} in memory buffered immediate flush")
end
494
-
495
- # Test Case 3: Memory buffered mode with interval flush
496
test 'memory_buffered_interval_flush' do
  table_name = "FluentD_memory_buffered_interval_#{Time.now.to_i}"
  driver_options = {
    table_name: table_name,
    buffered: true,
    buffer_type: 'memory',
    flush_mode: 'interval',
    flush_interval: '3s'
  }
  configure_and_start_driver(driver_options)
  setup_test_table(table_name)

  tag = 'e2e.memory_buffered.interval'
  events = generate_test_events(7, 3000, 'mem_int')

  @driver.run(default_tag: tag) do
    events.each { |event_time, record| @driver.feed(tag, event_time, record) }
    sleep 8 # Longer than the configured flush_interval
  end

  query = "#{table_name} | extend r = parse_json(record) | where r.id >= 3000 and r.id <= 3006"
  rows = wait_for_ingestion(query, 7)

  assert(rows.size >= 7, "Expected 7 records, got #{rows.size} in memory buffered interval flush")
end
522
-
523
- # Test Case 4: Memory buffered mode with chunk size limit
524
test 'memory_buffered_chunk_size_limit' do
  table_name = "FluentD_memory_buffered_chunk_limit_#{Time.now.to_i}"
  driver_options = {
    table_name: table_name,
    buffered: true,
    buffer_type: 'memory',
    chunk_limit_size: '512' # Small limit so several chunks are needed
  }
  configure_and_start_driver(driver_options)
  setup_test_table(table_name)

  tag = 'e2e.memory_buffered.chunk_limit'

  # Ten events, each padded with a 100-character field
  events = Array.new(10) do |i|
    [
      Time.now.to_i + i,
      {
        'id' => 4000 + i,
        'name' => "chunk_limit_test_#{i + 1}",
        'large_field' => 'x' * 100,
        'data' => { 'index' => i, 'test_type' => 'chunk_limit' }
      }
    ]
  end

  @driver.run(default_tag: tag) do
    events.each { |event_time, record| @driver.feed(tag, event_time, record) }
    sleep 8
  end

  query = "#{table_name} | extend r = parse_json(record) | where r.id >= 4000 and r.id <= 4009"
  rows = wait_for_ingestion(query, 10)

  assert(rows.size >= 10, "Expected 10 records, got #{rows.size} in chunk size limit test")
end
562
-
563
- # Test Case 5: Delayed commit mode with sync verification
564
test 'delayed_commit_sync_verification' do
  table_name = "FluentD_delayed_commit_sync_#{Time.now.to_i}"
  driver_options = {
    table_name: table_name,
    buffered: true,
    delayed: true,
    flush_interval: '3s',
    deferred_commit_timeout: 15
  }
  configure_and_start_driver(driver_options)
  setup_test_table(table_name)

  tag = 'e2e.delayed_commit.sync'
  events = generate_test_events(4, 5000, 'delayed_sync')

  @driver.run(default_tag: tag) do
    events.each { |event_time, record| @driver.feed(tag, event_time, record) }
    sleep 8
  end

  query = "#{table_name} | extend r = parse_json(record) | where r.id >= 5000 and r.id <= 5003"
  rows = wait_for_ingestion(query, 4)

  assert(rows.size >= 4, "Expected 4 records, got #{rows.size} in delayed commit sync mode")

  # At least one chunk_id must be present in the parsed records (column index 3)
  seen_ids = rows.map { |row| row[3] ? row[3]['chunk_id'] : nil }
  chunk_ids = seen_ids.compact.uniq
  assert(chunk_ids.size >= 1, 'No chunk_ids found in delayed commit mode')
end
594
-
595
- # Test Case 6: Delayed commit mode with multiple chunks
596
test 'delayed_commit_multiple_chunks' do
  table_name = "FluentD_delayed_commit_multi_chunks_#{Time.now.to_i}"
  driver_options = {
    table_name: table_name,
    buffered: true,
    delayed: true,
    chunk_limit_size: '300', # Small limit so several chunks are needed
    flush_interval: '4s',
    deferred_commit_timeout: 15
  }
  configure_and_start_driver(driver_options)
  setup_test_table(table_name)

  tag = 'e2e.delayed_commit.multi_chunks'
  events = generate_test_events(12, 6000, 'multi_chunk')

  @driver.run(default_tag: tag) do
    events.each { |event_time, record| @driver.feed(tag, event_time, record) }
    sleep 10
  end

  query = "#{table_name} | extend r = parse_json(record) | where r.id >= 6000 and r.id <= 6011"
  rows = wait_for_ingestion(query, 12)

  assert(rows.size >= 12, "Expected 12 records, got #{rows.size} in delayed commit multiple chunks")

  # Only requires that at least one chunk_id is present in the parsed records
  seen_ids = rows.map { |row| row[3] ? row[3]['chunk_id'] : nil }
  chunk_ids = seen_ids.compact.uniq
  assert(chunk_ids.size >= 1, "Expected chunk_ids, got #{chunk_ids.size}")
end
627
-
628
- # Test Case 7: File buffer with persistent storage
629
test 'file_buffer_persistent_storage' do
  table_name = "FluentD_file_buffer_persistent_#{Time.now.to_i}"
  buffer_path = create_temp_buffer_file
  driver_options = {
    table_name: table_name,
    buffered: true,
    buffer_type: 'file',
    buffer_path: buffer_path,
    flush_interval: '5s',
    chunk_limit_size: '4k'
  }
  configure_and_start_driver(driver_options)
  setup_test_table(table_name)

  tag = 'e2e.file_buffer.persistent'
  events = generate_test_events(6, 20_000, 'file_buf')

  @driver.run(default_tag: tag) do
    events.each { |event_time, record| @driver.feed(tag, event_time, record) }
    sleep 8
  end

  query = "#{table_name} | extend r = parse_json(record) | where r.id >= 20000 and r.id <= 20005"
  rows = wait_for_ingestion(query, 6)

  assert(rows.size >= 6, "Expected 6 records, got #{rows.size} in file buffer persistent storage test")
end
657
-
658
- # Test Case 8: Buffered mode with compression enabled
659
test 'buffered_mode_compression_enabled' do
  table_name = "FluentD_buffered_compression_#{Time.now.to_i}"
  driver_options = {
    table_name: table_name,
    buffered: true,
    compression_enabled: true,
    flush_interval: '4s',
    chunk_limit_size: '8k'
  }
  configure_and_start_driver(driver_options)
  setup_test_table(table_name)

  tag = 'e2e.buffered.compression'
  events = generate_test_events(10, 7000, 'compression')

  @driver.run(default_tag: tag) do
    events.each { |event_time, record| @driver.feed(tag, event_time, record) }
    sleep 8
  end

  query = "#{table_name} | extend r = parse_json(record) | where r.id >= 7000 and r.id <= 7009"
  rows = wait_for_ingestion(query, 10)

  assert(rows.size >= 10, "Expected 10 records, got #{rows.size} in compression test")
end
685
-
686
- # ESSENTIAL E2E BUFFERING TEST CASES - END
687
-
688
- # INGESTION MAPPING REFERENCE TESTS - START
689
-
690
- # Test ingestion with mapping reference specified
691
test 'ingestion_with_mapping_reference' do
  test_table = "FluentD_mapping_ref_#{Time.now.to_i}"
  mapping_name = "test_mapping_#{Time.now.to_i}"

  # JSON ingestion mapping that feeds the three table columns
  create_mapping_query = <<~KQL
    .create table #{test_table} ingestion json mapping "#{mapping_name}"
    @'[
    {"column":"tag", "path":"$.tag", "datatype":"string"},
    {"column":"timestamp", "path":"$.timestamp", "datatype":"datetime"},
    {"column":"record", "path":"$.record", "datatype":"string"}
    ]'
  KQL

  begin
    configure_and_start_driver(
      table_name: test_table,
      buffered: true,
      ingestion_mapping_reference: mapping_name
    )
    setup_test_table(test_table)

    # The mapping can only be created once the table exists
    kusto_query(create_mapping_query, :management)

    tag = 'e2e.mapping_test'
    time = Time.now.to_i
    events = [
      [time, { 'id' => 8000, 'name' => 'mapping_test_1', 'level' => 'info' }],
      [time + 1, { 'id' => 8001, 'name' => 'mapping_test_2', 'level' => 'warn' }],
      [time + 2, { 'id' => 8002, 'name' => 'mapping_test_3', 'level' => 'error' }]
    ]

    @driver.run(default_tag: tag) do
      events.each { |event_time, record| @driver.feed(tag, event_time, record) }
      sleep 8 # Wait for ingestion
    end

    query = "#{test_table} | extend r = parse_json(record) | where r.id >= 8000 and r.id <= 8002"
    rows = wait_for_ingestion(query, 3)

    assert(rows.size >= 3, "Expected 3 records with mapping reference, got #{rows.size}")

    # Count rows whose parsed record (column index 3) carries an expected id
    found_records = rows.count do |row|
      parsed = row[3]
      parsed && parsed['id'] && parsed['id'] >= 8000 && parsed['id'] <= 8002
    end

    assert(found_records >= 3, "Expected 3 mapped records, found #{found_records}")
  ensure
    # Runs even when an assertion above fails, so the mapping is not left behind
    kusto_query(".drop table #{test_table} ingestion json mapping '#{mapping_name}'", :management)
  end
end
749
-
750
- # Test ingestion without mapping reference (default behavior)
751
test 'ingestion_without_mapping_reference' do
  test_table = "FluentD_no_mapping_#{Time.now.to_i}"

  # Buffered driver with no ingestion_mapping_reference configured
  driver_options = { table_name: test_table, buffered: true }
  configure_and_start_driver(driver_options)
  setup_test_table(test_table)

  tag = 'e2e.no_mapping_test'
  base_time = Time.now.to_i
  levels = %w[info warn error]
  events = levels.each_with_index.map do |level, i|
    [base_time + i, { 'id' => 9000 + i, 'name' => "no_mapping_test_#{i + 1}", 'level' => level }]
  end

  @driver.run(default_tag: tag) do
    events.each { |event_time, record| @driver.feed(tag, event_time, record) }
    sleep 8 # Wait for ingestion
  end

  query = "#{test_table} | extend r = parse_json(record) | where r.id >= 9000 and r.id <= 9002"
  rows = wait_for_ingestion(query, 3)

  assert(rows.size >= 3, "Expected 3 records without mapping reference, got #{rows.size}")

  # Check the default schema (tag, timestamp, record) row by row
  found_records = rows.count do |row|
    tag_val = row[0]
    timestamp_val = row[1]
    record_val = row[2] || row[3] # The record may arrive in either column

    next false unless tag_val && timestamp_val && record_val

    parsed =
      case record_val
      when String then JSON.parse(record_val)
      when Hash then record_val
      end

    parsed && parsed['id'] && parsed['id'] >= 9000 && parsed['id'] <= 9002
  end

  assert(found_records >= 3, "Expected 3 records with default schema, found #{found_records}")
end
801
-
802
- # Test ingestion mapping with delayed commit
803
test 'ingestion_mapping_with_delayed_commit' do
  test_table = "FluentD_mapping_delayed_#{Time.now.to_i}"
  mapping_name = "delayed_mapping_#{Time.now.to_i}"

  # JSON ingestion mapping that feeds the three table columns
  create_mapping_query = <<~KQL
    .create table #{test_table} ingestion json mapping "#{mapping_name}"
    @'[
    {"column":"tag", "path":"$.tag", "datatype":"string"},
    {"column":"timestamp", "path":"$.timestamp", "datatype":"datetime"},
    {"column":"record", "path":"$.record", "datatype":"string"}
    ]'
  KQL

  begin
    configure_and_start_driver(
      table_name: test_table,
      buffered: true,
      delayed: true,
      ingestion_mapping_reference: mapping_name,
      deferred_commit_timeout: 20
    )
    setup_test_table(test_table)

    # The mapping can only be created once the table exists
    kusto_query(create_mapping_query, :management)

    tag = 'e2e.mapping_delayed'
    time = Time.now.to_i
    events = [
      [time, { 'id' => 10000, 'name' => 'mapping_delayed_1', 'category' => 'test' }],
      [time + 1, { 'id' => 10001, 'name' => 'mapping_delayed_2', 'category' => 'test' }]
    ]

    @driver.run(default_tag: tag) do
      events.each { |event_time, record| @driver.feed(tag, event_time, record) }
      sleep 10 # Wait for delayed commit
    end

    query = "#{test_table} | extend r = parse_json(record) | where r.id >= 10000 and r.id <= 10001"
    rows = wait_for_ingestion(query, 2)

    assert(rows.size >= 2, "Expected 2 records with mapping and delayed commit, got #{rows.size}")

    # Prefer the parsed record column (index 3); otherwise parse the raw
    # record string (index 2), then collect the distinct chunk ids.
    seen_ids = rows.map do |row|
      parsed = row[3] || (JSON.parse(row[2]) if row[2].is_a?(String))
      parsed['chunk_id'] if parsed && parsed['chunk_id']
    end
    chunk_ids = seen_ids.compact.uniq

    assert(chunk_ids.size >= 1, 'No chunk_ids found in delayed commit with mapping')
  ensure
    # Runs even when an assertion above fails, so the mapping is not left behind
    kusto_query(".drop table #{test_table} ingestion json mapping '#{mapping_name}'", :management)
  end
end
860
-
861
- # INGESTION MAPPING REFERENCE TESTS - END
862
- end