fluent-plugin-kusto 0.0.1.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,683 @@
+ # frozen_string_literal: true
+
+ require 'test-unit'
+ require 'fluent/test'
+ require 'fluent/test/driver/output'
+ require 'fluent/test/helpers'
+ require 'fluent/plugin/out_kusto'
+ require 'net/http'
+ require 'uri'
+ require 'json'
+ require 'time' # provides Time#iso8601, used by generate_test_events
+ require_relative '../../lib/fluent/plugin/kusto_query'
+ require_relative '../../lib/fluent/plugin/ingester'
+ require_relative '../../lib/fluent/plugin/conffile'
+ require 'ostruct'
+ require 'logger'
+ require 'concurrent'
+ require 'tempfile'
+ require 'set'
+
+ class KustoE2ETest < Test::Unit::TestCase
+   include Fluent::Test::Helpers
+
+   def setup
+     Fluent::Test.setup
+     # Setup logger
+     @logger = Logger.new($stdout)
+     @logger.level = Logger::INFO
+
+     # Configuration from environment
+     @engine_url = ENV['CLUSTER'] || 'https://example.kusto.windows.net'
+     @database = ENV['DB'] || 'testdb'
+     @table = "FluentD_#{Time.now.to_i}"
+     @columns = '(tag:string, timestamp:datetime, record:string)'
+     @client_id = ENV['CLIENT_ID'] || ''
+     @client_secret = ENV['CLIENT_SECRET'] || ''
+     @tenant_id = ENV['TENANT_ID'] || ''
+     @managed_identity_client_id = ENV['MANAGED_IDENTITY_CLIENT_ID'] || ''
+     @auth_type = (ENV['AUTH_TYPE'] || 'aad').downcase
+     @wi_client_id = ENV['WI_CLIENT_ID'] || ''
+     @wi_tenant_id = ENV['WI_TENANT_ID'] || ''
+     @wi_token_file = ENV['WI_TOKEN_FILE'] || ''
+
+     # Create driver with default configuration
+     setup_auth_config
+     configure_and_start_driver
+     setup_test_table(@table)
+   end
+
+   def teardown
+     kusto_query(".drop table #{@table} ifexists", :management)
+   end
+
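+   # Obtains a bearer token the same way the plugin does: builds the plugin's
+   # OutputConfiguration for the selected auth type, creates an Ingester from
+   # it, and reads the token from the underlying token provider (normalizing
+   # hash-, string-, and object-shaped token responses).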
+   def get_access_token
+     # Use the same logic as the plugin's Ingester class
+     opts = {
+       tenant_id: @tenant_id,
+       kusto_endpoint: @engine_url,
+       database_name: @database,
+       table_name: @table,
+       azure_cloud: 'AzureCloud'
+     }
+
+     case @auth_type
+     when 'azcli'
+       opts[:auth_type] = 'azcli'
+     when 'workload_identity'
+       opts[:auth_type] = 'workload_identity'
+       opts[:workload_identity_client_id] = @wi_client_id
+       opts[:workload_identity_tenant_id] = @wi_tenant_id
+       opts[:workload_identity_token_file_path] = @wi_token_file
+     when 'user_managed_identity', 'system_managed_identity'
+       opts[:auth_type] = @auth_type
+       opts[:managed_identity_client_id] = @managed_identity_client_id
+     else
+       opts[:auth_type] = 'aad'
+       opts[:client_app_id] = @client_id
+       opts[:client_app_secret] = @client_secret
+     end
+
+     outconfig = OutputConfiguration.new(opts)
+     ingester = Ingester.new(outconfig)
+     def ingester.access_token
+       token = @client.token_provider.get_token
+       # Extract token if it's a hash or object
+       case token
+       when Hash
+         token[:access_token] || token['access_token']
+       when String
+         token
+       else
+         token.respond_to?(:access_token) ? token.access_token : token.to_s
+       end
+     end
+     ingester.access_token
+   end
+
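+   # Runs a KQL query (/v1/rest/query) or a management command (/v1/rest/mgmt)
+   # against the cluster over HTTPS and returns the rows of the first result
+   # table, or [] on any failure.
+   # Example: kusto_query('.show tables', :management)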
+   def kusto_query(query, type = :data)
+     endpoint = @engine_url
+     path = type == :management ? '/v1/rest/mgmt' : '/v1/rest/query'
+     uri = URI("#{endpoint}#{path}")
+     token = get_access_token
+
+     headers = {
+       'Authorization' => "Bearer #{token}",
+       'Content-Type' => 'application/json',
+       'Accept' => 'application/json',
+       'x-ms-client-version' => 'Kusto.FluentD:1.0.0'
+     }
+
+     body_hash = { csl: query }
+     body_hash[:db] = @database if @database
+     body = body_hash.to_json
+
+     request = Net::HTTP::Post.new(uri.request_uri, headers)
+     request.body = body
+     http = Net::HTTP.new(uri.host, uri.port)
+     http.use_ssl = true
+
+     response = http.request(request)
+     unless response.code.to_i.between?(200, 299)
+       @logger.error("Kusto query failed with status #{response.code}: #{response.body}")
+       return []
+     end
+
+     begin
+       response_json = JSON.parse(response.body)
+       tables = response_json['Tables']
+       rows = tables && tables[0] && tables[0]['Rows']
+       rows || []
+     rescue JSON::ParserError => e
+       @logger.error("Failed to parse JSON: #{e}")
+       @logger.error(response.body)
+       []
+     end
+   end
+
+   def setup_auth_config
+     @auth_lines = case @auth_type
+                   when 'azcli'
+                     <<-AUTH
+                       auth_type azcli
+                     AUTH
+                   when 'workload_identity'
+                     <<-AUTH
+                       auth_type workload_identity
+                       workload_identity_client_id #{@wi_client_id}
+                       workload_identity_tenant_id #{@wi_tenant_id}
+                       workload_identity_token_file_path #{@wi_token_file}
+                     AUTH
+                   when 'user_managed_identity', 'system_managed_identity'
+                     <<-AUTH
+                       auth_type #{@auth_type}
+                       managed_identity_client_id #{@managed_identity_client_id}
+                     AUTH
+                   else
+                     <<-AUTH
+                       auth_type aad
+                       tenant_id #{@tenant_id}
+                       client_id #{@client_id}
+                       client_secret #{@client_secret}
+                     AUTH
+                   end
+   end
+
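+   # Renders the plugin configuration (plus an optional <buffer> section when
+   # buffered mode is requested), then creates and starts the output driver.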
+   def configure_and_start_driver(options = {})
+     config_options = {
+       buffered: false,
+       delayed: false,
+       table_name: @table,
+       flush_interval: '5s',
+       chunk_limit_size: '8k',
+       timekey: 60,
+       compression_enabled: true
+     }.merge(options)
+
+     buffer_config = if config_options[:buffered]
+       buffer_type = config_options[:buffer_type] || 'memory'
+       flush_mode = config_options[:flush_mode] || 'interval'
+
+       base_buffer = <<-BUFFER
+         <buffer>
+           @type #{buffer_type}
+           chunk_limit_size #{config_options[:chunk_limit_size]}
+           timekey #{config_options[:timekey]}
+           flush_mode #{flush_mode}
+           flush_at_shutdown #{config_options[:flush_at_shutdown] || 'true'}
+           overflow_action #{config_options[:overflow_action] || 'throw_exception'}
+           retry_max_interval #{config_options[:retry_max_interval] || '30'}
+           retry_forever #{config_options[:retry_forever] || 'false'}
+           flush_thread_count #{config_options[:flush_thread_count] || '1'}
+       BUFFER
+
+       # Only add flush_interval if flush_mode is not 'immediate'
+       if flush_mode != 'immediate'
+         base_buffer = base_buffer.sub(/flush_mode #{flush_mode}/,
+                                       "flush_interval #{config_options[:flush_interval]}\n flush_mode #{flush_mode}")
+       end
+
+       # Add file-specific configurations
+       if buffer_type == 'file'
+         base_buffer += " path #{config_options[:buffer_path] || '/tmp/fluentd_test_buffer'}\n"
+       end
+
+       # Add additional buffer configurations
+       if config_options[:total_limit_size]
+         base_buffer += " total_limit_size #{config_options[:total_limit_size]}\n"
+       end
+
+       if config_options[:chunk_limit_records]
+         base_buffer += " chunk_limit_records #{config_options[:chunk_limit_records]}\n"
+       end
+
+       base_buffer += " </buffer>\n"
+       base_buffer
+     else
+       ''
+     end
+
+     # Add deferred_commit_timeout if specified
+     timeout_config = config_options[:deferred_commit_timeout] ? "deferred_commit_timeout #{config_options[:deferred_commit_timeout]}" : ''
+
+     @conf = <<-CONF
+       @type kusto
+       @log_level debug
+       buffered #{config_options[:buffered]}
+       delayed #{config_options[:delayed]}
+       endpoint #{@engine_url}
+       database_name #{@database}
+       table_name #{config_options[:table_name]}
+       compression_enabled #{config_options[:compression_enabled]}
+       #{timeout_config}
+       #{@auth_lines}
+       #{buffer_config}
+     CONF
+
+     @driver = Fluent::Test::Driver::Output.new(Fluent::Plugin::KustoOutput).configure(@conf)
+     @driver.instance.instance_variable_set(:@logger, @logger)
+     @driver.instance.start
+   end
+
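+   # With the default options, configure_and_start_driver renders a
+   # non-buffered configuration roughly like:
+   #   @type kusto
+   #   @log_level debug
+   #   buffered false
+   #   delayed false
+   #   endpoint https://example.kusto.windows.net
+   #   database_name testdb
+   #   table_name FluentD_<timestamp>
+   #   compression_enabled true
+   #   auth_type aad
+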
+   def setup_test_table(table_name)
+     kusto_query(".drop table #{table_name} ifexists", :management)
+     kusto_query(".create table #{table_name} #{@columns}", :management)
+   end
+
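+   # Polls Kusto with `query` every `interval` seconds until at least
+   # `expected_count` rows come back or `max_wait` seconds elapse; returns
+   # whatever rows were last seen.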
+   def wait_for_ingestion(query, expected_count, max_wait = 240, interval = 5)
+     waited = 0
+     rows = []
+
+     while waited < max_wait
+       rows = kusto_query(query)
+       break if rows.size >= expected_count
+
+       sleep interval
+       waited += interval
+       @logger.debug("Waiting for ingestion: #{waited}s elapsed, #{rows.size}/#{expected_count} records found")
+     end
+
+     rows
+   end
+
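+   # Builds `count` [timestamp, record] event pairs with sequential ids
+   # starting at `base_id`, e.g. (timestamp illustrative):
+   #   [1700000000, { 'id' => 1000, 'name' => 'test_event_no_comp_1',
+   #                  'timestamp' => '2023-11-14T22:13:20Z',
+   #                  'data' => { 'index' => 0, 'batch_id' => 1000, 'test_type' => 'no_comp' } }]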
+   def generate_test_events(count, base_id, tag_suffix = '')
+     time = Time.now.to_i
+     events = []
+     count.times do |i|
+       events << [
+         time + i,
+         {
+           'id' => base_id + i,
+           'name' => "test_event_#{tag_suffix}_#{i + 1}",
+           'timestamp' => Time.at(time + i).utc.iso8601,
+           'data' => {
+             'index' => i,
+             'batch_id' => base_id,
+             'test_type' => tag_suffix
+           }
+         }
+       ]
+     end
+     events
+   end
+
+   def create_temp_buffer_file
+     temp_file = Tempfile.new(['fluentd_buffer', '.buf'])
+     temp_path = temp_file.path
+     temp_file.close
+     temp_path
+   end
+
+   # Before running this test, ensure your service principal has TableAdmin and Ingestor permissions on the test database.
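+   # For example, roles can be granted with Kusto management commands such as
+   # (placeholders; adapt the principal, tenant, and database to your environment):
+   #   .add database testdb ingestors ('aadapp=<client_id>;<tenant_id>')
+   #   .add database testdb admins ('aadapp=<client_id>;<tenant_id>')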
+   test 'process function ingestion to Kusto' do
+     test_table = "FluentD_process_#{Time.now.to_i}"
+     configure_and_start_driver(table_name: test_table)
+     setup_test_table(test_table)
+
+     tag = 'e2e.test'
+     time = Time.now.to_i
+     record = { 'id' => 1, 'name' => 'test' }
+     event_stream = Fluent::ArrayEventStream.new([[time, record]])
+
+     assert_nothing_raised { @driver.instance.process(tag, event_stream) }
+
+     query = "#{test_table} | extend r = parse_json(record) | where r.id == 1 and r.name == \"test\""
+     rows = wait_for_ingestion(query, 1)
+
+     assert(!rows.empty?, 'Data was not ingested into Kusto')
+
+     found = false
+     rows.each do |row|
+       r = begin
+         row[3]
+       rescue StandardError
+         nil
+       end
+       if r && r['id'] == 1 && r['name'] == 'test'
+         found = true
+         break
+       end
+     end
+
+     assert(found, 'Expected record with name == test not found in Kusto')
+   end
+
+   test 'write function ingests data to Kusto' do
+     test_table = "FluentD_write_#{Time.now.to_i}"
+     configure_and_start_driver(
+       table_name: test_table,
+       buffered: true
+     )
+     setup_test_table(test_table)
+
+     tag = 'e2e.write'
+     time = Time.now.to_i
+     events = [
+       [time, { 'id' => 2, 'name' => 'write_test_1' }],
+       [time + 1, { 'id' => 2, 'name' => 'write_test_2' }],
+       [time + 2, { 'id' => 2, 'name' => 'write_test_3' }],
+       [time + 3, { 'id' => 2, 'name' => 'write_test_4' }],
+       [time + 4, { 'id' => 2, 'name' => 'write_test_5' }]
+     ]
+
+     @driver.run(default_tag: tag) do
+       events.each do |t, r|
+         @driver.feed(tag, t, r)
+       end
+       sleep 5 # Wait for buffer flush
+     end
+
+     query = "#{test_table} | extend r = parse_json(record) | where r.id == 2 and r.name startswith \"write_test_\""
+     rows = wait_for_ingestion(query, 5)
+
+     assert(rows.size >= 5, 'Not all events were ingested into Kusto by write')
+   end
+
+   test 'try_write function ingests data to Kusto' do
+     test_table = "FluentD_trywrite_#{Time.now.to_i}"
+     configure_and_start_driver(
+       table_name: test_table,
+       buffered: true,
+       delayed: true
+     )
+     setup_test_table(test_table)
+
+     tag = 'e2e.try_write'
+     time = Time.now.to_i
+     events = [
+       [time, { 'id' => 3, 'name' => 'try_write_test_1' }],
+       [time + 1, { 'id' => 3, 'name' => 'try_write_test_2' }],
+       [time + 2, { 'id' => 3, 'name' => 'try_write_test_3' }],
+       [time + 3, { 'id' => 3, 'name' => 'try_write_test_4' }],
+       [time + 4, { 'id' => 3, 'name' => 'try_write_test_5' }]
+     ]
+
+     @driver.run(default_tag: tag) do
+       events.each do |t, r|
+         @driver.feed(tag, t, r)
+       end
+       sleep 5 # Wait for buffer flush
+     end
+
+     query = "#{test_table} | extend r = parse_json(record) | where r.id == 3 and r.name startswith \"try_write_test_\""
+     rows = wait_for_ingestion(query, 5)
+
+     assert(rows.size >= 5, 'Not all events were ingested into Kusto by try_write')
+
+     chunk_id = rows[0][3]['chunk_id'] if rows[0] && rows[0][3] && rows[0][3]['chunk_id']
+     assert(chunk_id, 'chunk_id not found in ingested records')
+
+     query_chunk = "#{test_table} | extend r = parse_json(record) | where r.chunk_id == '#{chunk_id}'"
+     chunk_rows = wait_for_ingestion(query_chunk, 5)
+
+     assert(chunk_rows.size >= 5, 'Not all chunk records were committed in Kusto by try_write')
+   end
+
+   test 'try_write function ingests data to Kusto with parallel chunk commit' do
+     test_table = "FluentD_trywrite_parallel_#{Time.now.to_i}"
+     configure_and_start_driver(
+       table_name: test_table,
+       buffered: true,
+       delayed: true,
+       chunk_limit_size: '256'
+     )
+     setup_test_table(test_table)
+
+     tag = 'e2e.try_write_parallel'
+     time = Time.now.to_i
+     events = []
+     10.times do |i|
+       events << [time + i, { 'id' => 4, 'name' => "try_write_parallel_test_#{i + 1}" }]
+     end
+
+     @driver.run(default_tag: tag) do
+       events.each do |t, r|
+         @driver.feed(tag, t, r)
+       end
+       sleep 5 # Wait for buffer flush
+     end
+
+     query = "#{test_table} | extend r = parse_json(record) | where r.id == 4 and r.name startswith \"try_write_parallel_test_\""
+     rows = wait_for_ingestion(query, 10)
+
+     assert(rows.size >= 10, 'Not all events were ingested into Kusto by try_write (parallel)')
+
+     chunk_ids = rows.map { |row| row[3]['chunk_id'] if row[3] && row[3]['chunk_id'] }.compact.uniq
+     assert(chunk_ids.size >= 2, 'Less than 2 chunk_ids found, parallel chunking not verified')
+
+     # Check chunk commit by verifying all records with each chunk_id
+     chunk_ids.each do |cid|
+       expected_count = rows.count { |row| row[3]['chunk_id'] == cid }
+       query_chunk = "#{test_table} | extend r = parse_json(record) | where r.chunk_id == '#{cid}'"
+       chunk_rows = wait_for_ingestion(query_chunk, expected_count)
+
+       assert(chunk_rows.size == expected_count,
+              "Not all chunk records were committed in Kusto for chunk_id #{cid} (expected #{expected_count}, got #{chunk_rows.size})")
+     end
+   end
+
+   # ESSENTIAL E2E BUFFERING TEST CASES - START
+
+   # Test Case 1: Non-buffered mode with compression disabled
+   test 'non_buffered_compression_disabled' do
+     table_name = "FluentD_non_buffered_no_compression_#{Time.now.to_i}"
+     configure_and_start_driver(
+       table_name: table_name,
+       buffered: false,
+       compression_enabled: false
+     )
+     setup_test_table(table_name)
+
+     tag = 'e2e.non_buffered.no_compression'
+     events = generate_test_events(3, 1000, 'no_comp')
+
+     events.each do |time, record|
+       event_stream = Fluent::ArrayEventStream.new([[time, record]])
+       assert_nothing_raised { @driver.instance.process(tag, event_stream) }
+     end
+
+     query = "#{table_name} | extend r = parse_json(record) | where r.id >= 1000 and r.id <= 1002"
+     rows = wait_for_ingestion(query, 3)
+
+     assert(rows.size >= 3, "Expected 3 records, got #{rows.size} in non-buffered mode with compression disabled")
+   end
+
+   # Test Case 2: Memory buffered mode with immediate flush
+   test 'memory_buffered_immediate_flush' do
+     table_name = "FluentD_memory_buffered_immediate_#{Time.now.to_i}"
+     configure_and_start_driver(
+       table_name: table_name,
+       buffered: true,
+       buffer_type: 'memory',
+       flush_mode: 'immediate'
+     )
+     setup_test_table(table_name)
+
+     tag = 'e2e.memory_buffered.immediate'
+     events = generate_test_events(5, 2000, 'mem_imm')
+
+     @driver.run(default_tag: tag) do
+       events.each do |time, record|
+         @driver.feed(tag, time, record)
+       end
+       sleep 3 # Allow time for immediate flush
+     end
+
+     query = "#{table_name} | extend r = parse_json(record) | where r.id >= 2000 and r.id <= 2004"
+     rows = wait_for_ingestion(query, 5)
+
+     assert(rows.size >= 5, "Expected 5 records, got #{rows.size} in memory buffered immediate flush")
+   end
+
+   # Test Case 3: Memory buffered mode with interval flush
+   test 'memory_buffered_interval_flush' do
+     table_name = "FluentD_memory_buffered_interval_#{Time.now.to_i}"
+     configure_and_start_driver(
+       table_name: table_name,
+       buffered: true,
+       buffer_type: 'memory',
+       flush_mode: 'interval',
+       flush_interval: '3s'
+     )
+     setup_test_table(table_name)
+
+     tag = 'e2e.memory_buffered.interval'
+     events = generate_test_events(7, 3000, 'mem_int')
+
+     @driver.run(default_tag: tag) do
+       events.each do |time, record|
+         @driver.feed(tag, time, record)
+       end
+       sleep 8 # Wait longer than flush_interval
+     end
+
+     query = "#{table_name} | extend r = parse_json(record) | where r.id >= 3000 and r.id <= 3006"
+     rows = wait_for_ingestion(query, 7)
+
+     assert(rows.size >= 7, "Expected 7 records, got #{rows.size} in memory buffered interval flush")
+   end
+
+   # Test Case 4: Memory buffered mode with chunk size limit
+   test 'memory_buffered_chunk_size_limit' do
+     table_name = "FluentD_memory_buffered_chunk_limit_#{Time.now.to_i}"
+     configure_and_start_driver(
+       table_name: table_name,
+       buffered: true,
+       buffer_type: 'memory',
+       chunk_limit_size: '512' # Small to force multiple chunks
+     )
+     setup_test_table(table_name)
+
+     tag = 'e2e.memory_buffered.chunk_limit'
+     # Create larger events to exceed chunk size quickly
+     events = []
+     10.times do |i|
+       large_data = 'x' * 100 # Create large payload
+       events << [
+         Time.now.to_i + i,
+         {
+           'id' => 4000 + i,
+           'name' => "chunk_limit_test_#{i + 1}",
+           'large_field' => large_data,
+           'data' => { 'index' => i, 'test_type' => 'chunk_limit' }
+         }
+       ]
+     end
+
+     @driver.run(default_tag: tag) do
+       events.each do |time, record|
+         @driver.feed(tag, time, record)
+       end
+       sleep 8
+     end
+
+     query = "#{table_name} | extend r = parse_json(record) | where r.id >= 4000 and r.id <= 4009"
+     rows = wait_for_ingestion(query, 10)
+
+     assert(rows.size >= 10, "Expected 10 records, got #{rows.size} in chunk size limit test")
+   end
+
+   # Test Case 5: Delayed commit mode with sync verification
+   test 'delayed_commit_sync_verification' do
+     table_name = "FluentD_delayed_commit_sync_#{Time.now.to_i}"
+     configure_and_start_driver(
+       table_name: table_name,
+       buffered: true,
+       delayed: true,
+       flush_interval: '3s',
+       deferred_commit_timeout: 15
+     )
+     setup_test_table(table_name)
+
+     tag = 'e2e.delayed_commit.sync'
+     events = generate_test_events(4, 5000, 'delayed_sync')
+
+     @driver.run(default_tag: tag) do
+       events.each do |time, record|
+         @driver.feed(tag, time, record)
+       end
+       sleep 8
+     end
+
+     query = "#{table_name} | extend r = parse_json(record) | where r.id >= 5000 and r.id <= 5003"
+     rows = wait_for_ingestion(query, 4)
+
+     assert(rows.size >= 4, "Expected 4 records, got #{rows.size} in delayed commit sync mode")
+
+     # Verify chunk_id exists (added by delayed commit)
+     chunk_ids = rows.map { |row| row[3]['chunk_id'] if row[3] }.compact.uniq
+     assert(chunk_ids.size >= 1, 'No chunk_ids found in delayed commit mode')
+   end
+
+   # Test Case 6: Delayed commit mode with multiple chunks
+   test 'delayed_commit_multiple_chunks' do
+     table_name = "FluentD_delayed_commit_multi_chunks_#{Time.now.to_i}"
+     configure_and_start_driver(
+       table_name: table_name,
+       buffered: true,
+       delayed: true,
+       chunk_limit_size: '300', # Small chunks to force multiple
+       flush_interval: '4s',
+       deferred_commit_timeout: 15
+     )
+     setup_test_table(table_name)
+
+     tag = 'e2e.delayed_commit.multi_chunks'
+     events = generate_test_events(12, 6000, 'multi_chunk')
+
+     @driver.run(default_tag: tag) do
+       events.each do |time, record|
+         @driver.feed(tag, time, record)
+       end
+       sleep 10
+     end
+
+     query = "#{table_name} | extend r = parse_json(record) | where r.id >= 6000 and r.id <= 6011"
+     rows = wait_for_ingestion(query, 12)
+
+     assert(rows.size >= 12, "Expected 12 records, got #{rows.size} in delayed commit multiple chunks")
+
+     # Verify chunk_ids exist (the small chunk limit should yield several, but at least one is required)
+     chunk_ids = rows.map { |row| row[3]['chunk_id'] if row[3] }.compact.uniq
+     assert(chunk_ids.size >= 1, "Expected chunk_ids, got #{chunk_ids.size}")
+   end
+
+   # Test Case 7: File buffer with persistent storage
+   test 'file_buffer_persistent_storage' do
+     table_name = "FluentD_file_buffer_persistent_#{Time.now.to_i}"
+     buffer_path = create_temp_buffer_file
+     configure_and_start_driver(
+       table_name: table_name,
+       buffered: true,
+       buffer_type: 'file',
+       buffer_path: buffer_path,
+       flush_interval: '5s',
+       chunk_limit_size: '4k'
+     )
+     setup_test_table(table_name)
+
+     tag = 'e2e.file_buffer.persistent'
+     events = generate_test_events(6, 20_000, 'file_buf')
+
+     @driver.run(default_tag: tag) do
+       events.each do |time, record|
+         @driver.feed(tag, time, record)
+       end
+       sleep 8
+     end
+
+     query = "#{table_name} | extend r = parse_json(record) | where r.id >= 20000 and r.id <= 20005"
+     rows = wait_for_ingestion(query, 6)
+
+     assert(rows.size >= 6, "Expected 6 records, got #{rows.size} in file buffer persistent storage test")
+   end
+
+   # Test Case 8: Buffered mode with compression enabled
+   test 'buffered_mode_compression_enabled' do
+     table_name = "FluentD_buffered_compression_#{Time.now.to_i}"
+     configure_and_start_driver(
+       table_name: table_name,
+       buffered: true,
+       compression_enabled: true,
+       flush_interval: '4s',
+       chunk_limit_size: '8k'
+     )
+     setup_test_table(table_name)
+
+     tag = 'e2e.buffered.compression'
+     events = generate_test_events(10, 7000, 'compression')
+
+     @driver.run(default_tag: tag) do
+       events.each do |time, record|
+         @driver.feed(tag, time, record)
+       end
+       sleep 8
+     end
+
+     query = "#{table_name} | extend r = parse_json(record) | where r.id >= 7000 and r.id <= 7009"
+     rows = wait_for_ingestion(query, 10)
+
+     assert(rows.size >= 10, "Expected 10 records, got #{rows.size} in compression test")
+   end
+
+   # ESSENTIAL E2E BUFFERING TEST CASES - END
+ end