@cdklabs/cdk-appmod-catalog-blueprints 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/.jsii +2579 -194
  2. package/lib/document-processing/adapter/adapter.d.ts +4 -2
  3. package/lib/document-processing/adapter/adapter.js +1 -1
  4. package/lib/document-processing/adapter/queued-s3-adapter.d.ts +9 -2
  5. package/lib/document-processing/adapter/queued-s3-adapter.js +29 -15
  6. package/lib/document-processing/agentic-document-processing.d.ts +4 -0
  7. package/lib/document-processing/agentic-document-processing.js +20 -10
  8. package/lib/document-processing/base-document-processing.d.ts +54 -2
  9. package/lib/document-processing/base-document-processing.js +136 -82
  10. package/lib/document-processing/bedrock-document-processing.d.ts +202 -2
  11. package/lib/document-processing/bedrock-document-processing.js +717 -77
  12. package/lib/document-processing/chunking-config.d.ts +614 -0
  13. package/lib/document-processing/chunking-config.js +5 -0
  14. package/lib/document-processing/default-document-processing-config.js +1 -1
  15. package/lib/document-processing/index.d.ts +1 -0
  16. package/lib/document-processing/index.js +2 -1
  17. package/lib/document-processing/resources/aggregation/handler.py +567 -0
  18. package/lib/document-processing/resources/aggregation/requirements.txt +7 -0
  19. package/lib/document-processing/resources/aggregation/test_handler.py +362 -0
  20. package/lib/document-processing/resources/cleanup/handler.py +276 -0
  21. package/lib/document-processing/resources/cleanup/requirements.txt +5 -0
  22. package/lib/document-processing/resources/cleanup/test_handler.py +436 -0
  23. package/lib/document-processing/resources/default-bedrock-invoke/index.py +85 -3
  24. package/lib/document-processing/resources/default-bedrock-invoke/test_index.py +622 -0
  25. package/lib/document-processing/resources/pdf-chunking/README.md +313 -0
  26. package/lib/document-processing/resources/pdf-chunking/chunking_strategies.py +460 -0
  27. package/lib/document-processing/resources/pdf-chunking/error_handling.py +491 -0
  28. package/lib/document-processing/resources/pdf-chunking/handler.py +958 -0
  29. package/lib/document-processing/resources/pdf-chunking/metrics.py +435 -0
  30. package/lib/document-processing/resources/pdf-chunking/requirements.txt +3 -0
  31. package/lib/document-processing/resources/pdf-chunking/strategy_selection.py +420 -0
  32. package/lib/document-processing/resources/pdf-chunking/structured_logging.py +457 -0
  33. package/lib/document-processing/resources/pdf-chunking/test_chunking_strategies.py +353 -0
  34. package/lib/document-processing/resources/pdf-chunking/test_error_handling.py +487 -0
  35. package/lib/document-processing/resources/pdf-chunking/test_handler.py +609 -0
  36. package/lib/document-processing/resources/pdf-chunking/test_integration.py +694 -0
  37. package/lib/document-processing/resources/pdf-chunking/test_metrics.py +532 -0
  38. package/lib/document-processing/resources/pdf-chunking/test_strategy_selection.py +471 -0
  39. package/lib/document-processing/resources/pdf-chunking/test_structured_logging.py +449 -0
  40. package/lib/document-processing/resources/pdf-chunking/test_token_estimation.py +374 -0
  41. package/lib/document-processing/resources/pdf-chunking/token_estimation.py +189 -0
  42. package/lib/document-processing/tests/agentic-document-processing-nag.test.js +4 -3
  43. package/lib/document-processing/tests/agentic-document-processing.test.js +488 -4
  44. package/lib/document-processing/tests/base-document-processing-nag.test.js +9 -2
  45. package/lib/document-processing/tests/base-document-processing-schema.test.d.ts +1 -0
  46. package/lib/document-processing/tests/base-document-processing-schema.test.js +337 -0
  47. package/lib/document-processing/tests/base-document-processing.test.js +114 -8
  48. package/lib/document-processing/tests/bedrock-document-processing-chunking-nag.test.d.ts +1 -0
  49. package/lib/document-processing/tests/bedrock-document-processing-chunking-nag.test.js +382 -0
  50. package/lib/document-processing/tests/bedrock-document-processing-nag.test.js +4 -3
  51. package/lib/document-processing/tests/bedrock-document-processing-security.test.d.ts +1 -0
  52. package/lib/document-processing/tests/bedrock-document-processing-security.test.js +389 -0
  53. package/lib/document-processing/tests/bedrock-document-processing.test.js +808 -8
  54. package/lib/document-processing/tests/chunking-config.test.d.ts +1 -0
  55. package/lib/document-processing/tests/chunking-config.test.js +238 -0
  56. package/lib/document-processing/tests/queued-s3-adapter-nag.test.js +9 -2
  57. package/lib/document-processing/tests/queued-s3-adapter.test.js +17 -6
  58. package/lib/framework/agents/base-agent.js +1 -1
  59. package/lib/framework/agents/batch-agent.js +1 -1
  60. package/lib/framework/agents/default-agent-config.js +1 -1
  61. package/lib/framework/bedrock/bedrock.js +1 -1
  62. package/lib/framework/custom-resource/default-runtimes.js +1 -1
  63. package/lib/framework/foundation/access-log.js +1 -1
  64. package/lib/framework/foundation/eventbridge-broker.js +1 -1
  65. package/lib/framework/foundation/network.d.ts +4 -2
  66. package/lib/framework/foundation/network.js +52 -41
  67. package/lib/framework/tests/access-log.test.js +5 -2
  68. package/lib/framework/tests/batch-agent.test.js +5 -2
  69. package/lib/framework/tests/bedrock.test.js +5 -2
  70. package/lib/framework/tests/eventbridge-broker.test.js +5 -2
  71. package/lib/framework/tests/framework-nag.test.js +26 -7
  72. package/lib/framework/tests/network.test.js +30 -2
  73. package/lib/tsconfig.tsbuildinfo +1 -1
  74. package/lib/utilities/data-loader.js +1 -1
  75. package/lib/utilities/lambda-iam-utils.js +1 -1
  76. package/lib/utilities/observability/cloudfront-distribution-observability-property-injector.js +1 -1
  77. package/lib/utilities/observability/default-observability-config.js +1 -1
  78. package/lib/utilities/observability/lambda-observability-property-injector.js +1 -1
  79. package/lib/utilities/observability/log-group-data-protection-utils.js +1 -1
  80. package/lib/utilities/observability/powertools-config.d.ts +10 -1
  81. package/lib/utilities/observability/powertools-config.js +19 -3
  82. package/lib/utilities/observability/state-machine-observability-property-injector.js +1 -1
  83. package/lib/utilities/test-utils.d.ts +43 -0
  84. package/lib/utilities/test-utils.js +56 -0
  85. package/lib/utilities/tests/data-loader-nag.test.js +3 -2
  86. package/lib/utilities/tests/data-loader.test.js +3 -2
  87. package/lib/webapp/frontend-construct.js +1 -1
  88. package/lib/webapp/tests/frontend-construct-nag.test.js +3 -2
  89. package/lib/webapp/tests/frontend-construct.test.js +3 -2
  90. package/package.json +6 -5
  91. package/lib/document-processing/resources/default-error-handler/index.js +0 -46
  92. package/lib/document-processing/resources/default-pdf-processor/index.js +0 -46
  93. package/lib/document-processing/resources/default-pdf-validator/index.js +0 -36
@@ -5,8 +5,10 @@ const assertions_1 = require("aws-cdk-lib/assertions");
5
5
  const aws_bedrock_1 = require("aws-cdk-lib/aws-bedrock");
6
6
  const aws_lambda_1 = require("aws-cdk-lib/aws-lambda");
7
7
  const framework_1 = require("../../framework");
8
+ const test_utils_1 = require("../../utilities/test-utils");
8
9
  const bedrock_document_processing_1 = require("../bedrock-document-processing");
9
10
  describe('BedrockDocumentProcessing', () => {
11
+ let app;
10
12
  let defaultStack;
11
13
  let customStack;
12
14
  let crossRegionStack;
@@ -14,6 +16,7 @@ describe('BedrockDocumentProcessing', () => {
14
16
  let postProcessingStack;
15
17
  let customPromptsStack;
16
18
  let customTimeoutStack;
19
+ let chunkingStack;
17
20
  let defaultTemplate;
18
21
  let customTemplate;
19
22
  let crossRegionTemplate;
@@ -21,10 +24,13 @@ describe('BedrockDocumentProcessing', () => {
21
24
  let postProcessingTemplate;
22
25
  let customPromptsTemplate;
23
26
  let customTimeoutTemplate;
27
+ let chunkingTemplate;
24
28
  beforeAll(() => {
25
- defaultStack = new aws_cdk_lib_1.Stack();
29
+ // Use createTestApp() to skip bundling and speed up tests
30
+ app = (0, test_utils_1.createTestApp)();
31
+ defaultStack = new aws_cdk_lib_1.Stack(app, 'DefaultStack');
26
32
  new bedrock_document_processing_1.BedrockDocumentProcessing(defaultStack, 'DefaultTest', {});
27
- customStack = new aws_cdk_lib_1.Stack();
33
+ customStack = new aws_cdk_lib_1.Stack(app, 'CustomStack');
28
34
  new bedrock_document_processing_1.BedrockDocumentProcessing(customStack, 'CustomTest', {
29
35
  classificationBedrockModel: {
30
36
  fmModelId: aws_bedrock_1.FoundationModelIdentifier.ANTHROPIC_CLAUDE_3_HAIKU_20240307_V1_0,
@@ -35,14 +41,14 @@ describe('BedrockDocumentProcessing', () => {
35
41
  useCrossRegionInference: false,
36
42
  },
37
43
  });
38
- crossRegionStack = new aws_cdk_lib_1.Stack();
44
+ crossRegionStack = new aws_cdk_lib_1.Stack(app, 'CrossRegionStack');
39
45
  new bedrock_document_processing_1.BedrockDocumentProcessing(crossRegionStack, 'CrossRegionTest', {
40
46
  classificationBedrockModel: {
41
47
  useCrossRegionInference: true,
42
48
  crossRegionInferencePrefix: framework_1.BedrockCrossRegionInferencePrefix.EU,
43
49
  },
44
50
  });
45
- enrichmentStack = new aws_cdk_lib_1.Stack();
51
+ enrichmentStack = new aws_cdk_lib_1.Stack(app, 'EnrichmentStack');
46
52
  const enrichmentFn = new aws_lambda_1.Function(enrichmentStack, 'EnrichmentFn', {
47
53
  runtime: aws_lambda_1.Runtime.NODEJS_20_X,
48
54
  handler: 'index.handler',
@@ -51,7 +57,7 @@ describe('BedrockDocumentProcessing', () => {
51
57
  new bedrock_document_processing_1.BedrockDocumentProcessing(enrichmentStack, 'EnrichmentTest', {
52
58
  enrichmentLambdaFunction: enrichmentFn,
53
59
  });
54
- postProcessingStack = new aws_cdk_lib_1.Stack();
60
+ postProcessingStack = new aws_cdk_lib_1.Stack(app, 'PostProcessingStack');
55
61
  const postProcessingFn = new aws_lambda_1.Function(postProcessingStack, 'PostProcessingFn', {
56
62
  runtime: aws_lambda_1.Runtime.NODEJS_20_X,
57
63
  handler: 'index.handler',
@@ -60,15 +66,28 @@ describe('BedrockDocumentProcessing', () => {
60
66
  new bedrock_document_processing_1.BedrockDocumentProcessing(postProcessingStack, 'PostProcessingTest', {
61
67
  postProcessingLambdaFunction: postProcessingFn,
62
68
  });
63
- customPromptsStack = new aws_cdk_lib_1.Stack();
69
+ customPromptsStack = new aws_cdk_lib_1.Stack(app, 'CustomPromptsStack');
64
70
  new bedrock_document_processing_1.BedrockDocumentProcessing(customPromptsStack, 'CustomPromptsTest', {
65
71
  classificationPrompt: 'Custom classification prompt',
66
72
  processingPrompt: 'Custom processing prompt',
67
73
  });
68
- customTimeoutStack = new aws_cdk_lib_1.Stack();
74
+ customTimeoutStack = new aws_cdk_lib_1.Stack(app, 'CustomTimeoutStack');
69
75
  new bedrock_document_processing_1.BedrockDocumentProcessing(customTimeoutStack, 'CustomTimeoutTest', {
70
76
  stepTimeouts: aws_cdk_lib_1.Duration.minutes(10),
71
77
  });
78
+ chunkingStack = new aws_cdk_lib_1.Stack(app, 'ChunkingStack');
79
+ new bedrock_document_processing_1.BedrockDocumentProcessing(chunkingStack, 'ChunkingTest', {
80
+ enableChunking: true,
81
+ chunkingConfig: {
82
+ strategy: 'hybrid',
83
+ maxPagesPerChunk: 100,
84
+ targetTokensPerChunk: 80000,
85
+ pageThreshold: 100,
86
+ tokenThreshold: 150000,
87
+ processingMode: 'parallel',
88
+ maxConcurrency: 10,
89
+ },
90
+ });
72
91
  defaultTemplate = assertions_1.Template.fromStack(defaultStack);
73
92
  customTemplate = assertions_1.Template.fromStack(customStack);
74
93
  crossRegionTemplate = assertions_1.Template.fromStack(crossRegionStack);
@@ -76,6 +95,7 @@ describe('BedrockDocumentProcessing', () => {
76
95
  postProcessingTemplate = assertions_1.Template.fromStack(postProcessingStack);
77
96
  customPromptsTemplate = assertions_1.Template.fromStack(customPromptsStack);
78
97
  customTimeoutTemplate = assertions_1.Template.fromStack(customTimeoutStack);
98
+ chunkingTemplate = assertions_1.Template.fromStack(chunkingStack);
79
99
  });
80
100
  describe('Basic infrastructure', () => {
81
101
  test('creates basic infrastructure', () => {
@@ -256,5 +276,785 @@ describe('BedrockDocumentProcessing', () => {
256
276
  });
257
277
  });
258
278
  });
279
+ describe('Chunking configuration', () => {
280
+ test('accepts enableChunking prop', () => {
281
+ // This test verifies that the BedrockDocumentProcessingProps interface
282
+ // correctly accepts the enableChunking boolean flag
283
+ expect(chunkingStack).toBeDefined();
284
+ });
285
+ test('accepts chunkingConfig prop with all strategy options', () => {
286
+ // This test verifies that the BedrockDocumentProcessingProps interface
287
+ // correctly accepts the chunkingConfig with all configuration options
288
+ chunkingTemplate.hasResourceProperties('AWS::S3::Bucket', {});
289
+ chunkingTemplate.hasResourceProperties('AWS::SQS::Queue', {});
290
+ chunkingTemplate.hasResourceProperties('AWS::DynamoDB::Table', { BillingMode: 'PAY_PER_REQUEST' });
291
+ });
292
+ test('accepts fixed-pages strategy configuration', () => {
293
+ const testApp = (0, test_utils_1.createTestApp)();
294
+ const fixedPagesStack = new aws_cdk_lib_1.Stack(testApp, 'FixedPagesStack');
295
+ new bedrock_document_processing_1.BedrockDocumentProcessing(fixedPagesStack, 'FixedPagesTest', {
296
+ enableChunking: true,
297
+ chunkingConfig: {
298
+ strategy: 'fixed-pages',
299
+ pageThreshold: 100,
300
+ chunkSize: 50,
301
+ overlapPages: 5,
302
+ },
303
+ });
304
+ // Verify stack is created successfully
305
+ expect(fixedPagesStack).toBeDefined();
306
+ });
307
+ test('accepts token-based strategy configuration', () => {
308
+ const testApp = (0, test_utils_1.createTestApp)();
309
+ const tokenBasedStack = new aws_cdk_lib_1.Stack(testApp, 'TokenBasedStack');
310
+ new bedrock_document_processing_1.BedrockDocumentProcessing(tokenBasedStack, 'TokenBasedTest', {
311
+ enableChunking: true,
312
+ chunkingConfig: {
313
+ strategy: 'token-based',
314
+ tokenThreshold: 150000,
315
+ maxTokensPerChunk: 100000,
316
+ overlapTokens: 5000,
317
+ },
318
+ });
319
+ // Verify stack is created successfully
320
+ expect(tokenBasedStack).toBeDefined();
321
+ });
322
+ test('accepts hybrid strategy configuration', () => {
323
+ const testApp = (0, test_utils_1.createTestApp)();
324
+ const hybridStack = new aws_cdk_lib_1.Stack(testApp, 'HybridStack');
325
+ new bedrock_document_processing_1.BedrockDocumentProcessing(hybridStack, 'HybridTest', {
326
+ enableChunking: true,
327
+ chunkingConfig: {
328
+ strategy: 'hybrid',
329
+ maxPagesPerChunk: 100,
330
+ targetTokensPerChunk: 80000,
331
+ pageThreshold: 100,
332
+ tokenThreshold: 150000,
333
+ overlapTokens: 5000,
334
+ },
335
+ });
336
+ // Verify stack is created successfully
337
+ expect(hybridStack).toBeDefined();
338
+ });
339
+ test('accepts default chunking configuration', () => {
340
+ const testApp = (0, test_utils_1.createTestApp)();
341
+ const defaultChunkingStack = new aws_cdk_lib_1.Stack(testApp, 'DefaultChunkingStack');
342
+ new bedrock_document_processing_1.BedrockDocumentProcessing(defaultChunkingStack, 'DefaultChunkingTest', {
343
+ enableChunking: true,
344
+ });
345
+ // Verify stack is created successfully with default config
346
+ expect(defaultChunkingStack).toBeDefined();
347
+ });
348
+ test('does not enable chunking when enableChunking is false', () => {
349
+ // Verify that the default stack (without chunking) works as expected
350
+ expect(defaultStack).toBeDefined();
351
+ defaultTemplate.hasResourceProperties('AWS::S3::Bucket', {});
352
+ defaultTemplate.hasResourceProperties('AWS::SQS::Queue', {});
353
+ });
354
+ });
355
+ describe('Workflow branching for chunking', () => {
356
+ test('creates state machine with chunking workflow', () => {
357
+ // Verify that a state machine is created when chunking is enabled
358
+ chunkingTemplate.resourceCountIs('AWS::StepFunctions::StateMachine', 1);
359
+ // Verify the state machine has the expected configuration
360
+ chunkingTemplate.hasResourceProperties('AWS::StepFunctions::StateMachine', {
361
+ EncryptionConfiguration: assertions_1.Match.objectLike({
362
+ Type: 'CUSTOMER_MANAGED_KMS_KEY',
363
+ }),
364
+ });
365
+ });
366
+ test('standard workflow is used when chunking is disabled', () => {
367
+ // Verify that when enableChunking is false, the standard workflow is used
368
+ const testApp = (0, test_utils_1.createTestApp)();
369
+ const noChunkingStack = new aws_cdk_lib_1.Stack(testApp, 'NoChunkingStack');
370
+ new bedrock_document_processing_1.BedrockDocumentProcessing(noChunkingStack, 'NoChunkingTest', {
371
+ enableChunking: false,
372
+ });
373
+ const noChunkingTemplate = assertions_1.Template.fromStack(noChunkingStack);
374
+ // Should have state machine
375
+ noChunkingTemplate.resourceCountIs('AWS::StepFunctions::StateMachine', 1);
376
+ // Should NOT have PDF chunking Lambda (Python 3.13 with 2048 MB)
377
+ const lambdas = noChunkingTemplate.findResources('AWS::Lambda::Function');
378
+ const chunkingLambda = Object.values(lambdas).find((lambda) => lambda.Properties.Runtime === 'python3.13' &&
379
+ lambda.Properties.MemorySize === 2048);
380
+ expect(chunkingLambda).toBeUndefined();
381
+ });
382
+ test('creates PDF chunking Lambda when chunking is enabled', () => {
383
+ // Verify that the PDF chunking Lambda is created
384
+ const lambdas = chunkingTemplate.findResources('AWS::Lambda::Function');
385
+ // Find the chunking Lambda by its characteristics (Python 3.13, 2048 MB memory)
386
+ const chunkingLambda = Object.values(lambdas).find((lambda) => lambda.Properties.Runtime === 'python3.13' &&
387
+ lambda.Properties.MemorySize === 2048 &&
388
+ lambda.Properties.Timeout === 600);
389
+ expect(chunkingLambda).toBeDefined();
390
+ expect(chunkingLambda).toHaveProperty('Properties.Handler', 'handler.handler');
391
+ });
392
+ test('PDF chunking Lambda has correct environment variables for hybrid strategy', () => {
393
+ // Verify chunking configuration is passed via environment variables
394
+ // Find the chunking Lambda specifically
395
+ const lambdas = chunkingTemplate.findResources('AWS::Lambda::Function');
396
+ const chunkingLambda = Object.values(lambdas).find((lambda) => lambda.Properties.Runtime === 'python3.13' &&
397
+ lambda.Properties.MemorySize === 2048);
398
+ expect(chunkingLambda).toBeDefined();
399
+ expect(chunkingLambda).toHaveProperty('Properties.Environment.Variables.CHUNKING_STRATEGY', 'hybrid');
400
+ expect(chunkingLambda).toHaveProperty('Properties.Environment.Variables.MAX_PAGES_PER_CHUNK', '100');
401
+ expect(chunkingLambda).toHaveProperty('Properties.Environment.Variables.TARGET_TOKENS_PER_CHUNK', '80000');
402
+ });
403
+ test('PDF chunking Lambda has S3 access permissions', () => {
404
+ // Verify that the chunking Lambda has S3 permissions
405
+ chunkingTemplate.hasResourceProperties('AWS::IAM::Role', {
406
+ Policies: assertions_1.Match.arrayWith([
407
+ assertions_1.Match.objectLike({
408
+ PolicyDocument: assertions_1.Match.objectLike({
409
+ Statement: assertions_1.Match.arrayWith([
410
+ assertions_1.Match.objectLike({
411
+ Action: assertions_1.Match.arrayWith([
412
+ 's3:GetObject',
413
+ 's3:PutObject',
414
+ ]),
415
+ }),
416
+ ]),
417
+ }),
418
+ }),
419
+ ]),
420
+ });
421
+ });
422
+ test('state machine can invoke PDF chunking Lambda', () => {
423
+ // Verify that the state machine role has permission to invoke the chunking Lambda
424
+ chunkingTemplate.hasResourceProperties('AWS::IAM::Policy', {
425
+ PolicyDocument: assertions_1.Match.objectLike({
426
+ Statement: assertions_1.Match.arrayWith([
427
+ assertions_1.Match.objectLike({
428
+ Action: 'lambda:InvokeFunction',
429
+ Effect: 'Allow',
430
+ }),
431
+ ]),
432
+ }),
433
+ });
434
+ });
435
+ });
436
+ });
437
+ describe('Chunk-aware classification integration', () => {
438
+ describe('Classification Lambda chunk context', () => {
439
+ test('classification Lambda receives chunk metadata in Map State', () => {
440
+ // Verify that the Map State passes chunk metadata to the classification Lambda
441
+ const testApp = (0, test_utils_1.createTestApp)();
442
+ const chunkingStack = new aws_cdk_lib_1.Stack(testApp, 'ChunkContextStack');
443
+ new bedrock_document_processing_1.BedrockDocumentProcessing(chunkingStack, 'ChunkContextTest', {
444
+ enableChunking: true,
445
+ chunkingConfig: {
446
+ strategy: 'hybrid',
447
+ maxPagesPerChunk: 100,
448
+ targetTokensPerChunk: 80000,
449
+ },
450
+ });
451
+ const template = assertions_1.Template.fromStack(chunkingStack);
452
+ // Verify state machine is created with Map State
453
+ template.hasResourceProperties('AWS::StepFunctions::StateMachine', {
454
+ DefinitionString: assertions_1.Match.objectLike({
455
+ 'Fn::Join': assertions_1.Match.arrayWith(['']),
456
+ }),
457
+ });
458
+ // Verify the state machine definition includes Map state with chunk parameters
459
+ const stateMachines = template.findResources('AWS::StepFunctions::StateMachine');
460
+ const stateMachineDefinition = Object.values(stateMachines)[0];
461
+ expect(stateMachineDefinition).toBeDefined();
462
+ });
463
+ test('non-chunked workflow does not include chunk context', () => {
464
+ // Verify that when chunking is disabled, no chunk context is added
465
+ const testApp = (0, test_utils_1.createTestApp)();
466
+ const noChunkingStack = new aws_cdk_lib_1.Stack(testApp, 'NoChunkContextStack');
467
+ new bedrock_document_processing_1.BedrockDocumentProcessing(noChunkingStack, 'NoChunkContextTest', {
468
+ enableChunking: false,
469
+ });
470
+ const template = assertions_1.Template.fromStack(noChunkingStack);
471
+ // Should have state machine
472
+ template.resourceCountIs('AWS::StepFunctions::StateMachine', 1);
473
+ // Should NOT have PDF chunking Lambda
474
+ const lambdas = template.findResources('AWS::Lambda::Function');
475
+ const chunkingLambda = Object.values(lambdas).find((lambda) => lambda.Properties.Runtime === 'python3.13' &&
476
+ lambda.Properties.MemorySize === 2048);
477
+ expect(chunkingLambda).toBeUndefined();
478
+ });
479
+ test('chunked workflow includes Map State for parallel chunk processing', () => {
480
+ const testApp = (0, test_utils_1.createTestApp)();
481
+ const chunkingStack = new aws_cdk_lib_1.Stack(testApp, 'MapStateStack');
482
+ new bedrock_document_processing_1.BedrockDocumentProcessing(chunkingStack, 'MapStateTest', {
483
+ enableChunking: true,
484
+ chunkingConfig: {
485
+ strategy: 'hybrid',
486
+ processingMode: 'parallel',
487
+ maxConcurrency: 5,
488
+ },
489
+ });
490
+ const template = assertions_1.Template.fromStack(chunkingStack);
491
+ // Verify state machine is created
492
+ template.resourceCountIs('AWS::StepFunctions::StateMachine', 1);
493
+ // Verify classification Lambda exists
494
+ template.hasResourceProperties('AWS::Lambda::Function', {
495
+ Environment: {
496
+ Variables: {
497
+ INVOKE_TYPE: 'classification',
498
+ },
499
+ },
500
+ });
501
+ });
502
+ test('classification Lambda is reused for both chunked and non-chunked paths', () => {
503
+ const testApp = (0, test_utils_1.createTestApp)();
504
+ const chunkingStack = new aws_cdk_lib_1.Stack(testApp, 'ReuseStack');
505
+ new bedrock_document_processing_1.BedrockDocumentProcessing(chunkingStack, 'ReuseTest', {
506
+ enableChunking: true,
507
+ });
508
+ const template = assertions_1.Template.fromStack(chunkingStack);
509
+ // Count classification Lambdas (should be 1, reused for both paths)
510
+ const lambdas = template.findResources('AWS::Lambda::Function');
511
+ const classificationLambdas = Object.values(lambdas).filter((lambda) => lambda.Properties.Environment?.Variables?.INVOKE_TYPE === 'classification');
512
+ // Should have exactly 1 classification Lambda (reused)
513
+ expect(classificationLambdas.length).toBe(1);
514
+ });
515
+ test('workflow maintains backward compatibility for small documents', () => {
516
+ // When chunking is enabled but document is small, it should use standard workflow
517
+ const testApp = (0, test_utils_1.createTestApp)();
518
+ const chunkingStack = new aws_cdk_lib_1.Stack(testApp, 'BackwardCompatStack');
519
+ new bedrock_document_processing_1.BedrockDocumentProcessing(chunkingStack, 'BackwardCompatTest', {
520
+ enableChunking: true,
521
+ chunkingConfig: {
522
+ strategy: 'hybrid',
523
+ pageThreshold: 100,
524
+ tokenThreshold: 150000,
525
+ },
526
+ });
527
+ const template = assertions_1.Template.fromStack(chunkingStack);
528
+ // Verify both classification and processing Lambdas exist
529
+ template.hasResourceProperties('AWS::Lambda::Function', {
530
+ Environment: {
531
+ Variables: {
532
+ INVOKE_TYPE: 'classification',
533
+ },
534
+ },
535
+ });
536
+ template.hasResourceProperties('AWS::Lambda::Function', {
537
+ Environment: {
538
+ Variables: {
539
+ INVOKE_TYPE: 'processing',
540
+ },
541
+ },
542
+ });
543
+ // Verify state machine has Choice state for branching
544
+ template.resourceCountIs('AWS::StepFunctions::StateMachine', 1);
545
+ });
546
+ });
547
+ describe('Accuracy comparison between chunked and non-chunked', () => {
548
+ test('both workflows use same classification Lambda function', () => {
549
+ // This ensures accuracy is maintained because the same model and prompt are used
550
+ const testApp = (0, test_utils_1.createTestApp)();
551
+ const chunkingStack = new aws_cdk_lib_1.Stack(testApp, 'AccuracyStack');
552
+ new bedrock_document_processing_1.BedrockDocumentProcessing(chunkingStack, 'AccuracyTest', {
553
+ enableChunking: true,
554
+ classificationBedrockModel: {
555
+ fmModelId: aws_bedrock_1.FoundationModelIdentifier.ANTHROPIC_CLAUDE_3_HAIKU_20240307_V1_0,
556
+ useCrossRegionInference: false,
557
+ },
558
+ });
559
+ const template = assertions_1.Template.fromStack(chunkingStack);
560
+ // Verify the classification Lambda uses the specified model
561
+ template.hasResourceProperties('AWS::Lambda::Function', {
562
+ Environment: {
563
+ Variables: {
564
+ MODEL_ID: 'anthropic.claude-3-haiku-20240307-v1:0',
565
+ INVOKE_TYPE: 'classification',
566
+ },
567
+ },
568
+ });
569
+ });
570
+ test('custom classification prompt is used for both chunked and non-chunked', () => {
571
+ const customPrompt = 'Custom classification prompt for testing';
572
+ const testApp = (0, test_utils_1.createTestApp)();
573
+ const chunkingStack = new aws_cdk_lib_1.Stack(testApp, 'CustomPromptStack');
574
+ new bedrock_document_processing_1.BedrockDocumentProcessing(chunkingStack, 'CustomPromptTest', {
575
+ enableChunking: true,
576
+ classificationPrompt: customPrompt,
577
+ });
578
+ const template = assertions_1.Template.fromStack(chunkingStack);
579
+ // Verify the custom prompt is set
580
+ template.hasResourceProperties('AWS::Lambda::Function', {
581
+ Environment: {
582
+ Variables: {
583
+ PROMPT: customPrompt,
584
+ INVOKE_TYPE: 'classification',
585
+ },
586
+ },
587
+ });
588
+ });
589
+ });
590
+ });
591
+ describe('Configuration validation', () => {
592
+ describe('Invalid configuration rejection', () => {
593
+ test('throws error for chunkSize <= 0', () => {
594
+ const testApp = (0, test_utils_1.createTestApp)();
595
+ const stack = new aws_cdk_lib_1.Stack(testApp, 'InvalidChunkSizeStack');
596
+ expect(() => {
597
+ new bedrock_document_processing_1.BedrockDocumentProcessing(stack, 'InvalidChunkSizeTest', {
598
+ enableChunking: true,
599
+ chunkingConfig: {
600
+ strategy: 'fixed-pages',
601
+ chunkSize: 0,
602
+ },
603
+ });
604
+ }).toThrow('ChunkingConfig validation error: chunkSize must be greater than 0');
605
+ });
606
+ test('throws error for negative chunkSize', () => {
607
+ const testApp = (0, test_utils_1.createTestApp)();
608
+ const stack = new aws_cdk_lib_1.Stack(testApp, 'NegativeChunkSizeStack');
609
+ expect(() => {
610
+ new bedrock_document_processing_1.BedrockDocumentProcessing(stack, 'NegativeChunkSizeTest', {
611
+ enableChunking: true,
612
+ chunkingConfig: {
613
+ strategy: 'fixed-pages',
614
+ chunkSize: -10,
615
+ },
616
+ });
617
+ }).toThrow('ChunkingConfig validation error: chunkSize must be greater than 0');
618
+ });
619
+ test('throws error for negative overlapPages', () => {
620
+ const testApp = (0, test_utils_1.createTestApp)();
621
+ const stack = new aws_cdk_lib_1.Stack(testApp, 'NegativeOverlapStack');
622
+ expect(() => {
623
+ new bedrock_document_processing_1.BedrockDocumentProcessing(stack, 'NegativeOverlapTest', {
624
+ enableChunking: true,
625
+ chunkingConfig: {
626
+ strategy: 'fixed-pages',
627
+ chunkSize: 50,
628
+ overlapPages: -5,
629
+ },
630
+ });
631
+ }).toThrow('ChunkingConfig validation error: overlapPages must be non-negative');
632
+ });
633
+ test('throws error for overlapPages >= chunkSize', () => {
634
+ const testApp = (0, test_utils_1.createTestApp)();
635
+ const stack = new aws_cdk_lib_1.Stack(testApp, 'OverlapTooLargeStack');
636
+ expect(() => {
637
+ new bedrock_document_processing_1.BedrockDocumentProcessing(stack, 'OverlapTooLargeTest', {
638
+ enableChunking: true,
639
+ chunkingConfig: {
640
+ strategy: 'fixed-pages',
641
+ chunkSize: 50,
642
+ overlapPages: 50,
643
+ },
644
+ });
645
+ }).toThrow('ChunkingConfig validation error: overlapPages must be less than chunkSize');
646
+ });
647
+ test('throws error for pageThreshold <= 0', () => {
648
+ const testApp = (0, test_utils_1.createTestApp)();
649
+ const stack = new aws_cdk_lib_1.Stack(testApp, 'InvalidPageThresholdStack');
650
+ expect(() => {
651
+ new bedrock_document_processing_1.BedrockDocumentProcessing(stack, 'InvalidPageThresholdTest', {
652
+ enableChunking: true,
653
+ chunkingConfig: {
654
+ strategy: 'fixed-pages',
655
+ pageThreshold: 0,
656
+ },
657
+ });
658
+ }).toThrow('ChunkingConfig validation error: pageThreshold must be greater than 0');
659
+ });
660
+ test('throws error for tokenThreshold <= 0', () => {
661
+ const testApp = (0, test_utils_1.createTestApp)();
662
+ const stack = new aws_cdk_lib_1.Stack(testApp, 'InvalidTokenThresholdStack');
663
+ expect(() => {
664
+ new bedrock_document_processing_1.BedrockDocumentProcessing(stack, 'InvalidTokenThresholdTest', {
665
+ enableChunking: true,
666
+ chunkingConfig: {
667
+ strategy: 'token-based',
668
+ tokenThreshold: -100,
669
+ },
670
+ });
671
+ }).toThrow('ChunkingConfig validation error: tokenThreshold must be greater than 0');
672
+ });
673
+ test('throws error for maxTokensPerChunk <= 0', () => {
674
+ const testApp = (0, test_utils_1.createTestApp)();
675
+ const stack = new aws_cdk_lib_1.Stack(testApp, 'InvalidMaxTokensStack');
676
+ expect(() => {
677
+ new bedrock_document_processing_1.BedrockDocumentProcessing(stack, 'InvalidMaxTokensTest', {
678
+ enableChunking: true,
679
+ chunkingConfig: {
680
+ strategy: 'token-based',
681
+ maxTokensPerChunk: 0,
682
+ },
683
+ });
684
+ }).toThrow('ChunkingConfig validation error: maxTokensPerChunk must be greater than 0');
685
+ });
686
+ test('throws error for negative overlapTokens', () => {
687
+ const testApp = (0, test_utils_1.createTestApp)();
688
+ const stack = new aws_cdk_lib_1.Stack(testApp, 'NegativeOverlapTokensStack');
689
+ expect(() => {
690
+ new bedrock_document_processing_1.BedrockDocumentProcessing(stack, 'NegativeOverlapTokensTest', {
691
+ enableChunking: true,
692
+ chunkingConfig: {
693
+ strategy: 'token-based',
694
+ overlapTokens: -1000,
695
+ },
696
+ });
697
+ }).toThrow('ChunkingConfig validation error: overlapTokens must be non-negative');
698
+ });
699
// overlapTokens equal to maxTokensPerChunk (both 50000) exercises the ">=" boundary and must be rejected.
test('throws error for overlapTokens >= maxTokensPerChunk', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'OverlapTokensTooLargeStack');
    const invalidConfig = {
        strategy: 'token-based',
        maxTokensPerChunk: 50000,
        overlapTokens: 50000,
    };
    // Deferred so that expect() can observe the constructor throwing.
    const build = () => new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'OverlapTokensTooLargeTest', {
        enableChunking: true,
        chunkingConfig: invalidConfig,
    });
    expect(build).toThrow('ChunkingConfig validation error: overlapTokens must be less than maxTokensPerChunk');
});
713
// A hybrid config with maxPagesPerChunk set to 0 must be rejected at construction time.
test('throws error for maxPagesPerChunk <= 0', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'InvalidMaxPagesStack');
    const invalidConfig = {
        strategy: 'hybrid',
        maxPagesPerChunk: 0,
    };
    // Deferred so that expect() can observe the constructor throwing.
    const build = () => new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'InvalidMaxPagesTest', {
        enableChunking: true,
        chunkingConfig: invalidConfig,
    });
    expect(build).toThrow('ChunkingConfig validation error: maxPagesPerChunk must be greater than 0');
});
726
// A hybrid config with a negative targetTokensPerChunk (-1) must be rejected at construction time.
test('throws error for targetTokensPerChunk <= 0', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'InvalidTargetTokensStack');
    const invalidConfig = {
        strategy: 'hybrid',
        targetTokensPerChunk: -1,
    };
    // Deferred so that expect() can observe the constructor throwing.
    const build = () => new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'InvalidTargetTokensTest', {
        enableChunking: true,
        chunkingConfig: invalidConfig,
    });
    expect(build).toThrow('ChunkingConfig validation error: targetTokensPerChunk must be greater than 0');
});
739
// maxConcurrency of 0 must be rejected; no strategy is supplied, so only this field is under test.
test('throws error for maxConcurrency <= 0', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'InvalidMaxConcurrencyStack');
    const invalidConfig = {
        maxConcurrency: 0,
    };
    // Deferred so that expect() can observe the constructor throwing.
    const build = () => new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'InvalidMaxConcurrencyTest', {
        enableChunking: true,
        chunkingConfig: invalidConfig,
    });
    expect(build).toThrow('ChunkingConfig validation error: maxConcurrency must be greater than 0');
});
751
// minSuccessThreshold just below the lower bound (-0.1) must be rejected at construction time.
test('throws error for minSuccessThreshold < 0', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'InvalidMinThresholdLowStack');
    const invalidConfig = {
        minSuccessThreshold: -0.1,
    };
    // Deferred so that expect() can observe the constructor throwing.
    const build = () => new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'InvalidMinThresholdLowTest', {
        enableChunking: true,
        chunkingConfig: invalidConfig,
    });
    expect(build).toThrow('ChunkingConfig validation error: minSuccessThreshold must be between 0 and 1');
});
763
// minSuccessThreshold above the upper bound (1.5) must be rejected at construction time.
test('throws error for minSuccessThreshold > 1', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'InvalidMinThresholdHighStack');
    const invalidConfig = {
        minSuccessThreshold: 1.5,
    };
    // Deferred so that expect() can observe the constructor throwing.
    const build = () => new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'InvalidMinThresholdHighTest', {
        enableChunking: true,
        chunkingConfig: invalidConfig,
    });
    expect(build).toThrow('ChunkingConfig validation error: minSuccessThreshold must be between 0 and 1');
});
775
+ });
776
+ describe('Valid configuration acceptance', () => {
777
// The smallest values this test treats as valid for a fixed-pages config must construct without throwing.
test('accepts valid boundary values', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'ValidBoundaryStack');
    const boundaryConfig = {
        strategy: 'fixed-pages',
        chunkSize: 1, // lowest accepted value
        overlapPages: 0, // lowest accepted value
        pageThreshold: 1, // lowest accepted value
    };
    const build = () => new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'ValidBoundaryTest', {
        enableChunking: true,
        chunkingConfig: boundaryConfig,
    });
    // Construction is expected to succeed.
    expect(build).not.toThrow();
});
793
// Both ends of the inclusive [0, 1] range for minSuccessThreshold must construct without throwing.
test('accepts minSuccessThreshold at boundaries', () => {
    const app = (0, test_utils_1.createTestApp)();
    // Both stacks are created up front (in this order) before any construct is instantiated.
    const boundaryCases = [
        { threshold: 0, hostStack: new aws_cdk_lib_1.Stack(app, 'MinThreshold0Stack'), constructId: 'MinThreshold0Test' },
        { threshold: 1, hostStack: new aws_cdk_lib_1.Stack(app, 'MinThreshold1Stack'), constructId: 'MinThreshold1Test' },
    ];
    for (const { threshold, hostStack, constructId } of boundaryCases) {
        // Neither boundary value may trigger a validation error.
        expect(() => {
            new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, constructId, {
                enableChunking: true,
                chunkingConfig: {
                    minSuccessThreshold: threshold,
                },
            });
        }).not.toThrow();
    }
});
816
+ });
817
+ describe('Validation skipped when chunking disabled', () => {
818
// With enableChunking explicitly false, an otherwise invalid chunkSize (-10) must not cause a throw.
test('does not validate config when enableChunking is false', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'ChunkingDisabledStack');
    const ignoredConfig = {
        chunkSize: -10, // invalid on purpose; expected to be ignored
    };
    const build = () => new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'ChunkingDisabledTest', {
        enableChunking: false,
        chunkingConfig: ignoredConfig,
    });
    // Chunking is off, so the bad value should never be validated.
    expect(build).not.toThrow();
});
831
// With enableChunking omitted entirely, an otherwise invalid chunkSize (0) must not cause a throw.
test('does not validate config when enableChunking is undefined', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'ChunkingUndefinedStack');
    const ignoredConfig = {
        chunkSize: 0, // invalid on purpose; expected to be ignored
    };
    // Note: the props deliberately carry no enableChunking key at all.
    const build = () => new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'ChunkingUndefinedTest', {
        chunkingConfig: ignoredConfig,
    });
    // Chunking was never enabled, so the bad value should never be validated.
    expect(build).not.toThrow();
});
843
+ });
844
+ });
845
+ describe('Configuration precedence', () => {
846
+ describe('Default values applied', () => {
847
// With an empty chunkingConfig, the matched Lambda's CHUNKING_STRATEGY env var is expected to be 'hybrid'.
test('uses default strategy when not specified', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'DefaultStrategyStack');
    new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'DefaultStrategyTest', {
        enableChunking: true,
        chunkingConfig: {}, // empty on purpose: defaults are expected to apply
    });
    const lambdaResources = assertions_1.Template.fromStack(hostStack).findResources('AWS::Lambda::Function');
    // The test treats the python3.13 function with MemorySize 2048 as the chunking Lambda.
    const isChunker = ({ Properties }) => Properties.Runtime === 'python3.13' && Properties.MemorySize === 2048;
    const chunker = Object.values(lambdaResources).find(isChunker);
    expect(chunker).toBeDefined();
    expect(chunker).toHaveProperty('Properties.Environment.Variables.CHUNKING_STRATEGY', 'hybrid');
});
861
// With only the strategy set, the matched Lambda's PAGE_THRESHOLD env var is expected to be '100'.
test('uses default pageThreshold when not specified', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'DefaultPageThresholdStack');
    const partialConfig = {
        strategy: 'fixed-pages',
    };
    new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'DefaultPageThresholdTest', {
        enableChunking: true,
        chunkingConfig: partialConfig,
    });
    const lambdaResources = assertions_1.Template.fromStack(hostStack).findResources('AWS::Lambda::Function');
    // The test treats the python3.13 function with MemorySize 2048 as the chunking Lambda.
    const isChunker = ({ Properties }) => Properties.Runtime === 'python3.13' && Properties.MemorySize === 2048;
    const chunker = Object.values(lambdaResources).find(isChunker);
    expect(chunker).toBeDefined();
    // Environment variable values are compared as strings.
    expect(chunker).toHaveProperty('Properties.Environment.Variables.PAGE_THRESHOLD', '100');
});
877
// With only the strategy set, the matched Lambda's TOKEN_THRESHOLD env var is expected to be '150000'.
test('uses default tokenThreshold when not specified', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'DefaultTokenThresholdStack');
    const partialConfig = {
        strategy: 'token-based',
    };
    new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'DefaultTokenThresholdTest', {
        enableChunking: true,
        chunkingConfig: partialConfig,
    });
    const lambdaResources = assertions_1.Template.fromStack(hostStack).findResources('AWS::Lambda::Function');
    // The test treats the python3.13 function with MemorySize 2048 as the chunking Lambda.
    const isChunker = ({ Properties }) => Properties.Runtime === 'python3.13' && Properties.MemorySize === 2048;
    const chunker = Object.values(lambdaResources).find(isChunker);
    expect(chunker).toBeDefined();
    // Environment variable values are compared as strings.
    expect(chunker).toHaveProperty('Properties.Environment.Variables.TOKEN_THRESHOLD', '150000');
});
893
// Parallel processing mode with no explicit maxConcurrency must construct and synthesize cleanly.
//
// The previous sole assertion, expect(stack).toBeDefined(), could never fail because `stack`
// is a freshly constructed object. It is replaced by checks that can actually fail.
test('uses default maxConcurrency when not specified', () => {
    const testApp = (0, test_utils_1.createTestApp)();
    const stack = new aws_cdk_lib_1.Stack(testApp, 'DefaultConcurrencyStack');
    // Omitting maxConcurrency must not trip ChunkingConfig validation.
    expect(() => {
        new bedrock_document_processing_1.BedrockDocumentProcessing(stack, 'DefaultConcurrencyTest', {
            enableChunking: true,
            chunkingConfig: {
                processingMode: 'parallel',
            },
        });
    }).not.toThrow();
    // Synthesis must also succeed with the defaulted value in place.
    expect(() => assertions_1.Template.fromStack(stack)).not.toThrow();
    // TODO(review): the intended default is 10 (per the original comment in this test), but where
    // that value surfaces in the synthesized template is not visible from this file. Once that is
    // confirmed, assert on the value itself.
});
905
+ });
906
+ describe('Custom configuration overrides defaults', () => {
907
// An explicit 'token-based' strategy must reach the matched Lambda's CHUNKING_STRATEGY env var.
test('custom strategy overrides default', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'CustomStrategyStack');
    const customConfig = {
        strategy: 'token-based',
    };
    new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'CustomStrategyTest', {
        enableChunking: true,
        chunkingConfig: customConfig,
    });
    const lambdaResources = assertions_1.Template.fromStack(hostStack).findResources('AWS::Lambda::Function');
    // The test treats the python3.13 function with MemorySize 2048 as the chunking Lambda.
    const isChunker = ({ Properties }) => Properties.Runtime === 'python3.13' && Properties.MemorySize === 2048;
    const chunker = Object.values(lambdaResources).find(isChunker);
    expect(chunker).toBeDefined();
    expect(chunker).toHaveProperty('Properties.Environment.Variables.CHUNKING_STRATEGY', 'token-based');
});
923
// An explicit pageThreshold of 50 must reach the matched Lambda's PAGE_THRESHOLD env var as '50'.
test('custom pageThreshold overrides default', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'CustomPageThresholdStack');
    const customConfig = {
        pageThreshold: 50,
    };
    new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'CustomPageThresholdTest', {
        enableChunking: true,
        chunkingConfig: customConfig,
    });
    const lambdaResources = assertions_1.Template.fromStack(hostStack).findResources('AWS::Lambda::Function');
    // The test treats the python3.13 function with MemorySize 2048 as the chunking Lambda.
    const isChunker = ({ Properties }) => Properties.Runtime === 'python3.13' && Properties.MemorySize === 2048;
    const chunker = Object.values(lambdaResources).find(isChunker);
    expect(chunker).toBeDefined();
    // The numeric prop is expected to appear as a string in the environment.
    expect(chunker).toHaveProperty('Properties.Environment.Variables.PAGE_THRESHOLD', '50');
});
939
// An explicit tokenThreshold of 200000 must reach the matched Lambda's TOKEN_THRESHOLD env var as '200000'.
test('custom tokenThreshold overrides default', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'CustomTokenThresholdStack');
    const customConfig = {
        tokenThreshold: 200000,
    };
    new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'CustomTokenThresholdTest', {
        enableChunking: true,
        chunkingConfig: customConfig,
    });
    const lambdaResources = assertions_1.Template.fromStack(hostStack).findResources('AWS::Lambda::Function');
    // The test treats the python3.13 function with MemorySize 2048 as the chunking Lambda.
    const isChunker = ({ Properties }) => Properties.Runtime === 'python3.13' && Properties.MemorySize === 2048;
    const chunker = Object.values(lambdaResources).find(isChunker);
    expect(chunker).toBeDefined();
    // The numeric prop is expected to appear as a string in the environment.
    expect(chunker).toHaveProperty('Properties.Environment.Variables.TOKEN_THRESHOLD', '200000');
});
955
// An explicit chunkSize of 25 (fixed-pages strategy) must reach the matched Lambda's CHUNK_SIZE env var as '25'.
test('custom chunkSize overrides default', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'CustomChunkSizeStack');
    const customConfig = {
        strategy: 'fixed-pages',
        chunkSize: 25,
    };
    new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'CustomChunkSizeTest', {
        enableChunking: true,
        chunkingConfig: customConfig,
    });
    const lambdaResources = assertions_1.Template.fromStack(hostStack).findResources('AWS::Lambda::Function');
    // The test treats the python3.13 function with MemorySize 2048 as the chunking Lambda.
    const isChunker = ({ Properties }) => Properties.Runtime === 'python3.13' && Properties.MemorySize === 2048;
    const chunker = Object.values(lambdaResources).find(isChunker);
    expect(chunker).toBeDefined();
    // The numeric prop is expected to appear as a string in the environment.
    expect(chunker).toHaveProperty('Properties.Environment.Variables.CHUNK_SIZE', '25');
});
972
// An explicit maxPagesPerChunk of 75 (hybrid strategy) must reach MAX_PAGES_PER_CHUNK as '75'.
test('custom maxPagesPerChunk overrides default', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'CustomMaxPagesStack');
    const customConfig = {
        strategy: 'hybrid',
        maxPagesPerChunk: 75,
    };
    new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'CustomMaxPagesTest', {
        enableChunking: true,
        chunkingConfig: customConfig,
    });
    const lambdaResources = assertions_1.Template.fromStack(hostStack).findResources('AWS::Lambda::Function');
    // The test treats the python3.13 function with MemorySize 2048 as the chunking Lambda.
    const isChunker = ({ Properties }) => Properties.Runtime === 'python3.13' && Properties.MemorySize === 2048;
    const chunker = Object.values(lambdaResources).find(isChunker);
    expect(chunker).toBeDefined();
    // The numeric prop is expected to appear as a string in the environment.
    expect(chunker).toHaveProperty('Properties.Environment.Variables.MAX_PAGES_PER_CHUNK', '75');
});
989
// When maxPagesPerChunk is omitted under the hybrid strategy, MAX_PAGES_PER_CHUNK is expected to be '99'.
test('default maxPagesPerChunk is 99 to stay under Bedrock 100-page limit', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'DefaultMaxPagesStack');
    const partialConfig = {
        strategy: 'hybrid',
        // maxPagesPerChunk intentionally left out so the default (expected: 99) is used
    };
    new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'DefaultMaxPagesTest', {
        enableChunking: true,
        chunkingConfig: partialConfig,
    });
    const lambdaResources = assertions_1.Template.fromStack(hostStack).findResources('AWS::Lambda::Function');
    // The test treats the python3.13 function with MemorySize 2048 as the chunking Lambda.
    const isChunker = ({ Properties }) => Properties.Runtime === 'python3.13' && Properties.MemorySize === 2048;
    const chunker = Object.values(lambdaResources).find(isChunker);
    expect(chunker).toBeDefined();
    // 99 rather than 100: Bedrock enforces a hard limit of 100 pages per PDF.
    expect(chunker).toHaveProperty('Properties.Environment.Variables.MAX_PAGES_PER_CHUNK', '99');
});
1007
// An explicit targetTokensPerChunk of 60000 (hybrid strategy) must reach TARGET_TOKENS_PER_CHUNK as '60000'.
test('custom targetTokensPerChunk overrides default', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'CustomTargetTokensStack');
    const customConfig = {
        strategy: 'hybrid',
        targetTokensPerChunk: 60000,
    };
    new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'CustomTargetTokensTest', {
        enableChunking: true,
        chunkingConfig: customConfig,
    });
    const lambdaResources = assertions_1.Template.fromStack(hostStack).findResources('AWS::Lambda::Function');
    // The test treats the python3.13 function with MemorySize 2048 as the chunking Lambda.
    const isChunker = ({ Properties }) => Properties.Runtime === 'python3.13' && Properties.MemorySize === 2048;
    const chunker = Object.values(lambdaResources).find(isChunker);
    expect(chunker).toBeDefined();
    // The numeric prop is expected to appear as a string in the environment.
    expect(chunker).toHaveProperty('Properties.Environment.Variables.TARGET_TOKENS_PER_CHUNK', '60000');
});
1024
// Supplies every chunking option at once and checks that each one surfaces in the matched Lambda's environment.
test('all custom values override all defaults', () => {
    const app = (0, test_utils_1.createTestApp)();
    const hostStack = new aws_cdk_lib_1.Stack(app, 'AllCustomStack');
    const fullConfig = {
        strategy: 'fixed-pages',
        pageThreshold: 200,
        tokenThreshold: 300000,
        chunkSize: 75,
        overlapPages: 10,
        maxTokensPerChunk: 150000,
        overlapTokens: 10000,
        targetTokensPerChunk: 120000,
        maxPagesPerChunk: 150,
        // processingMode and maxConcurrency are supplied but have no matching assertion below.
        processingMode: 'sequential',
        maxConcurrency: 5,
    };
    new bedrock_document_processing_1.BedrockDocumentProcessing(hostStack, 'AllCustomTest', {
        enableChunking: true,
        chunkingConfig: fullConfig,
    });
    const lambdaResources = assertions_1.Template.fromStack(hostStack).findResources('AWS::Lambda::Function');
    // The test treats the python3.13 function with MemorySize 2048 as the chunking Lambda.
    const isChunker = ({ Properties }) => Properties.Runtime === 'python3.13' && Properties.MemorySize === 2048;
    const chunker = Object.values(lambdaResources).find(isChunker);
    expect(chunker).toBeDefined();
    // Property path -> expected string value, checked in this order.
    const expectedEnvironment = [
        ['Properties.Environment.Variables.CHUNKING_STRATEGY', 'fixed-pages'],
        ['Properties.Environment.Variables.PAGE_THRESHOLD', '200'],
        ['Properties.Environment.Variables.TOKEN_THRESHOLD', '300000'],
        ['Properties.Environment.Variables.CHUNK_SIZE', '75'],
        ['Properties.Environment.Variables.OVERLAP_PAGES', '10'],
        ['Properties.Environment.Variables.MAX_TOKENS_PER_CHUNK', '150000'],
        ['Properties.Environment.Variables.OVERLAP_TOKENS', '10000'],
        ['Properties.Environment.Variables.TARGET_TOKENS_PER_CHUNK', '120000'],
        ['Properties.Environment.Variables.MAX_PAGES_PER_CHUNK', '150'],
    ];
    for (const [propertyPath, expectedValue] of expectedEnvironment) {
        expect(chunker).toHaveProperty(propertyPath, expectedValue);
    }
});
1058
+ });
259
1059
  });
260
- //# sourceMappingURL=data:application/json;base64,
1060
+ //# sourceMappingURL=data:application/json;base64,