@cdklabs/cdk-appmod-catalog-blueprints 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/.jsii +2579 -194
  2. package/lib/document-processing/adapter/adapter.d.ts +4 -2
  3. package/lib/document-processing/adapter/adapter.js +1 -1
  4. package/lib/document-processing/adapter/queued-s3-adapter.d.ts +9 -2
  5. package/lib/document-processing/adapter/queued-s3-adapter.js +29 -15
  6. package/lib/document-processing/agentic-document-processing.d.ts +4 -0
  7. package/lib/document-processing/agentic-document-processing.js +20 -10
  8. package/lib/document-processing/base-document-processing.d.ts +54 -2
  9. package/lib/document-processing/base-document-processing.js +136 -82
  10. package/lib/document-processing/bedrock-document-processing.d.ts +202 -2
  11. package/lib/document-processing/bedrock-document-processing.js +717 -77
  12. package/lib/document-processing/chunking-config.d.ts +614 -0
  13. package/lib/document-processing/chunking-config.js +5 -0
  14. package/lib/document-processing/default-document-processing-config.js +1 -1
  15. package/lib/document-processing/index.d.ts +1 -0
  16. package/lib/document-processing/index.js +2 -1
  17. package/lib/document-processing/resources/aggregation/handler.py +567 -0
  18. package/lib/document-processing/resources/aggregation/requirements.txt +7 -0
  19. package/lib/document-processing/resources/aggregation/test_handler.py +362 -0
  20. package/lib/document-processing/resources/cleanup/handler.py +276 -0
  21. package/lib/document-processing/resources/cleanup/requirements.txt +5 -0
  22. package/lib/document-processing/resources/cleanup/test_handler.py +436 -0
  23. package/lib/document-processing/resources/default-bedrock-invoke/index.py +85 -3
  24. package/lib/document-processing/resources/default-bedrock-invoke/test_index.py +622 -0
  25. package/lib/document-processing/resources/pdf-chunking/README.md +313 -0
  26. package/lib/document-processing/resources/pdf-chunking/chunking_strategies.py +460 -0
  27. package/lib/document-processing/resources/pdf-chunking/error_handling.py +491 -0
  28. package/lib/document-processing/resources/pdf-chunking/handler.py +958 -0
  29. package/lib/document-processing/resources/pdf-chunking/metrics.py +435 -0
  30. package/lib/document-processing/resources/pdf-chunking/requirements.txt +3 -0
  31. package/lib/document-processing/resources/pdf-chunking/strategy_selection.py +420 -0
  32. package/lib/document-processing/resources/pdf-chunking/structured_logging.py +457 -0
  33. package/lib/document-processing/resources/pdf-chunking/test_chunking_strategies.py +353 -0
  34. package/lib/document-processing/resources/pdf-chunking/test_error_handling.py +487 -0
  35. package/lib/document-processing/resources/pdf-chunking/test_handler.py +609 -0
  36. package/lib/document-processing/resources/pdf-chunking/test_integration.py +694 -0
  37. package/lib/document-processing/resources/pdf-chunking/test_metrics.py +532 -0
  38. package/lib/document-processing/resources/pdf-chunking/test_strategy_selection.py +471 -0
  39. package/lib/document-processing/resources/pdf-chunking/test_structured_logging.py +449 -0
  40. package/lib/document-processing/resources/pdf-chunking/test_token_estimation.py +374 -0
  41. package/lib/document-processing/resources/pdf-chunking/token_estimation.py +189 -0
  42. package/lib/document-processing/tests/agentic-document-processing-nag.test.js +4 -3
  43. package/lib/document-processing/tests/agentic-document-processing.test.js +488 -4
  44. package/lib/document-processing/tests/base-document-processing-nag.test.js +9 -2
  45. package/lib/document-processing/tests/base-document-processing-schema.test.d.ts +1 -0
  46. package/lib/document-processing/tests/base-document-processing-schema.test.js +337 -0
  47. package/lib/document-processing/tests/base-document-processing.test.js +114 -8
  48. package/lib/document-processing/tests/bedrock-document-processing-chunking-nag.test.d.ts +1 -0
  49. package/lib/document-processing/tests/bedrock-document-processing-chunking-nag.test.js +382 -0
  50. package/lib/document-processing/tests/bedrock-document-processing-nag.test.js +4 -3
  51. package/lib/document-processing/tests/bedrock-document-processing-security.test.d.ts +1 -0
  52. package/lib/document-processing/tests/bedrock-document-processing-security.test.js +389 -0
  53. package/lib/document-processing/tests/bedrock-document-processing.test.js +808 -8
  54. package/lib/document-processing/tests/chunking-config.test.d.ts +1 -0
  55. package/lib/document-processing/tests/chunking-config.test.js +238 -0
  56. package/lib/document-processing/tests/queued-s3-adapter-nag.test.js +9 -2
  57. package/lib/document-processing/tests/queued-s3-adapter.test.js +17 -6
  58. package/lib/framework/agents/base-agent.js +1 -1
  59. package/lib/framework/agents/batch-agent.js +1 -1
  60. package/lib/framework/agents/default-agent-config.js +1 -1
  61. package/lib/framework/bedrock/bedrock.js +1 -1
  62. package/lib/framework/custom-resource/default-runtimes.js +1 -1
  63. package/lib/framework/foundation/access-log.js +1 -1
  64. package/lib/framework/foundation/eventbridge-broker.js +1 -1
  65. package/lib/framework/foundation/network.d.ts +4 -2
  66. package/lib/framework/foundation/network.js +52 -41
  67. package/lib/framework/tests/access-log.test.js +5 -2
  68. package/lib/framework/tests/batch-agent.test.js +5 -2
  69. package/lib/framework/tests/bedrock.test.js +5 -2
  70. package/lib/framework/tests/eventbridge-broker.test.js +5 -2
  71. package/lib/framework/tests/framework-nag.test.js +26 -7
  72. package/lib/framework/tests/network.test.js +30 -2
  73. package/lib/tsconfig.tsbuildinfo +1 -1
  74. package/lib/utilities/data-loader.js +1 -1
  75. package/lib/utilities/lambda-iam-utils.js +1 -1
  76. package/lib/utilities/observability/cloudfront-distribution-observability-property-injector.js +1 -1
  77. package/lib/utilities/observability/default-observability-config.js +1 -1
  78. package/lib/utilities/observability/lambda-observability-property-injector.js +1 -1
  79. package/lib/utilities/observability/log-group-data-protection-utils.js +1 -1
  80. package/lib/utilities/observability/powertools-config.d.ts +10 -1
  81. package/lib/utilities/observability/powertools-config.js +19 -3
  82. package/lib/utilities/observability/state-machine-observability-property-injector.js +1 -1
  83. package/lib/utilities/test-utils.d.ts +43 -0
  84. package/lib/utilities/test-utils.js +56 -0
  85. package/lib/utilities/tests/data-loader-nag.test.js +3 -2
  86. package/lib/utilities/tests/data-loader.test.js +3 -2
  87. package/lib/webapp/frontend-construct.js +1 -1
  88. package/lib/webapp/tests/frontend-construct-nag.test.js +3 -2
  89. package/lib/webapp/tests/frontend-construct.test.js +3 -2
  90. package/package.json +6 -5
  91. package/lib/document-processing/resources/default-error-handler/index.js +0 -46
  92. package/lib/document-processing/resources/default-pdf-processor/index.js +0 -46
  93. package/lib/document-processing/resources/default-pdf-validator/index.js +0 -36
@@ -11,6 +11,7 @@ const aws_cdk_lib_1 = require("aws-cdk-lib");
11
11
  const aws_ec2_1 = require("aws-cdk-lib/aws-ec2");
12
12
  const aws_iam_1 = require("aws-cdk-lib/aws-iam");
13
13
  const aws_lambda_1 = require("aws-cdk-lib/aws-lambda");
14
+ const aws_stepfunctions_1 = require("aws-cdk-lib/aws-stepfunctions");
14
15
  const aws_stepfunctions_tasks_1 = require("aws-cdk-lib/aws-stepfunctions-tasks");
15
16
  const base_document_processing_1 = require("./base-document-processing");
16
17
  const framework_1 = require("../framework");
@@ -58,9 +59,24 @@ class BedrockDocumentProcessing extends base_document_processing_1.BaseDocumentP
58
59
  * @param scope - The scope in which to define this construct
59
60
  * @param id - The scoped construct ID. Must be unique within the scope.
60
61
  * @param props - Configuration properties for the Bedrock document processing pipeline
62
+ * @throws Error if chunking configuration is invalid
61
63
  */
62
64
  constructor(scope, id, props) {
63
65
  super(scope, id, props);
66
+ /** Counter for generating unique classification step IDs */
67
+ this._classificationStepCounter = 0;
68
+ /** Counter for generating unique processing step IDs */
69
+ this._processingStepCounter = 0;
70
+ /** Counter for generating unique aggregation step IDs */
71
+ this._aggregationStepCounter = 0;
72
+ /** Counter for generating unique enrichment step IDs */
73
+ this._enrichmentStepCounter = 0;
74
+ /** Counter for generating unique post-processing step IDs */
75
+ this._postProcessingStepCounter = 0;
76
+ // Validate chunking configuration if provided
77
+ if (props.enableChunking && props.chunkingConfig) {
78
+ this.validateChunkingConfig(props.chunkingConfig);
79
+ }
64
80
  if (props.network) {
65
81
  props.network.createServiceEndpoint('vpce-bedrock', aws_ec2_1.InterfaceVpcEndpointAwsService.BEDROCK);
66
82
  props.network.createServiceEndpoint('vpce-bedrock-runtime', aws_ec2_1.InterfaceVpcEndpointAwsService.BEDROCK_RUNTIME);
@@ -68,6 +84,77 @@ class BedrockDocumentProcessing extends base_document_processing_1.BaseDocumentP
68
84
  this.bedrockDocumentProcessingProps = props;
69
85
  this.stateMachine = this.handleStateMachineCreation('bedrock-document-processing-workflow');
70
86
  }
87
+ /**
88
+ * Validates the chunking configuration parameters.
89
+ *
90
+ * Ensures that:
91
+ * - Chunk size is greater than 0
92
+ * - Overlap is non-negative and less than chunk size
93
+ * - Thresholds are greater than 0
94
+ * - Max concurrency is greater than 0
95
+ * - Min success threshold is between 0 and 1
96
+ *
97
+ * @param config - The chunking configuration to validate
98
+ * @throws Error if any configuration parameter is invalid
99
+ */
100
+ validateChunkingConfig(config) {
101
+ // Validate chunk size (for fixed-pages strategy)
102
+ if (config.chunkSize !== undefined) {
103
+ if (config.chunkSize <= 0) {
104
+ throw new Error('ChunkingConfig validation error: chunkSize must be greater than 0');
105
+ }
106
+ }
107
+ // Validate overlap pages (for fixed-pages strategy)
108
+ if (config.overlapPages !== undefined) {
109
+ if (config.overlapPages < 0) {
110
+ throw new Error('ChunkingConfig validation error: overlapPages must be non-negative');
111
+ }
112
+ const effectiveChunkSize = config.chunkSize || 50; // default chunk size
113
+ if (config.overlapPages >= effectiveChunkSize) {
114
+ throw new Error('ChunkingConfig validation error: overlapPages must be less than chunkSize');
115
+ }
116
+ }
117
+ // Validate page threshold
118
+ if (config.pageThreshold !== undefined && config.pageThreshold <= 0) {
119
+ throw new Error('ChunkingConfig validation error: pageThreshold must be greater than 0');
120
+ }
121
+ // Validate token threshold
122
+ if (config.tokenThreshold !== undefined && config.tokenThreshold <= 0) {
123
+ throw new Error('ChunkingConfig validation error: tokenThreshold must be greater than 0');
124
+ }
125
+ // Validate max tokens per chunk (for token-based strategy)
126
+ if (config.maxTokensPerChunk !== undefined && config.maxTokensPerChunk <= 0) {
127
+ throw new Error('ChunkingConfig validation error: maxTokensPerChunk must be greater than 0');
128
+ }
129
+ // Validate overlap tokens (for token-based and hybrid strategies)
130
+ if (config.overlapTokens !== undefined) {
131
+ if (config.overlapTokens < 0) {
132
+ throw new Error('ChunkingConfig validation error: overlapTokens must be non-negative');
133
+ }
134
+ const effectiveMaxTokens = config.maxTokensPerChunk || 100000; // default max tokens
135
+ if (config.overlapTokens >= effectiveMaxTokens) {
136
+ throw new Error('ChunkingConfig validation error: overlapTokens must be less than maxTokensPerChunk');
137
+ }
138
+ }
139
+ // Validate max pages per chunk (for hybrid strategy)
140
+ if (config.maxPagesPerChunk !== undefined && config.maxPagesPerChunk <= 0) {
141
+ throw new Error('ChunkingConfig validation error: maxPagesPerChunk must be greater than 0');
142
+ }
143
+ // Validate target tokens per chunk (for hybrid strategy)
144
+ if (config.targetTokensPerChunk !== undefined && config.targetTokensPerChunk <= 0) {
145
+ throw new Error('ChunkingConfig validation error: targetTokensPerChunk must be greater than 0');
146
+ }
147
+ // Validate max concurrency
148
+ if (config.maxConcurrency !== undefined && config.maxConcurrency <= 0) {
149
+ throw new Error('ChunkingConfig validation error: maxConcurrency must be greater than 0');
150
+ }
151
+ // Validate min success threshold
152
+ if (config.minSuccessThreshold !== undefined) {
153
+ if (config.minSuccessThreshold < 0 || config.minSuccessThreshold > 1) {
154
+ throw new Error('ChunkingConfig validation error: minSuccessThreshold must be between 0 and 1');
155
+ }
156
+ }
157
+ }
71
158
  /**
72
159
  * Implements the document classification step using Amazon Bedrock.
73
160
  *
@@ -75,47 +162,60 @@ class BedrockDocumentProcessing extends base_document_processing_1.BaseDocumentP
75
162
  * the document type. The function reads the document from S3 and sends it to
76
163
  * Bedrock with the classification prompt.
77
164
  *
165
+ * This method caches the Lambda function to avoid creating duplicate resources,
166
+ * but creates a new LambdaInvoke task each time to allow proper state chaining.
167
+ *
78
168
  * @returns LambdaInvoke task configured for document classification
79
169
  */
80
170
  classificationStep() {
81
- const prompt = this.bedrockDocumentProcessingProps.classificationPrompt || BedrockDocumentProcessing.DEFAULT_CLASSIFICATION_PROMPT;
82
- const adjustedModelId = bedrock_1.BedrockModelUtils.deriveActualModelId(this.bedrockDocumentProcessingProps.classificationBedrockModel);
83
- const role = this.generateLambdaRoleForBedrock('ClassificationLambdaRole', this.bedrockDocumentProcessingProps.classificationBedrockModel);
84
- const { region, account } = aws_cdk_lib_1.Stack.of(this);
85
- const generatedLogPermissions = utilities_1.LambdaIamUtils.createLogsPermissions({
86
- account,
87
- functionName: 'bedrock-idp-classification',
88
- region,
89
- scope: this,
90
- enableObservability: this.bedrockDocumentProcessingProps.enableObservability,
91
- });
92
- this.encryptionKey.grantEncryptDecrypt(role);
93
- const bedrockFunction = new aws_lambda_python_alpha_1.PythonFunction(this, 'BedrockClassificationFunction', {
94
- functionName: generatedLogPermissions.uniqueFunctionName,
95
- architecture: aws_lambda_1.Architecture.X86_64,
96
- runtime: framework_1.DefaultRuntimes.PYTHON,
97
- entry: path.join(__dirname, 'resources/default-bedrock-invoke'),
98
- role,
99
- memorySize: 512,
100
- timeout: this.bedrockDocumentProcessingProps.stepTimeouts || aws_cdk_lib_1.Duration.minutes(5),
101
- environment: {
102
- MODEL_ID: adjustedModelId,
103
- PROMPT: prompt,
104
- INVOKE_TYPE: 'classification',
105
- ...powertools_config_1.PowertoolsConfig.generateDefaultLambdaConfig(this.bedrockDocumentProcessingProps.enableObservability, this.metricNamespace, this.metricServiceName),
106
- },
107
- environmentEncryption: this.encryptionKey,
108
- vpc: this.bedrockDocumentProcessingProps.network ? this.bedrockDocumentProcessingProps.network.vpc : undefined,
109
- vpcSubnets: this.bedrockDocumentProcessingProps.network ? this.bedrockDocumentProcessingProps.network.applicationSubnetSelection() : undefined,
110
- });
111
- for (const statement of generatedLogPermissions.policyStatements) {
112
- bedrockFunction.role?.addToPrincipalPolicy(statement);
113
- }
114
- if (this.bedrockDocumentProcessingProps.network) {
115
- bedrockFunction.role?.addToPrincipalPolicy(utilities_1.LambdaIamUtils.generateLambdaVPCPermissions());
171
+ // Create Lambda function only once
172
+ if (!this._classificationFunction) {
173
+ const prompt = this.bedrockDocumentProcessingProps.classificationPrompt || BedrockDocumentProcessing.DEFAULT_CLASSIFICATION_PROMPT;
174
+ const adjustedModelId = bedrock_1.BedrockModelUtils.deriveActualModelId(this.bedrockDocumentProcessingProps.classificationBedrockModel);
175
+ const role = this.generateLambdaRoleForBedrock('ClassificationLambdaRole', this.bedrockDocumentProcessingProps.classificationBedrockModel);
176
+ const { region, account } = aws_cdk_lib_1.Stack.of(this);
177
+ const generatedLogPermissions = utilities_1.LambdaIamUtils.createLogsPermissions({
178
+ account,
179
+ functionName: 'bedrock-idp-classification',
180
+ region,
181
+ scope: this,
182
+ enableObservability: this.bedrockDocumentProcessingProps.enableObservability,
183
+ });
184
+ this.encryptionKey.grantEncryptDecrypt(role);
185
+ this._classificationFunction = new aws_lambda_python_alpha_1.PythonFunction(this, 'BedrockClassificationFunction', {
186
+ functionName: generatedLogPermissions.uniqueFunctionName,
187
+ architecture: aws_lambda_1.Architecture.X86_64,
188
+ runtime: framework_1.DefaultRuntimes.PYTHON,
189
+ entry: path.join(__dirname, 'resources/default-bedrock-invoke'),
190
+ role,
191
+ memorySize: 512,
192
+ timeout: this.bedrockDocumentProcessingProps.stepTimeouts || aws_cdk_lib_1.Duration.minutes(5),
193
+ environment: {
194
+ MODEL_ID: adjustedModelId,
195
+ PROMPT: prompt,
196
+ INVOKE_TYPE: 'classification',
197
+ ...powertools_config_1.PowertoolsConfig.generateDefaultLambdaConfig(this.bedrockDocumentProcessingProps.enableObservability, this.metricNamespace, this.metricServiceName),
198
+ },
199
+ environmentEncryption: this.encryptionKey,
200
+ vpc: this.bedrockDocumentProcessingProps.network
201
+ ? this.bedrockDocumentProcessingProps.network.vpc
202
+ : undefined,
203
+ vpcSubnets: this.bedrockDocumentProcessingProps.network
204
+ ? this.bedrockDocumentProcessingProps.network.applicationSubnetSelection()
205
+ : undefined,
206
+ });
207
+ for (const statement of generatedLogPermissions.policyStatements) {
208
+ this._classificationFunction.role?.addToPrincipalPolicy(statement);
209
+ }
210
+ if (this.bedrockDocumentProcessingProps.network) {
211
+ this._classificationFunction.role?.addToPrincipalPolicy(utilities_1.LambdaIamUtils.generateLambdaVPCPermissions());
212
+ }
116
213
  }
117
- return new aws_stepfunctions_tasks_1.LambdaInvoke(this, 'ClassificationStep', {
118
- lambdaFunction: bedrockFunction,
214
+ // Always create a new LambdaInvoke task to allow proper state chaining
215
+ const stepId = `ClassificationStep-${this._classificationStepCounter}`;
216
+ this._classificationStepCounter++;
217
+ return new aws_stepfunctions_tasks_1.LambdaInvoke(this, stepId, {
218
+ lambdaFunction: this._classificationFunction,
119
219
  resultPath: '$.classificationResult',
120
220
  resultSelector: {
121
221
  'documentClassification.$': '$.Payload.documentClassification',
@@ -129,46 +229,59 @@ class BedrockDocumentProcessing extends base_document_processing_1.BaseDocumentP
129
229
  * structured data from the document. Uses the classification result from the
130
230
  * previous step to provide context for more accurate extraction.
131
231
  *
232
+ * This method caches the Lambda function to avoid creating duplicate resources,
233
+ * but creates a new LambdaInvoke task each time to allow proper state chaining.
234
+ *
132
235
  * @returns LambdaInvoke task configured for document extraction
133
236
  */
134
237
  processingStep() {
135
- const prompt = this.bedrockDocumentProcessingProps.processingPrompt || BedrockDocumentProcessing.DEFAULT_PROCESSING_PROMPT;
136
- const adjustedModelId = bedrock_1.BedrockModelUtils.deriveActualModelId(this.bedrockDocumentProcessingProps.processingBedrockModel);
137
- const role = this.generateLambdaRoleForBedrock('ProcessingLambdaRole', this.bedrockDocumentProcessingProps.processingBedrockModel);
138
- const { region, account } = aws_cdk_lib_1.Stack.of(this);
139
- const generatedLogPermissions = utilities_1.LambdaIamUtils.createLogsPermissions({
140
- account,
141
- functionName: 'bedrock-idp-processing',
142
- region,
143
- scope: this,
144
- });
145
- this.encryptionKey.grantEncryptDecrypt(role);
146
- const bedrockFunction = new aws_lambda_python_alpha_1.PythonFunction(this, 'BedrockExtractionFunction', {
147
- functionName: generatedLogPermissions.uniqueFunctionName,
148
- runtime: framework_1.DefaultRuntimes.PYTHON,
149
- architecture: aws_lambda_1.Architecture.X86_64,
150
- entry: path.join(__dirname, 'resources/default-bedrock-invoke'),
151
- role,
152
- memorySize: 512,
153
- timeout: this.bedrockDocumentProcessingProps.stepTimeouts || aws_cdk_lib_1.Duration.minutes(5),
154
- environment: {
155
- MODEL_ID: adjustedModelId,
156
- PROMPT: prompt,
157
- INVOKE_TYPE: 'processing',
158
- ...powertools_config_1.PowertoolsConfig.generateDefaultLambdaConfig(this.bedrockDocumentProcessingProps.enableObservability, this.metricNamespace, this.metricServiceName),
159
- },
160
- environmentEncryption: this.encryptionKey,
161
- vpc: this.bedrockDocumentProcessingProps.network ? this.bedrockDocumentProcessingProps.network.vpc : undefined,
162
- vpcSubnets: this.bedrockDocumentProcessingProps.network ? this.bedrockDocumentProcessingProps.network.applicationSubnetSelection() : undefined,
163
- });
164
- for (const statement of generatedLogPermissions.policyStatements) {
165
- bedrockFunction.role?.addToPrincipalPolicy(statement);
166
- }
167
- if (this.bedrockDocumentProcessingProps.network) {
168
- bedrockFunction.role?.addToPrincipalPolicy(utilities_1.LambdaIamUtils.generateLambdaVPCPermissions());
238
+ // Create Lambda function only once
239
+ if (!this._processingFunction) {
240
+ const prompt = this.bedrockDocumentProcessingProps.processingPrompt || BedrockDocumentProcessing.DEFAULT_PROCESSING_PROMPT;
241
+ const adjustedModelId = bedrock_1.BedrockModelUtils.deriveActualModelId(this.bedrockDocumentProcessingProps.processingBedrockModel);
242
+ const role = this.generateLambdaRoleForBedrock('ProcessingLambdaRole', this.bedrockDocumentProcessingProps.processingBedrockModel);
243
+ const { region, account } = aws_cdk_lib_1.Stack.of(this);
244
+ const generatedLogPermissions = utilities_1.LambdaIamUtils.createLogsPermissions({
245
+ account,
246
+ functionName: 'bedrock-idp-processing',
247
+ region,
248
+ scope: this,
249
+ });
250
+ this.encryptionKey.grantEncryptDecrypt(role);
251
+ this._processingFunction = new aws_lambda_python_alpha_1.PythonFunction(this, 'BedrockExtractionFunction', {
252
+ functionName: generatedLogPermissions.uniqueFunctionName,
253
+ runtime: framework_1.DefaultRuntimes.PYTHON,
254
+ architecture: aws_lambda_1.Architecture.X86_64,
255
+ entry: path.join(__dirname, 'resources/default-bedrock-invoke'),
256
+ role,
257
+ memorySize: 512,
258
+ timeout: this.bedrockDocumentProcessingProps.stepTimeouts || aws_cdk_lib_1.Duration.minutes(5),
259
+ environment: {
260
+ MODEL_ID: adjustedModelId,
261
+ PROMPT: prompt,
262
+ INVOKE_TYPE: 'processing',
263
+ ...powertools_config_1.PowertoolsConfig.generateDefaultLambdaConfig(this.bedrockDocumentProcessingProps.enableObservability, this.metricNamespace, this.metricServiceName),
264
+ },
265
+ environmentEncryption: this.encryptionKey,
266
+ vpc: this.bedrockDocumentProcessingProps.network
267
+ ? this.bedrockDocumentProcessingProps.network.vpc
268
+ : undefined,
269
+ vpcSubnets: this.bedrockDocumentProcessingProps.network
270
+ ? this.bedrockDocumentProcessingProps.network.applicationSubnetSelection()
271
+ : undefined,
272
+ });
273
+ for (const statement of generatedLogPermissions.policyStatements) {
274
+ this._processingFunction.role?.addToPrincipalPolicy(statement);
275
+ }
276
+ if (this.bedrockDocumentProcessingProps.network) {
277
+ this._processingFunction.role?.addToPrincipalPolicy(utilities_1.LambdaIamUtils.generateLambdaVPCPermissions());
278
+ }
169
279
  }
170
- return new aws_stepfunctions_tasks_1.LambdaInvoke(this, 'ProcessingStep', {
171
- lambdaFunction: bedrockFunction,
280
+ // Always create a new LambdaInvoke task to allow proper state chaining
281
+ const stepId = `ProcessingStep-${this._processingStepCounter}`;
282
+ this._processingStepCounter++;
283
+ return new aws_stepfunctions_tasks_1.LambdaInvoke(this, stepId, {
284
+ lambdaFunction: this._processingFunction,
172
285
  resultPath: '$.processingResult',
173
286
  resultSelector: {
174
287
  'documentClassification.$': '$.Payload.documentClassification',
@@ -182,6 +295,7 @@ class BedrockDocumentProcessing extends base_document_processing_1.BaseDocumentP
182
295
  inlinePolicies: {
183
296
  BedrockInvokePolicy: new aws_iam_1.PolicyDocument({
184
297
  statements: [
298
+ // S3 read-only access for document retrieval - least privilege
185
299
  ...this.ingressAdapter.generateAdapterIAMPolicies(),
186
300
  bedrock_1.BedrockModelUtils.generateModelIAMPermissions(this, model),
187
301
  ],
@@ -202,9 +316,13 @@ class BedrockDocumentProcessing extends base_document_processing_1.BaseDocumentP
202
316
  if (!this.bedrockDocumentProcessingProps.enrichmentLambdaFunction) {
203
317
  return undefined;
204
318
  }
205
- return new aws_stepfunctions_tasks_1.LambdaInvoke(this, 'EnrichmentStep', {
319
+ const stepId = `EnrichmentStep-${this._enrichmentStepCounter}`;
320
+ this._enrichmentStepCounter++;
321
+ return new aws_stepfunctions_tasks_1.LambdaInvoke(this, stepId, {
206
322
  lambdaFunction: this.bedrockDocumentProcessingProps.enrichmentLambdaFunction,
207
323
  resultPath: '$.enrichedResult',
324
+ outputPath: '$',
325
+ payloadResponseOnly: true,
208
326
  });
209
327
  }
210
328
  /**
@@ -220,15 +338,515 @@ class BedrockDocumentProcessing extends base_document_processing_1.BaseDocumentP
220
338
  if (!this.bedrockDocumentProcessingProps.postProcessingLambdaFunction) {
221
339
  return undefined;
222
340
  }
223
- return new aws_stepfunctions_tasks_1.LambdaInvoke(this, 'PostProcessingStep', {
341
+ const stepId = `PostProcessingStep-${this._postProcessingStepCounter}`;
342
+ this._postProcessingStepCounter++;
343
+ return new aws_stepfunctions_tasks_1.LambdaInvoke(this, stepId, {
224
344
  lambdaFunction: this.bedrockDocumentProcessingProps.postProcessingLambdaFunction,
225
345
  resultPath: '$.postProcessedResult',
346
+ outputPath: '$',
347
+ payloadResponseOnly: true,
226
348
  });
227
349
  }
350
+ /**
351
+ * Implements the optional preprocessing step for PDF chunking.
352
+ *
353
+ * When chunking is enabled, creates a Lambda function that analyzes PDFs and
354
+ * splits large documents into manageable chunks. The function:
355
+ * 1. Analyzes the PDF to determine page count and token estimates
356
+ * 2. Decides if chunking is needed based on configured thresholds
357
+ * 3. If chunking is needed, splits the PDF and uploads chunks to S3
358
+ *
359
+ * @returns LambdaInvoke task for PDF analysis and chunking, or undefined if chunking is disabled
360
+ */
361
+ preprocessingStep() {
362
+ // Only enable chunking if explicitly configured
363
+ if (!this.bedrockDocumentProcessingProps.enableChunking) {
364
+ return undefined;
365
+ }
366
+ const { region, account } = aws_cdk_lib_1.Stack.of(this);
367
+ const chunkingConfig = this.bedrockDocumentProcessingProps.chunkingConfig || {};
368
+ // Create IAM role for chunking Lambda with least privilege permissions
369
+ // Chunking Lambda needs: GetObject (read raw PDFs), PutObject (write chunks)
370
+ const role = new aws_iam_1.Role(this, 'ChunkingLambdaRole', {
371
+ assumedBy: new aws_iam_1.ServicePrincipal('lambda.amazonaws.com'),
372
+ inlinePolicies: {
373
+ ChunkingPolicy: new aws_iam_1.PolicyDocument({
374
+ statements: [
375
+ ...this.ingressAdapter.generateAdapterIAMPolicies(),
376
+ ],
377
+ }),
378
+ },
379
+ });
380
+ const generatedLogPermissions = utilities_1.LambdaIamUtils.createLogsPermissions({
381
+ account,
382
+ functionName: 'bedrock-idp-chunking',
383
+ region,
384
+ scope: this,
385
+ enableObservability: this.bedrockDocumentProcessingProps.enableObservability,
386
+ });
387
+ this.encryptionKey.grantEncryptDecrypt(role);
388
+ // Create PDF Analysis & Chunking Lambda
389
+ const chunkingLambda = new aws_lambda_python_alpha_1.PythonFunction(this, 'PDFChunkingFunction', {
390
+ functionName: generatedLogPermissions.uniqueFunctionName,
391
+ entry: path.join(__dirname, 'resources/pdf-chunking'),
392
+ index: 'handler.py',
393
+ handler: 'handler',
394
+ runtime: framework_1.DefaultRuntimes.PYTHON,
395
+ architecture: aws_lambda_1.Architecture.X86_64,
396
+ role,
397
+ memorySize: 2048,
398
+ timeout: aws_cdk_lib_1.Duration.minutes(10),
399
+ environment: {
400
+ CHUNKING_STRATEGY: chunkingConfig.strategy || 'hybrid',
401
+ PAGE_THRESHOLD: String(chunkingConfig.pageThreshold || 100),
402
+ TOKEN_THRESHOLD: String(chunkingConfig.tokenThreshold || 150000),
403
+ CHUNK_SIZE: String(chunkingConfig.chunkSize || 50),
404
+ OVERLAP_PAGES: String(chunkingConfig.overlapPages || 5),
405
+ MAX_TOKENS_PER_CHUNK: String(chunkingConfig.maxTokensPerChunk || 100000),
406
+ OVERLAP_TOKENS: String(chunkingConfig.overlapTokens || 5000),
407
+ TARGET_TOKENS_PER_CHUNK: String(chunkingConfig.targetTokensPerChunk || 80000),
408
+ MAX_PAGES_PER_CHUNK: String(chunkingConfig.maxPagesPerChunk || 99),
409
+ ...powertools_config_1.PowertoolsConfig.generateDefaultLambdaConfig(this.bedrockDocumentProcessingProps.enableObservability, this.metricNamespace, this.metricServiceName),
410
+ },
411
+ environmentEncryption: this.encryptionKey,
412
+ vpc: this.bedrockDocumentProcessingProps.network
413
+ ? this.bedrockDocumentProcessingProps.network.vpc
414
+ : undefined,
415
+ vpcSubnets: this.bedrockDocumentProcessingProps.network
416
+ ? this.bedrockDocumentProcessingProps.network.applicationSubnetSelection()
417
+ : undefined,
418
+ });
419
+ for (const statement of generatedLogPermissions.policyStatements) {
420
+ chunkingLambda.role?.addToPrincipalPolicy(statement);
421
+ }
422
+ if (this.bedrockDocumentProcessingProps.network) {
423
+ chunkingLambda.role?.addToPrincipalPolicy(utilities_1.LambdaIamUtils.generateLambdaVPCPermissions());
424
+ }
425
+ return new aws_stepfunctions_tasks_1.LambdaInvoke(this, 'PDFAnalysisAndChunking', {
426
+ lambdaFunction: chunkingLambda,
427
+ resultPath: '$.chunkingResult',
428
+ resultSelector: {
429
+ 'requiresChunking.$': '$.Payload.requiresChunking',
430
+ 'tokenAnalysis.$': '$.Payload.tokenAnalysis',
431
+ 'strategy.$': '$.Payload.strategy',
432
+ 'chunks.$': '$.Payload.chunks',
433
+ },
434
+ });
435
+ }
436
+ /**
437
+ * Builds the additional chunking metadata attributes to be stored in DynamoDB.
438
+ *
439
+ * When chunking is enabled, each value is read from $.chunkingResult in the state input:
440
+ * - ChunkingEnabled: $.chunkingResult.requiresChunking rendered as a string via States.Format
441
+ * - ChunkingStrategy: $.chunkingResult.strategy (fixed-pages, token-based, hybrid)
442
+ * - TokenAnalysis: $.chunkingResult.tokenAnalysis serialized to a JSON string
443
+ * - ChunkMetadata: $.chunkingResult.chunks serialized to a JSON string
444
+ *
445
+ * @returns Record of DynamoDB attribute values for chunking metadata, or an empty object when enableChunking is falsy
446
+ */
447
+ preprocessingMetadata() {
448
+ if (!this.bedrockDocumentProcessingProps.enableChunking) {
449
+ return {}; // chunking disabled: contribute no extra attributes
450
+ }
451
+ return {
452
+ ChunkingEnabled: aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString(aws_stepfunctions_1.JsonPath.stringAt('States.Format(\'{}\', $.chunkingResult.requiresChunking)')),
453
+ ChunkingStrategy: aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString(aws_stepfunctions_1.JsonPath.stringAt('$.chunkingResult.strategy')),
454
+ TokenAnalysis: aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString(aws_stepfunctions_1.JsonPath.jsonToString(aws_stepfunctions_1.JsonPath.objectAt('$.chunkingResult.tokenAnalysis'))),
455
+ ChunkMetadata: aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString(aws_stepfunctions_1.JsonPath.jsonToString(aws_stepfunctions_1.JsonPath.objectAt('$.chunkingResult.chunks'))),
456
+ };
457
+ }
458
+ /**
459
+ * Creates the processing workflow with conditional branching for chunked documents.
460
+ *
461
+ * When chunking is enabled, creates a Choice State ('CheckIfChunked') that:
462
+ * - Routes to the chunked processing flow when $.chunkingResult.requiresChunking is true
463
+ * - Otherwise routes to the standard processing flow, built with the 'Standard' id prefix
464
+ *
465
+ * When chunking is disabled, returns the standard processing workflow (no prefix, no Choice state).
466
+ *
467
+ * @returns Step Functions chain for processing the document (the Choice state itself when chunking is enabled)
468
+ */
469
+ createProcessingWorkflow() {
470
+ // If chunking is not enabled, use standard workflow
471
+ if (!this.bedrockDocumentProcessingProps.enableChunking) {
472
+ return this.createStandardProcessingWorkflow();
473
+ }
474
+ // Create Choice State to check whether the preprocessing step decided the document needs chunking
475
+ const choiceState = new aws_stepfunctions_1.Choice(this, 'CheckIfChunked');
476
+ choiceState
477
+ .when(aws_stepfunctions_1.Condition.booleanEquals('$.chunkingResult.requiresChunking', true), this.createChunkedProcessingFlow())
478
+ .otherwise(
479
+ // Pass 'Standard' prefix to avoid construct ID collisions with chunked flow
480
+ this.createStandardProcessingWorkflow('Standard'));
481
+ return choiceState;
482
+ }
483
+ /**
484
+ * Creates the chunked processing flow for large documents.
485
+ *
486
+ * This flow:
487
+ * 1. Uses a Map State to process each chunk in parallel (or sequentially: maxConcurrency is 1 in 'sequential' mode, else chunkingConfig.maxConcurrency or 10)
488
+ * 2. Each chunk goes through classification and processing
489
+ * 3. Results are aggregated using the aggregation Lambda, then copied into processingResult by a Pass state
490
+ * 4. DynamoDB is updated with the aggregated result, then the optional enrichment and post-processing steps run
491
+ * 5. Temporary chunks are cleaned up from S3, then the ingress adapter's success chain runs
492
+ *
493
+ * @returns Step Functions chain for chunked document processing
494
+ */
495
+ createChunkedProcessingFlow() {
496
+ const chunkingConfig = this.bedrockDocumentProcessingProps.chunkingConfig || {};
497
+ const maxConcurrency = chunkingConfig.processingMode === 'sequential'
498
+ ? 1 // sequential mode: one chunk at a time
499
+ : (chunkingConfig.maxConcurrency || 10); // falls back to 10 when unset (or 0)
500
+ // Create Map State for processing chunks
501
+ const mapState = new aws_stepfunctions_1.Map(this, 'ProcessChunks', {
502
+ itemsPath: '$.chunkingResult.chunks',
503
+ maxConcurrency,
504
+ parameters: {
505
+ 'documentId.$': '$.documentId',
506
+ 'chunk.$': '$$.Map.Item.Value',
507
+ 'chunkIndex.$': '$$.Map.Item.Index',
508
+ 'totalChunks.$': 'States.ArrayLength($.chunkingResult.chunks)',
509
+ // Override content to point to the chunk PDF, not the original document
510
+ 'content': {
511
+ 'location': 's3',
512
+ 'bucket.$': '$$.Map.Item.Value.bucket',
513
+ 'key.$': '$$.Map.Item.Value.key',
514
+ 'filename.$': '$.content.filename',
515
+ },
516
+ 'contentType.$': '$.contentType',
517
+ },
518
+ resultPath: '$.chunkResults',
519
+ });
520
+ // Define per-chunk processing: classification → processing
521
+ const chunkClassification = this.classificationStep();
522
+ const chunkProcessing = this.processingStep();
523
+ mapState.itemProcessor(chunkClassification.next(chunkProcessing));
524
+ // Create aggregation step (Lambda invoke only, normalization added separately)
525
+ const aggregationLambdaStep = this.createAggregationStep();
526
+ // Add a Pass state to normalize the aggregated result for downstream compatibility
527
+ // This copies aggregatedResult to processingResult so enrichment/post-processing
528
+ // see a consistent structure regardless of whether chunking was used
529
+ const normalizeState = new aws_stepfunctions_1.Pass(this, 'NormalizeAggregatedResult', {
530
+ parameters: {
531
+ 'documentId.$': '$.documentId',
532
+ 'contentType.$': '$.contentType',
533
+ 'content.$': '$.content',
534
+ 'chunkingResult.$': '$.chunkingResult',
535
+ 'chunkResults.$': '$.chunkResults',
536
+ 'aggregatedResult.$': '$.aggregatedResult',
537
+ // Copy aggregated result to processingResult for downstream compatibility
538
+ 'processingResult': {
539
+ 'result.$': '$.aggregatedResult.result',
540
+ },
541
+ // Also set classificationResult from the first chunk result (index 0, taken unconditionally) for consistency
542
+ 'classificationResult.$': '$.chunkResults[0].classificationResult',
543
+ },
544
+ });
545
+ // Create DynamoDB update step for aggregated result
546
+ const updateAggregatedResultStep = this.createUpdateAggregatedResultStep();
547
+ // Create cleanup step
548
+ const cleanupStep = this.createCleanupStep();
549
+ // Create move to processed chain with 'Chunked' prefix to avoid ID collisions
550
+ const moveToProcessed = this.ingressAdapter.createSuccessChain(this, 'Chunked');
551
+ // Create error handler for aggregation failures
552
+ const aggregationErrorHandler = this.createAggregationErrorHandler();
553
+ // Get optional enrichment and post-processing steps
554
+ const enrichmentStep = this.enrichmentStep();
555
+ const postProcessingStep = this.postProcessingStep();
556
+ // Build the tail of the chain back to front, starting from its last two stages
557
+ // Tail built here: [Enrichment] → [PostProcessing] → Cleanup → Move to Processed
558
+ let finalChain = cleanupStep
559
+ .addRetry({
560
+ errors: ['Lambda.ServiceException', 'Lambda.TooManyRequestsException'],
561
+ interval: aws_cdk_lib_1.Duration.seconds(2),
562
+ maxAttempts: 3,
563
+ backoffRate: 2,
564
+ })
565
+ .next(moveToProcessed);
566
+ // Add post-processing if provided (insert before cleanup)
567
+ if (postProcessingStep) {
568
+ const postProcessingErrorHandler = new aws_stepfunctions_tasks_1.DynamoUpdateItem(this, 'ChunkedPostProcessingFailDDBUpdate', {
569
+ table: this.documentProcessingTable,
570
+ key: {
571
+ DocumentId: aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString(aws_stepfunctions_1.JsonPath.stringAt('$.documentId')),
572
+ },
573
+ updateExpression: 'SET WorkflowStatus = :newStatus',
574
+ expressionAttributeValues: {
575
+ ':newStatus': aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString('post-processing-failure'),
576
+ },
577
+ resultPath: aws_stepfunctions_1.JsonPath.DISCARD,
578
+ }).next(this.ingressAdapter.createFailedChain(this, 'ChunkedPostProc'));
579
+ finalChain = postProcessingStep
580
+ .addCatch(postProcessingErrorHandler, {
581
+ resultPath: aws_stepfunctions_1.JsonPath.DISCARD,
582
+ })
583
+ .next(new aws_stepfunctions_tasks_1.DynamoUpdateItem(this, 'ChunkedPostProcessingSuccessUpdate', {
584
+ table: this.documentProcessingTable,
585
+ key: {
586
+ DocumentId: aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString(aws_stepfunctions_1.JsonPath.stringAt('$.documentId')),
587
+ },
588
+ updateExpression: 'SET WorkflowStatus = :newStatus, PostProcessingResult = :postProcessingResult',
589
+ expressionAttributeValues: {
590
+ ':newStatus': aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString('post-processing-complete'),
591
+ ':postProcessingResult': aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString(aws_stepfunctions_1.JsonPath.jsonToString(aws_stepfunctions_1.JsonPath.objectAt('$.postProcessedResult'))),
592
+ },
593
+ resultPath: aws_stepfunctions_1.JsonPath.DISCARD,
594
+ }).next(finalChain));
595
+ }
596
+ // Add enrichment if provided (insert before post-processing or cleanup)
597
+ if (enrichmentStep) {
598
+ const enrichmentErrorHandler = new aws_stepfunctions_tasks_1.DynamoUpdateItem(this, 'ChunkedEnrichmentFailDDBUpdate', {
599
+ table: this.documentProcessingTable,
600
+ key: {
601
+ DocumentId: aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString(aws_stepfunctions_1.JsonPath.stringAt('$.documentId')),
602
+ },
603
+ updateExpression: 'SET WorkflowStatus = :newStatus',
604
+ expressionAttributeValues: {
605
+ ':newStatus': aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString('enrichment-failure'),
606
+ },
607
+ resultPath: aws_stepfunctions_1.JsonPath.DISCARD,
608
+ }).next(this.ingressAdapter.createFailedChain(this, 'ChunkedEnrich'));
609
+ finalChain = enrichmentStep
610
+ .addCatch(enrichmentErrorHandler, {
611
+ resultPath: aws_stepfunctions_1.JsonPath.DISCARD,
612
+ })
613
+ .next(new aws_stepfunctions_tasks_1.DynamoUpdateItem(this, 'ChunkedEnrichmentSuccessUpdate', {
614
+ table: this.documentProcessingTable,
615
+ key: {
616
+ DocumentId: aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString(aws_stepfunctions_1.JsonPath.stringAt('$.documentId')),
617
+ },
618
+ updateExpression: 'SET WorkflowStatus = :newStatus, EnrichmentResult = :enrichmentResult',
619
+ expressionAttributeValues: {
620
+ ':newStatus': postProcessingStep // enrichment is the last stage only when no post-processing follows
621
+ ? aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString('enrichment-complete')
622
+ : aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString('complete'),
623
+ ':enrichmentResult': aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString(aws_stepfunctions_1.JsonPath.jsonToString(aws_stepfunctions_1.JsonPath.objectAt('$.enrichedResult'))),
624
+ },
625
+ resultPath: aws_stepfunctions_1.JsonPath.DISCARD,
626
+ }).next(finalChain));
627
+ }
628
+ // Chain: Map State → Aggregation → Normalize → DynamoDB Update → [Enrichment] → [PostProcessing] → Cleanup → Move to Processed
629
+ return mapState
630
+ .addCatch(aggregationErrorHandler, { // the same handler chain is the catch target for both the Map state and the aggregation task
631
+ resultPath: '$.error',
632
+ })
633
+ .next(aggregationLambdaStep
634
+ .addCatch(aggregationErrorHandler, {
635
+ resultPath: '$.error',
636
+ })
637
+ .addRetry({
638
+ errors: ['Lambda.ServiceException', 'Lambda.TooManyRequestsException'],
639
+ interval: aws_cdk_lib_1.Duration.seconds(2),
640
+ maxAttempts: 3,
641
+ backoffRate: 2,
642
+ })
643
+ .next(normalizeState.next(updateAggregatedResultStep
644
+ .addRetry({
645
+ errors: ['DynamoDB.ProvisionedThroughputExceededException'],
646
+ interval: aws_cdk_lib_1.Duration.seconds(1),
647
+ maxAttempts: 3,
648
+ backoffRate: 2,
649
+ })
650
+ .next(finalChain))));
651
+ }
652
+ /**
653
+ * Creates the aggregation step for combining chunk results using Bedrock.
654
+ *
655
+ * The Lambda (entry resources/default-bedrock-invoke, INVOKE_TYPE 'aggregation') is created on the first call
656
+ * and cached on this._aggregationFunction; its prompt is aggregationPrompt or DEFAULT_AGGREGATION_PROMPT.
657
+ * Every call returns a new LambdaInvoke whose construct id is derived from this._aggregationStepCounter.
658
+ *
659
+ * @returns LambdaInvoke task for result aggregation; writes { result } to $.aggregatedResult
660
+ */
661
+ createAggregationStep() {
662
+ // Create Lambda function only once (reuses bedrock-invoke pattern)
663
+ if (!this._aggregationFunction) {
664
+ const prompt = this.bedrockDocumentProcessingProps.aggregationPrompt || BedrockDocumentProcessing.DEFAULT_AGGREGATION_PROMPT;
665
+ const adjustedModelId = bedrock_1.BedrockModelUtils.deriveActualModelId(this.bedrockDocumentProcessingProps.processingBedrockModel);
666
+ const role = this.generateLambdaRoleForBedrock('AggregationLambdaRole', this.bedrockDocumentProcessingProps.processingBedrockModel);
667
+ const { region, account } = aws_cdk_lib_1.Stack.of(this);
668
+ const generatedLogPermissions = utilities_1.LambdaIamUtils.createLogsPermissions({
669
+ account,
670
+ functionName: 'bedrock-idp-aggregation',
671
+ region,
672
+ scope: this,
673
+ enableObservability: this.bedrockDocumentProcessingProps.enableObservability,
674
+ });
675
+ this.encryptionKey.grantEncryptDecrypt(role);
676
+ this._aggregationFunction = new aws_lambda_python_alpha_1.PythonFunction(this, 'BedrockAggregationFunction', {
677
+ functionName: generatedLogPermissions.uniqueFunctionName,
678
+ architecture: aws_lambda_1.Architecture.X86_64,
679
+ runtime: framework_1.DefaultRuntimes.PYTHON,
680
+ entry: path.join(__dirname, 'resources/default-bedrock-invoke'),
681
+ role,
682
+ memorySize: 1024,
683
+ timeout: this.bedrockDocumentProcessingProps.stepTimeouts || aws_cdk_lib_1.Duration.minutes(5),
684
+ environment: {
685
+ MODEL_ID: adjustedModelId,
686
+ PROMPT: prompt,
687
+ INVOKE_TYPE: 'aggregation',
688
+ INVOKE_MAX_TOKENS: '64000', // Aggregation may need more tokens for merged output
689
+ ...powertools_config_1.PowertoolsConfig.generateDefaultLambdaConfig(this.bedrockDocumentProcessingProps.enableObservability, this.metricNamespace, this.metricServiceName),
690
+ },
691
+ environmentEncryption: this.encryptionKey,
692
+ vpc: this.bedrockDocumentProcessingProps.network
693
+ ? this.bedrockDocumentProcessingProps.network.vpc
694
+ : undefined,
695
+ vpcSubnets: this.bedrockDocumentProcessingProps.network
696
+ ? this.bedrockDocumentProcessingProps.network.applicationSubnetSelection()
697
+ : undefined,
698
+ });
699
+ for (const statement of generatedLogPermissions.policyStatements) {
700
+ this._aggregationFunction.role?.addToPrincipalPolicy(statement);
701
+ }
702
+ if (this.bedrockDocumentProcessingProps.network) {
703
+ this._aggregationFunction.role?.addToPrincipalPolicy(utilities_1.LambdaIamUtils.generateLambdaVPCPermissions());
704
+ }
705
+ }
706
+ // Always create a new LambdaInvoke task to allow proper state chaining
707
+ const stepId = `AggregationStep-${this._aggregationStepCounter}`;
708
+ this._aggregationStepCounter++;
709
+ // Pass chunk results as data content - the Lambda will format them for Bedrock
710
+ return new aws_stepfunctions_tasks_1.LambdaInvoke(this, stepId, {
711
+ lambdaFunction: this._aggregationFunction,
712
+ payload: aws_stepfunctions_1.TaskInput.fromObject({
713
+ 'documentId.$': '$.documentId',
714
+ 'contentType': 'data',
715
+ 'content': {
716
+ // Pass the chunk results as JSON string for the Lambda to process
717
+ 'data.$': 'States.JsonToString($.chunkResults)',
718
+ },
719
+ }),
720
+ // Store the Lambda's result under $.aggregatedResult only. Copying it to $.processingResult is not done
721
+ // here; createChunkedProcessingFlow does that in its NormalizeAggregatedResult Pass state.
722
+ resultPath: '$.aggregatedResult',
723
+ resultSelector: {
724
+ 'result.$': '$.Payload.result',
725
+ },
726
+ });
727
+ }
728
+ /**
729
+ * Creates the DynamoDB update step for storing aggregated results.
730
+ *
731
+ * Updates the document record (keyed by $.documentId) with:
732
+ * - AggregatedResult: $.aggregatedResult serialized to a JSON string
733
+ * - WorkflowStatus: 'complete' (NOTE(review): in the chunked flow this runs before the optional enrichment/post-processing steps, which set the status again — confirm intended)
734
+ *
735
+ * @returns DynamoUpdateItem task for storing aggregated results; its own output is discarded
736
+ */
737
+ createUpdateAggregatedResultStep() {
738
+ return new aws_stepfunctions_tasks_1.DynamoUpdateItem(this, 'StoreAggregatedResult', {
739
+ table: this.documentProcessingTable,
740
+ key: {
741
+ DocumentId: aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString(aws_stepfunctions_1.JsonPath.stringAt('$.documentId')),
742
+ },
743
+ updateExpression: 'SET AggregatedResult = :result, WorkflowStatus = :status',
744
+ expressionAttributeValues: {
745
+ ':result': aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString(aws_stepfunctions_1.JsonPath.jsonToString(aws_stepfunctions_1.JsonPath.objectAt('$.aggregatedResult'))),
746
+ ':status': aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString('complete'),
747
+ },
748
+ resultPath: aws_stepfunctions_1.JsonPath.DISCARD,
749
+ });
750
+ }
751
+ /**
752
+ * Creates the cleanup Lambda step for removing temporary chunk files.
753
+ *
754
+ * The cleanup Lambda (handler in resources/cleanup, not visible in this file) is documented to:
755
+ * - Delete all chunk files from the S3 chunks/ prefix
756
+ * - Use batch delete for efficiency (up to 1000 objects per request)
757
+ * - Log errors without failing the workflow (NOTE(review): no catch is attached to this task here — confirm the handler never raises)
758
+ *
759
+ * @returns LambdaInvoke task for chunk cleanup; it receives documentId and $.chunkingResult.chunks and its result is discarded
760
+ */
761
+ createCleanupStep() {
762
+ // Create Lambda function only once
763
+ if (!this._cleanupFunction) {
764
+ const { region, account } = aws_cdk_lib_1.Stack.of(this);
765
+ const role = new aws_iam_1.Role(this, 'CleanupLambdaRole', {
766
+ assumedBy: new aws_iam_1.ServicePrincipal('lambda.amazonaws.com'),
767
+ inlinePolicies: {
768
+ CleanupPolicy: new aws_iam_1.PolicyDocument({
769
+ statements: [
770
+ // S3 access for deleting chunks only - least privilege
771
+ ...this.ingressAdapter.generateAdapterIAMPolicies(['s3:DeleteObject'], true),
772
+ ],
773
+ }),
774
+ },
775
+ });
776
+ const generatedLogPermissions = utilities_1.LambdaIamUtils.createLogsPermissions({
777
+ account,
778
+ functionName: 'bedrock-idp-cleanup',
779
+ region,
780
+ scope: this,
781
+ enableObservability: this.bedrockDocumentProcessingProps.enableObservability,
782
+ });
783
+ this.encryptionKey.grantEncryptDecrypt(role);
784
+ this._cleanupFunction = new aws_lambda_python_alpha_1.PythonFunction(this, 'CleanupFunction', {
785
+ functionName: generatedLogPermissions.uniqueFunctionName,
786
+ entry: path.join(__dirname, 'resources/cleanup'),
787
+ index: 'handler.py',
788
+ handler: 'handler',
789
+ runtime: framework_1.DefaultRuntimes.PYTHON,
790
+ architecture: aws_lambda_1.Architecture.X86_64,
791
+ role,
792
+ memorySize: 512,
793
+ timeout: aws_cdk_lib_1.Duration.minutes(5),
794
+ environment: {
795
+ ...powertools_config_1.PowertoolsConfig.generateDefaultLambdaConfig(this.bedrockDocumentProcessingProps.enableObservability, this.metricNamespace, this.metricServiceName),
796
+ },
797
+ environmentEncryption: this.encryptionKey,
798
+ vpc: this.bedrockDocumentProcessingProps.network
799
+ ? this.bedrockDocumentProcessingProps.network.vpc
800
+ : undefined,
801
+ vpcSubnets: this.bedrockDocumentProcessingProps.network
802
+ ? this.bedrockDocumentProcessingProps.network.applicationSubnetSelection()
803
+ : undefined,
804
+ });
805
+ for (const statement of generatedLogPermissions.policyStatements) {
806
+ this._cleanupFunction.role?.addToPrincipalPolicy(statement);
807
+ }
808
+ if (this.bedrockDocumentProcessingProps.network) {
809
+ this._cleanupFunction.role?.addToPrincipalPolicy(utilities_1.LambdaIamUtils.generateLambdaVPCPermissions());
810
+ }
811
+ }
812
+ return new aws_stepfunctions_tasks_1.LambdaInvoke(this, 'CleanupChunks', { // fixed construct id: a second call on the same scope would reuse this id
813
+ lambdaFunction: this._cleanupFunction,
814
+ payload: aws_stepfunctions_1.TaskInput.fromObject({
815
+ 'documentId.$': '$.documentId',
816
+ 'chunks.$': '$.chunkingResult.chunks',
817
+ }),
818
+ resultPath: aws_stepfunctions_1.JsonPath.DISCARD,
819
+ });
820
+ }
821
+ /**
822
+ * Creates the error handler for failures in the chunked flow's Map state or aggregation task.
823
+ *
824
+ * When either of those states fails (both attach this handler as their catch target):
825
+ * - Updates DynamoDB WorkflowStatus to 'aggregation-failure' for $.documentId
826
+ * - Runs the ingress adapter's failed chain (presumably moves the document to the failed/ prefix — adapter not visible here)
827
+ *
828
+ * @returns Step Functions chain for handling aggregation errors
829
+ */
830
+ createAggregationErrorHandler() {
831
+ const updateFailureStatus = new aws_stepfunctions_tasks_1.DynamoUpdateItem(this, 'AggregationFailDDBUpdate', {
832
+ table: this.documentProcessingTable,
833
+ key: {
834
+ DocumentId: aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString(aws_stepfunctions_1.JsonPath.stringAt('$.documentId')),
835
+ },
836
+ updateExpression: 'SET WorkflowStatus = :newStatus',
837
+ expressionAttributeValues: {
838
+ ':newStatus': aws_stepfunctions_tasks_1.DynamoAttributeValue.fromString('aggregation-failure'),
839
+ },
840
+ resultPath: aws_stepfunctions_1.JsonPath.DISCARD,
841
+ });
842
+ // Use 'Chunked' prefix to avoid ID collisions with standard workflow
843
+ const moveToFailed = this.ingressAdapter.createFailedChain(this, 'Chunked');
844
+ return updateFailureStatus.next(moveToFailed);
845
+ }
228
846
  }
229
847
  exports.BedrockDocumentProcessing = BedrockDocumentProcessing;
230
848
  _a = JSII_RTTI_SYMBOL_1;
231
- BedrockDocumentProcessing[_a] = { fqn: "@cdklabs/cdk-appmod-catalog-blueprints.BedrockDocumentProcessing", version: "1.4.1" };
849
+ BedrockDocumentProcessing[_a] = { fqn: "@cdklabs/cdk-appmod-catalog-blueprints.BedrockDocumentProcessing", version: "1.6.0" };
232
850
  BedrockDocumentProcessing.DEFAULT_CLASSIFICATION_PROMPT = `
233
851
  Analyze the document below, and classify the type of document it is (eg. INVOICE, IDENTITY_DOCUMENT, RECEIPT, etc). The result should be in JSON and should follow the following structure (only respond in JSON with the following structure and do not use markdown to indicate the json, just output plain old json with nothing else):
234
852
 
@@ -258,4 +876,26 @@ BedrockDocumentProcessing.DEFAULT_PROCESSING_PROMPT = `
258
876
  Attached document is as follows:
259
877
 
260
878
  `;
261
- //# sourceMappingURL=data:application/json;base64,
879
+ BedrockDocumentProcessing.DEFAULT_AGGREGATION_PROMPT = `
880
+ You are given the processing results from multiple chunks of a large document that was split for processing.
881
+ Your task is to synthesize these chunk results into a single, coherent final result.
882
+
883
+ Instructions:
884
+ 1. Review all the chunk results provided below
885
+ 2. Merge and deduplicate any overlapping information (chunks may have overlapping pages)
886
+ 3. Synthesize the information into a unified, coherent result
887
+ 4. Maintain the same output format as the individual chunk results
888
+ 5. If the chunks contain summaries, create a comprehensive summary that covers all sections
889
+ 6. If the chunks contain entities, deduplicate and consolidate them
890
+ 7. Preserve important details from each chunk while avoiding redundancy
891
+
892
+ Return the result as JSON (only respond in JSON without markdown formatting):
893
+
894
+ {
895
+ "result": <SYNTHESIZED_RESULT>
896
+ }
897
+
898
+ The chunk results to aggregate are as follows:
899
+
900
+ `;
901
+ //# sourceMappingURL=data:application/json;base64,