@cdklabs/cdk-appmod-catalog-blueprints 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.jsii +701 -204
  2. package/README.md +100 -160
  3. package/lib/document-processing/adapter/adapter.d.ts +47 -0
  4. package/lib/document-processing/adapter/adapter.js +5 -0
  5. package/lib/document-processing/adapter/index.d.ts +2 -0
  6. package/lib/document-processing/adapter/index.js +19 -0
  7. package/lib/document-processing/adapter/queued-s3-adapter.d.ts +66 -0
  8. package/lib/document-processing/adapter/queued-s3-adapter.js +230 -0
  9. package/lib/document-processing/agentic-document-processing.d.ts +22 -0
  10. package/lib/document-processing/agentic-document-processing.js +11 -14
  11. package/lib/document-processing/base-document-processing.d.ts +8 -44
  12. package/lib/document-processing/base-document-processing.js +23 -190
  13. package/lib/document-processing/bedrock-document-processing.js +3 -13
  14. package/lib/document-processing/default-document-processing-config.d.ts +3 -0
  15. package/lib/document-processing/default-document-processing-config.js +14 -0
  16. package/lib/document-processing/index.d.ts +2 -0
  17. package/lib/document-processing/index.js +3 -1
  18. package/lib/document-processing/resources/default-bedrock-invoke/index.py +36 -24
  19. package/lib/document-processing/resources/default-sqs-consumer/index.py +10 -5
  20. package/lib/document-processing/resources/default-strands-agent/index.py +8 -5
  21. package/lib/document-processing/tests/agentic-document-processing-nag.test.js +6 -2
  22. package/lib/document-processing/tests/agentic-document-processing.test.js +5 -19
  23. package/lib/document-processing/tests/bedrock-document-processing-nag.test.js +6 -2
  24. package/lib/framework/custom-resource/default-runtimes.js +1 -1
  25. package/lib/framework/foundation/access-log.js +1 -1
  26. package/lib/framework/foundation/eventbridge-broker.js +1 -1
  27. package/lib/framework/foundation/network.js +1 -1
  28. package/lib/tsconfig.tsbuildinfo +1 -1
  29. package/lib/utilities/data-loader.js +1 -1
  30. package/lib/utilities/lambda-iam-utils.js +1 -1
  31. package/lib/utilities/observability/cloudfront-distribution-observability-property-injector.js +1 -1
  32. package/lib/utilities/observability/default-observability-config.d.ts +9 -0
  33. package/lib/utilities/observability/default-observability-config.js +20 -0
  34. package/lib/utilities/observability/index.d.ts +1 -0
  35. package/lib/utilities/observability/index.js +2 -1
  36. package/lib/utilities/observability/lambda-observability-property-injector.js +1 -1
  37. package/lib/utilities/observability/powertools-config.js +1 -1
  38. package/lib/utilities/observability/state-machine-observability-property-injector.js +1 -1
  39. package/lib/webapp/frontend-construct.js +1 -1
  40. package/package.json +9 -9
@@ -103,9 +103,6 @@ class BedrockDocumentProcessing extends base_document_processing_1.BaseDocumentP
103
103
  enableObservability: this.bedrockDocumentProcessingProps.enableObservability,
104
104
  });
105
105
  this.encryptionKey.grantEncryptDecrypt(role);
106
- if (this.bucketEncryptionKey) {
107
- this.bucketEncryptionKey.grantEncryptDecrypt(role);
108
- }
109
106
  const bedrockFunction = new aws_lambda_python_alpha_1.PythonFunction(this, 'BedrockClassificationFunction', {
110
107
  functionName: generatedLogPermissions.uniqueFunctionName,
111
108
  architecture: aws_lambda_1.Architecture.X86_64,
@@ -160,9 +157,6 @@ class BedrockDocumentProcessing extends base_document_processing_1.BaseDocumentP
160
157
  scope: this,
161
158
  });
162
159
  this.encryptionKey.grantEncryptDecrypt(role);
163
- if (this.bucketEncryptionKey) {
164
- this.bucketEncryptionKey.grantEncryptDecrypt(role);
165
- }
166
160
  const bedrockFunction = new aws_lambda_python_alpha_1.PythonFunction(this, 'BedrockExtractionFunction', {
167
161
  functionName: generatedLogPermissions.uniqueFunctionName,
168
162
  runtime: framework_1.DefaultRuntimes.PYTHON,
@@ -203,11 +197,7 @@ class BedrockDocumentProcessing extends base_document_processing_1.BaseDocumentP
203
197
  inlinePolicies: {
204
198
  BedrockInvokePolicy: new aws_iam_1.PolicyDocument({
205
199
  statements: [
206
- new aws_iam_1.PolicyStatement({
207
- effect: aws_iam_1.Effect.ALLOW,
208
- actions: ['s3:GetObject'],
209
- resources: [`${this.bucket.bucketArn}/*`],
210
- }),
200
+ ...this.ingressAdapter.generateAdapterIAMPolicies(),
211
201
  new aws_iam_1.PolicyStatement({
212
202
  effect: aws_iam_1.Effect.ALLOW,
213
203
  actions: [
@@ -263,7 +253,7 @@ class BedrockDocumentProcessing extends base_document_processing_1.BaseDocumentP
263
253
  }
264
254
  exports.BedrockDocumentProcessing = BedrockDocumentProcessing;
265
255
  _a = JSII_RTTI_SYMBOL_1;
266
- BedrockDocumentProcessing[_a] = { fqn: "@cdklabs/cdk-appmod-catalog-blueprints.BedrockDocumentProcessing", version: "1.0.1" };
256
+ BedrockDocumentProcessing[_a] = { fqn: "@cdklabs/cdk-appmod-catalog-blueprints.BedrockDocumentProcessing", version: "1.2.0" };
267
257
  BedrockDocumentProcessing.DEFAULT_CLASSIFICATION_MODEL_ID = aws_bedrock_1.FoundationModelIdentifier.ANTHROPIC_CLAUDE_3_7_SONNET_20250219_V1_0;
268
258
  BedrockDocumentProcessing.DEFAULT_PROCESSING_MODEL_ID = aws_bedrock_1.FoundationModelIdentifier.ANTHROPIC_CLAUDE_3_7_SONNET_20250219_V1_0;
269
259
  BedrockDocumentProcessing.DEFAULT_CLASSIFICATION_PROMPT = `
@@ -295,4 +285,4 @@ BedrockDocumentProcessing.DEFAULT_PROCESSING_PROMPT = `
295
285
  Attached document is as follows:
296
286
 
297
287
  `;
298
- //# sourceMappingURL=data:application/json;base64,
288
+ //# sourceMappingURL=data:application/json;base64,
@@ -0,0 +1,3 @@
1
+ export declare class DefaultDocumentProcessingConfig {
2
+ static readonly DEFAULT_OBSERVABILITY_METRIC_SVC_NAME = "document-processing";
3
+ }
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ var _a;
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ exports.DefaultDocumentProcessingConfig = void 0;
5
+ const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
6
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
7
+ // SPDX-License-Identifier: Apache-2.0
8
+ class DefaultDocumentProcessingConfig {
9
+ }
10
+ exports.DefaultDocumentProcessingConfig = DefaultDocumentProcessingConfig;
11
+ _a = JSII_RTTI_SYMBOL_1;
12
+ DefaultDocumentProcessingConfig[_a] = { fqn: "@cdklabs/cdk-appmod-catalog-blueprints.DefaultDocumentProcessingConfig", version: "1.2.0" };
13
+ DefaultDocumentProcessingConfig.DEFAULT_OBSERVABILITY_METRIC_SVC_NAME = 'document-processing';
14
+ //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiZGVmYXVsdC1kb2N1bWVudC1wcm9jZXNzaW5nLWNvbmZpZy5qcyIsInNvdXJjZVJvb3QiOiIiLCJzb3VyY2VzIjpbIi4uLy4uL3VzZS1jYXNlcy9kb2N1bWVudC1wcm9jZXNzaW5nL2RlZmF1bHQtZG9jdW1lbnQtcHJvY2Vzc2luZy1jb25maWcudHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6Ijs7Ozs7QUFBQSxxRUFBcUU7QUFDckUsc0NBQXNDO0FBRXRDLE1BQWEsK0JBQStCOztBQUE1QywwRUFFQzs7O0FBRHdCLHFFQUFxQyxHQUFHLHFCQUFxQixDQUFDIiwic291cmNlc0NvbnRlbnQiOlsiLy8gQ29weXJpZ2h0IEFtYXpvbi5jb20sIEluYy4gb3IgaXRzIGFmZmlsaWF0ZXMuIEFsbCBSaWdodHMgUmVzZXJ2ZWQuXG4vLyBTUERYLUxpY2Vuc2UtSWRlbnRpZmllcjogQXBhY2hlLTIuMFxuXG5leHBvcnQgY2xhc3MgRGVmYXVsdERvY3VtZW50UHJvY2Vzc2luZ0NvbmZpZyB7XG4gIHB1YmxpYyBzdGF0aWMgcmVhZG9ubHkgREVGQVVMVF9PQlNFUlZBQklMSVRZX01FVFJJQ19TVkNfTkFNRSA9ICdkb2N1bWVudC1wcm9jZXNzaW5nJztcbn0iXX0=
@@ -1,3 +1,5 @@
1
1
  export * from './base-document-processing';
2
2
  export * from './bedrock-document-processing';
3
3
  export * from './agentic-document-processing';
4
+ export * from './adapter';
5
+ export * from './default-document-processing-config';
@@ -17,4 +17,6 @@ Object.defineProperty(exports, "__esModule", { value: true });
17
17
  __exportStar(require("./base-document-processing"), exports);
18
18
  __exportStar(require("./bedrock-document-processing"), exports);
19
19
  __exportStar(require("./agentic-document-processing"), exports);
20
- //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiaW5kZXguanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi91c2UtY2FzZXMvZG9jdW1lbnQtcHJvY2Vzc2luZy9pbmRleC50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiOzs7Ozs7Ozs7Ozs7Ozs7O0FBQUEsNkRBQTJDO0FBQzNDLGdFQUE4QztBQUM5QyxnRUFBOEMiLCJzb3VyY2VzQ29udGVudCI6WyJleHBvcnQgKiBmcm9tICcuL2Jhc2UtZG9jdW1lbnQtcHJvY2Vzc2luZyc7XG5leHBvcnQgKiBmcm9tICcuL2JlZHJvY2stZG9jdW1lbnQtcHJvY2Vzc2luZyc7XG5leHBvcnQgKiBmcm9tICcuL2FnZW50aWMtZG9jdW1lbnQtcHJvY2Vzc2luZyc7Il19
20
+ __exportStar(require("./adapter"), exports);
21
+ __exportStar(require("./default-document-processing-config"), exports);
22
+ //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiaW5kZXguanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi91c2UtY2FzZXMvZG9jdW1lbnQtcHJvY2Vzc2luZy9pbmRleC50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiOzs7Ozs7Ozs7Ozs7Ozs7O0FBQUEsNkRBQTJDO0FBQzNDLGdFQUE4QztBQUM5QyxnRUFBOEM7QUFDOUMsNENBQTBCO0FBQzFCLHVFQUFxRCIsInNvdXJjZXNDb250ZW50IjpbImV4cG9ydCAqIGZyb20gJy4vYmFzZS1kb2N1bWVudC1wcm9jZXNzaW5nJztcbmV4cG9ydCAqIGZyb20gJy4vYmVkcm9jay1kb2N1bWVudC1wcm9jZXNzaW5nJztcbmV4cG9ydCAqIGZyb20gJy4vYWdlbnRpYy1kb2N1bWVudC1wcm9jZXNzaW5nJztcbmV4cG9ydCAqIGZyb20gJy4vYWRhcHRlcic7XG5leHBvcnQgKiBmcm9tICcuL2RlZmF1bHQtZG9jdW1lbnQtcHJvY2Vzc2luZy1jb25maWcnOyJdfQ==
@@ -13,28 +13,11 @@ tracer = Tracer()
13
13
  @metrics.log_metrics
14
14
  @tracer.capture_lambda_handler
15
15
  def handler(event, context):
16
- bucket = event['bucket']
17
- key = event['key']
18
16
  invoke_type = os.environ["INVOKE_TYPE"]
19
17
  tracer.put_annotation(key="invoke_type", value=invoke_type)
20
18
  tracer.put_annotation(key="documentId", value=event["documentId"])
21
19
  metrics.add_dimension(name="invoke_type", value=invoke_type)
22
-
23
- # Check file type
24
- ext = key.lower().split('.')[-1]
25
- if ext not in ['jpg', 'jpeg', 'png', 'pdf']:
26
- raise ValueError(f"Unsupported file type: {ext}")
27
-
28
- media_type = {'jpg': 'image/jpeg', 'jpeg': 'image/jpeg', 'png': 'image/png', 'pdf': 'application/pdf'}[ext]
29
-
30
- # Download file to /tmp
31
- local_path = f"/tmp/{key.split('/')[-1]}"
32
- s3.download_file(bucket, key, local_path)
33
-
34
- # Read and encode file
35
- with open(local_path, 'rb') as f:
36
- file_data = base64.b64encode(f.read()).decode('utf-8')
37
-
20
+ content_type = event["contentType"]
38
21
  # Format prompt if classification result exists
39
22
  prompt = os.environ['PROMPT']
40
23
  if 'classificationResult' in event:
@@ -43,21 +26,50 @@ def handler(event, context):
43
26
 
44
27
  # Build content based on file type
45
28
  content = [{'type': 'text', 'text': prompt}]
46
- if ext == 'pdf':
47
- content.append({'type': 'document', 'source': {'type': 'base64', 'media_type': media_type, 'data': file_data}})
48
- else:
49
- content.append({'type': 'image', 'source': {'type': 'base64', 'media_type': media_type, 'data': file_data}})
29
+ if content_type == 'file':
30
+ content_location = event['content']['location']
31
+
32
+ if content_location == 's3':
33
+ bucket = event['content']['bucket']
34
+ key = event['content']['key']
35
+
36
+ # Check file type
37
+ ext = key.lower().split('.')[-1]
38
+ if ext not in ['jpg', 'jpeg', 'png', 'pdf']:
39
+ raise ValueError(f"Unsupported file type: {ext}")
40
+
41
+ media_type = {'jpg': 'image/jpeg', 'jpeg': 'image/jpeg', 'png': 'image/png', 'pdf': 'application/pdf'}[ext]
42
+
43
+ # Download file to /tmp
44
+ local_path = f"/tmp/{key.split('/')[-1]}"
45
+ s3.download_file(bucket, key, local_path)
46
+
47
+ # Read and encode file
48
+ with open(local_path, 'rb') as f:
49
+ file_data = base64.b64encode(f.read()).decode('utf-8')
50
+
51
+
52
+ if ext == 'pdf':
53
+ content.append({'type': 'document', 'source': {'type': 'base64', 'media_type': media_type, 'data': file_data}})
54
+ else:
55
+ content.append({'type': 'image', 'source': {'type': 'base64', 'media_type': media_type, 'data': file_data}})
50
56
 
57
+ elif content_type == 'data':
58
+ content.append({
59
+ 'type': 'text',
60
+ 'text': event['content']['data']
61
+ })
62
+
51
63
  # Invoke Bedrock
52
64
  response = bedrock.invoke_model(
53
65
  modelId=os.environ['MODEL_ID'],
54
66
  body=json.dumps({
55
67
  'anthropic_version': 'bedrock-2023-05-31',
56
- 'max_tokens': 1000,
68
+ 'max_tokens': os.getenv('INVOKE_MAX_TOKENS', 1000),
57
69
  'messages': [{'role': 'user', 'content': content}]
58
70
  })
59
71
  )
60
-
72
+
61
73
  response_payload = response['body'].read()
62
74
  metrics.add_metric(name="SuccessfulInvocation", unit=MetricUnit.Count, value=1)
63
75
  return json.loads(json.loads(response_payload)["content"][0]["text"])
@@ -19,6 +19,7 @@ def handler(event, context):
19
19
  print(f'SQS Consumer: Received event: {json.dumps(event, indent=2)}')
20
20
 
21
21
  results = []
22
+ raw_prefix = os.getenv('RAW_PREFIX', 'raw/')
22
23
 
23
24
  for record in event['Records']:
24
25
  try:
@@ -53,20 +54,24 @@ def handler(event, context):
53
54
  # Generate unique document ID from S3 key and timestamp
54
55
  timestamp = int(time.time() * 1000)
55
56
  document_id = (key
56
- .replace('raw/', '', 1) # Remove raw/ prefix
57
+ .replace(raw_prefix, '', 1) # Remove raw/ prefix
57
58
  .rsplit('.', 1)[0] # Remove file extension
58
59
  )
59
60
  document_id = re.sub(r'[^a-zA-Z0-9-]', '-', document_id) + '-' + str(timestamp)
60
61
 
61
62
  # Extract filename from key
62
- filename = key.replace('raw/', '', 1) # Remove raw/ prefix
63
+ filename = key.replace(raw_prefix, '', 1) # Remove raw/ prefix
63
64
 
64
65
  # Prepare Step Functions execution input
65
66
  step_function_input = {
66
67
  'documentId': document_id,
67
- 'bucket': bucket,
68
- 'key': key,
69
- 'filename': filename,
68
+ 'contentType': 'file',
69
+ 'content': {
70
+ 'location': 's3',
71
+ 'bucket': bucket,
72
+ 'key': key,
73
+ 'filename': filename
74
+ },
70
75
  'eventTime': event_time,
71
76
  'eventName': event_name,
72
77
  'source': 'sqs-consumer'
@@ -66,8 +66,8 @@ def parse_s3_path(s3_path):
66
66
 
67
67
  @tracer.capture_method
68
68
  def download_attached_document(event):
69
- bucket = event['bucket']
70
- key = event['key']
69
+ bucket = event['content']['bucket']
70
+ key = event['content']['key']
71
71
 
72
72
  # Download file to /tmp
73
73
  local_path = f"/tmp/{key.split('/')[-1]}"
@@ -84,6 +84,7 @@ def handler(event, context):
84
84
  prompt = os.getenv("PROMPT")
85
85
  system_prompt = os.getenv("SYSTEM_PROMPT")
86
86
  invoke_type = os.environ["INVOKE_TYPE"]
87
+ content_type = event['contentType']
87
88
 
88
89
  tracer.put_annotation(key="invoke_type", value=invoke_type)
89
90
  metrics.add_dimension(name="invoke_type", value=invoke_type)
@@ -93,14 +94,16 @@ def handler(event, context):
93
94
 
94
95
  if system_prompt is None:
95
96
  system_prompt = "You're a document analysis specialist. You specialized in analyzing provided documents using the tools that have been provided."
96
-
97
- local_path_attached_doc = download_attached_document(event)
98
97
 
99
98
  if 'classificationResult' in event:
100
99
  classification = event['classificationResult']['documentClassification']
101
100
  prompt = prompt.replace("[ACTUAL_CLASSIFICATION]", classification)
102
101
 
103
- prompt += f" Attached document is located in {local_path_attached_doc}"
102
+ if content_type == 'file' and event['content']['location'] == 's3':
103
+ local_path_attached_doc = download_attached_document(event)
104
+ prompt += f" Attached document is located in {local_path_attached_doc}"
105
+ elif content_type == 'data':
106
+ prompt += f" Attached document content are as follows: {event['content']['data']}"
104
107
 
105
108
  agent = Agent(model=model_id, tools=agent_tools + [file_read], system_prompt=system_prompt)
106
109
  response = agent(prompt)
@@ -5,6 +5,7 @@ const assertions_1 = require("aws-cdk-lib/assertions");
5
5
  const aws_s3_1 = require("aws-cdk-lib/aws-s3");
6
6
  const cdk_nag_1 = require("cdk-nag");
7
7
  const framework_1 = require("../../framework");
8
+ const adapter_1 = require("../adapter");
8
9
  const agentic_document_processing_1 = require("../agentic-document-processing");
9
10
  // Create app and stack
10
11
  const app = new aws_cdk_lib_1.App();
@@ -21,9 +22,12 @@ const bucket = new aws_s3_1.Bucket(stack, 'AgenticDocumentProcessingBucket', {
21
22
  serverAccessLogsPrefix: accessLog.bucketPrefix,
22
23
  enforceSSL: true,
23
24
  });
25
+ const adapter = new adapter_1.QueuedS3Adapter({
26
+ bucket,
27
+ });
24
28
  // Create the main AgenticDocumentProcessing construct
25
29
  new agentic_document_processing_1.AgenticDocumentProcessing(stack, 'AgenticDocumentProcessing', {
26
- bucket,
30
+ ingressAdapter: adapter,
27
31
  useCrossRegionInference: true,
28
32
  processingAgentParameters: {
29
33
  agentSystemPrompt: `
@@ -104,4 +108,4 @@ test('No unsuppressed errors', () => {
104
108
  }
105
109
  expect(errors).toHaveLength(0);
106
110
  });
107
- //# sourceMappingURL=data:application/json;base64,
111
+ //# sourceMappingURL=data:application/json;base64,
@@ -72,26 +72,12 @@ describe('AgenticDocumentProcessing', () => {
72
72
  basicTemplate.hasResourceProperties('AWS::IAM::Role', {
73
73
  Policies: [{
74
74
  PolicyDocument: {
75
- Statement: [
76
- {
77
- Effect: 'Allow',
78
- Action: 's3:GetObject',
79
- Resource: {
80
- 'Fn::Join': [
81
- '',
82
- [
83
- { 'Fn::GetAtt': [assertions_1.Match.anyValue(), 'Arn'] },
84
- '/*',
85
- ],
86
- ],
87
- },
88
- },
89
- {
75
+ Statement: assertions_1.Match.arrayWith([
76
+ assertions_1.Match.objectLike({
90
77
  Effect: 'Allow',
91
78
  Action: ['bedrock:InvokeModel', 'bedrock:InvokeModelWithResponseStream'],
92
- Resource: assertions_1.Match.anyValue(),
93
- },
94
- ],
79
+ }),
80
+ ]),
95
81
  },
96
82
  PolicyName: 'BedrockInvokePolicy',
97
83
  }],
@@ -122,4 +108,4 @@ describe('AgenticDocumentProcessing', () => {
122
108
  });
123
109
  });
124
110
  });
125
- //# sourceMappingURL=data:application/json;base64,
111
+ //# sourceMappingURL=data:application/json;base64,