@cdklabs/cdk-appmod-catalog-blueprints 1.16.0 → 1.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/.jsii +992 -168
  2. package/lib/document-processing/adapter/queued-s3-adapter.js +1 -1
  3. package/lib/document-processing/agentic-document-processing.js +1 -1
  4. package/lib/document-processing/base-document-processing.js +1 -1
  5. package/lib/document-processing/bedrock-document-processing.d.ts +1 -0
  6. package/lib/document-processing/bedrock-document-processing.js +10 -6
  7. package/lib/document-processing/default-document-processing-config.js +1 -1
  8. package/lib/document-processing/index.d.ts +2 -0
  9. package/lib/document-processing/index.js +3 -1
  10. package/lib/document-processing/localstack-agentic-document-processing.d.ts +20 -0
  11. package/lib/document-processing/localstack-agentic-document-processing.js +66 -0
  12. package/lib/document-processing/localstack-bedrock-document-processing.d.ts +16 -0
  13. package/lib/document-processing/localstack-bedrock-document-processing.js +38 -0
  14. package/lib/document-processing/resources/cleanup/handler.py +16 -1
  15. package/lib/document-processing/resources/default-localstack-invoke/index.py +184 -0
  16. package/lib/document-processing/resources/default-localstack-invoke/provider_runtime.py +251 -0
  17. package/lib/document-processing/resources/default-localstack-invoke/requirements.txt +5 -0
  18. package/lib/document-processing/resources/default-sqs-consumer/index.py +17 -2
  19. package/lib/document-processing/resources/pdf-chunking/handler.py +16 -1
  20. package/lib/document-processing/tests/localstack-agentic-document-processing.test.d.ts +1 -0
  21. package/lib/document-processing/tests/localstack-agentic-document-processing.test.js +78 -0
  22. package/lib/document-processing/tests/localstack-document-processing.test.d.ts +1 -0
  23. package/lib/document-processing/tests/localstack-document-processing.test.js +116 -0
  24. package/lib/framework/agents/base-agent.js +1 -1
  25. package/lib/framework/agents/batch-agent.d.ts +1 -0
  26. package/lib/framework/agents/batch-agent.js +7 -4
  27. package/lib/framework/agents/default-agent-config.js +1 -1
  28. package/lib/framework/agents/index.d.ts +1 -0
  29. package/lib/framework/agents/index.js +2 -1
  30. package/lib/framework/agents/interactive-agent.js +88 -11
  31. package/lib/framework/agents/knowledge-base/base-knowledge-base.js +1 -1
  32. package/lib/framework/agents/knowledge-base/bedrock-knowledge-base.js +1 -1
  33. package/lib/framework/agents/localstack-batch-agent.d.ts +15 -0
  34. package/lib/framework/agents/localstack-batch-agent.js +33 -0
  35. package/lib/framework/agents/resources/default-ollama-agent/batch.py +396 -0
  36. package/lib/framework/agents/resources/default-ollama-agent/models.py +7 -0
  37. package/lib/framework/agents/resources/default-ollama-agent/requirements.txt +9 -0
  38. package/lib/framework/agents/resources/default-ollama-agent/runtime_support.py +237 -0
  39. package/lib/framework/agents/resources/default-ollama-agent/utils.py +77 -0
  40. package/lib/framework/bedrock/bedrock.d.ts +9 -0
  41. package/lib/framework/bedrock/bedrock.js +20 -10
  42. package/lib/framework/custom-resource/default-runtimes.js +1 -1
  43. package/lib/framework/foundation/access-log.js +1 -1
  44. package/lib/framework/foundation/eventbridge-broker.js +1 -1
  45. package/lib/framework/foundation/network.d.ts +1 -1
  46. package/lib/framework/foundation/network.js +8 -3
  47. package/lib/framework/index.d.ts +1 -0
  48. package/lib/framework/index.js +2 -1
  49. package/lib/framework/localstack/index.d.ts +1 -0
  50. package/lib/framework/localstack/index.js +18 -0
  51. package/lib/framework/localstack/localstack-config.d.ts +79 -0
  52. package/lib/framework/localstack/localstack-config.js +49 -0
  53. package/lib/framework/tests/localstack-batch-agent.test.d.ts +1 -0
  54. package/lib/framework/tests/localstack-batch-agent.test.js +67 -0
  55. package/lib/tsconfig.tsbuildinfo +1 -1
  56. package/lib/utilities/data-loader.js +1 -1
  57. package/lib/utilities/lambda-iam-utils.js +1 -1
  58. package/lib/utilities/observability/cloudfront-distribution-observability-property-injector.js +1 -1
  59. package/lib/utilities/observability/cloudwatch-transaction-search.js +1 -1
  60. package/lib/utilities/observability/default-observability-config.js +1 -1
  61. package/lib/utilities/observability/lambda-observability-property-injector.js +1 -1
  62. package/lib/utilities/observability/log-group-data-protection-utils.js +1 -1
  63. package/lib/utilities/observability/powertools-config.js +1 -1
  64. package/lib/utilities/observability/state-machine-observability-property-injector.js +1 -1
  65. package/lib/webapp/frontend-construct.js +1 -1
  66. package/package.json +1 -1
@@ -0,0 +1,66 @@
1
+ "use strict";
2
+ var _a;
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ exports.LocalStackAgenticDocumentProcessing = void 0;
5
+ const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
6
+ const path = require("path");
7
+ const aws_lambda_1 = require("aws-cdk-lib/aws-lambda");
8
+ const aws_stepfunctions_tasks_1 = require("aws-cdk-lib/aws-stepfunctions-tasks");
9
+ const agentic_document_processing_1 = require("./agentic-document-processing");
10
+ const localstack_batch_agent_1 = require("../framework/agents/localstack-batch-agent");
11
+ const localstack_1 = require("../framework/localstack");
12
/**
 * Agentic document-processing construct configured for LocalStack.
 *
 * Forwards the caller's props to the parent with `_skipBedrockVpcEndpoints`
 * forced on, swaps the invoke Lambda source directory for the LocalStack
 * variant, runs the processing step through a LocalStackBatchAgent, and
 * copies the LocalStack environment variables onto every Lambda function
 * found beneath this construct.
 */
class LocalStackAgenticDocumentProcessing extends agentic_document_processing_1.AgenticDocumentProcessing {
    constructor(scope, id, props) {
        const parentProps = { ...props, _skipBedrockVpcEndpoints: true };
        super(scope, id, parentProps);
        this.applyLocalStackEnvironment(props.localStack || {});
    }

    /** Directory containing the LocalStack-aware invoke Lambda source. */
    resolveBedrockInvokeEntry() {
        const entryDirectory = 'resources/default-localstack-invoke';
        return path.join(__dirname, entryDirectory);
    }

    /**
     * Build a LambdaInvoke task backed by a single, lazily created batch agent.
     * Each call yields a new task whose construct id carries a running counter.
     */
    processingStep() {
        if (!this._localStackBatchAgent) {
            const { processingAgentParameters, localStack } = this.bedrockDocumentProcessingProps;
            this._localStackBatchAgent = new localstack_batch_agent_1.LocalStackBatchAgent(this, 'IDPLocalStackBatchAgent', {
                ...processingAgentParameters,
                localStack,
            });
            // Grant the agent role whatever the ingress adapter says it needs.
            const agentRole = this._localStackBatchAgent.agentRole;
            for (const policyStatement of this.ingressAdapter.generateAdapterIAMPolicies()) {
                agentRole.addToPrincipalPolicy(policyStatement);
            }
        }

        const stepIndex = this._localStackProcessingStepCounter === undefined
            ? 0
            : this._localStackProcessingStepCounter;
        this._localStackProcessingStepCounter = stepIndex + 1;

        const taskProps = {
            lambdaFunction: this._localStackBatchAgent.agentFunction,
            resultPath: '$.processingResult',
            resultSelector: {
                'result.$': '$.Payload.result',
            },
        };
        return new aws_stepfunctions_tasks_1.LambdaInvoke(this, `ProcessingStep-${stepIndex}`, taskProps);
    }

    /**
     * Add the LocalStack environment variables (with `enabled: true` as the
     * default, overridable by `localStack`) to every Lambda function in the tree.
     */
    applyLocalStackEnvironment(localStack) {
        const environment = localstack_1.LocalStackIntegrationUtils.toLambdaEnvironment({
            enabled: true,
            ...localStack,
        });
        const lambdaFunctions = this.node.findAll()
            .filter((construct) => construct instanceof aws_lambda_1.Function);
        for (const lambdaFunction of lambdaFunctions) {
            for (const [name, value] of Object.entries(environment)) {
                lambdaFunction.addEnvironment(name, value);
            }
        }
    }
}
63
+ exports.LocalStackAgenticDocumentProcessing = LocalStackAgenticDocumentProcessing;
64
+ _a = JSII_RTTI_SYMBOL_1;
65
+ LocalStackAgenticDocumentProcessing[_a] = { fqn: "@cdklabs/cdk-appmod-catalog-blueprints.LocalStackAgenticDocumentProcessing", version: "1.17.0" };
66
+ //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoibG9jYWxzdGFjay1hZ2VudGljLWRvY3VtZW50LXByb2Nlc3NpbmcuanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi91c2UtY2FzZXMvZG9jdW1lbnQtcHJvY2Vzc2luZy9sb2NhbHN0YWNrLWFnZW50aWMtZG9jdW1lbnQtcHJvY2Vzc2luZy50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiOzs7OztBQUFBLDZCQUE2QjtBQUM3Qix1REFBb0U7QUFDcEUsaUZBQW1FO0FBRW5FLCtFQUEwRztBQUUxRyx1RkFBa0Y7QUFDbEYsd0RBQWtHO0FBV2xHLE1BQWEsbUNBQW9DLFNBQVEsdURBQXlCO0lBSWhGLFlBQVksS0FBZ0IsRUFBRSxFQUFVLEVBQUUsS0FBK0M7UUFDdkYsS0FBSyxDQUFDLEtBQUssRUFBRSxFQUFFLEVBQUU7WUFDZixHQUFHLEtBQUs7WUFDUix3QkFBd0IsRUFBRSxJQUFJO1NBQ0csQ0FBQyxDQUFDO1FBQ3JDLElBQUksQ0FBQywwQkFBMEIsQ0FBQyxLQUFLLENBQUMsVUFBVSxJQUFJLEVBQUUsQ0FBQyxDQUFDO0lBQzFELENBQUM7SUFFUyx5QkFBeUI7UUFDakMsT0FBTyxJQUFJLENBQUMsSUFBSSxDQUFDLFNBQVMsRUFBRSxxQ0FBcUMsQ0FBQyxDQUFDO0lBQ3JFLENBQUM7SUFFUyxjQUFjO1FBQ3RCLElBQUksQ0FBQyxJQUFJLENBQUMscUJBQXFCLEVBQUUsQ0FBQztZQUNoQyxNQUFNLFVBQVUsR0FBRyxJQUFJLENBQUMsOEJBQTBFLENBQUM7WUFDbkcsTUFBTSxvQkFBb0IsR0FBRyxVQUFVLENBQUMseUJBQXlCLENBQUM7WUFFbEUsSUFBSSxDQUFDLHFCQUFxQixHQUFHLElBQUksNkNBQW9CLENBQUMsSUFBSSxFQUFFLHlCQUF5QixFQUFFO2dCQUNyRixHQUFHLG9CQUFvQjtnQkFDdkIsVUFBVSxFQUFFLFVBQVUsQ0FBQyxVQUFVO2FBQ2xDLENBQUMsQ0FBQztZQUVILE1BQU0sdUJBQXVCLEdBQUcsSUFBSSxDQUFDLGNBQWMsQ0FBQywwQkFBMEIsRUFBRSxDQUFDO1lBQ2pGLEtBQUssTUFBTSxTQUFTLElBQUksdUJBQXVCLEVBQUUsQ0FBQztnQkFDaEQsSUFBSSxDQUFDLHFCQUFxQixDQUFDLFNBQVMsQ0FBQyxvQkFBb0IsQ0FBQyxTQUFTLENBQUMsQ0FBQztZQUN2RSxDQUFDO1FBQ0gsQ0FBQztRQUVELElBQUksSUFBSSxDQUFDLGdDQUFnQyxLQUFLLFNBQVMsRUFBRSxDQUFDO1lBQ3hELElBQUksQ0FBQyxnQ0FBZ0MsR0FBRyxDQUFDLENBQUM7UUFDNUMsQ0FBQztRQUVELE1BQU0sTUFBTSxHQUFHLGtCQUFrQixJQUFJLENBQUMsZ0NBQWdDLEVBQUUsQ0FBQztRQUN6RSxJQUFJLENBQUMsZ0NBQWdDLEVBQUUsQ0FBQztRQUV4QyxPQUFPLElBQUksc0NBQVksQ0FBQyxJQUFJLEVBQUUsTUFBTSxFQUFFO1lBQ3BDLGNBQWMsRUFBRSxJQUFJLENBQUMscUJBQXFCLENBQUMsYUFBYztZQUN6RCxVQUFVLEVBQUUsb0JBQW9CO1lBQ2hDLGNBQWMsRUFBRTtnQkFDZCxVQUFVLEVBQUUsa0JBQWtCO2FBQy9CO1NBQ0YsQ0FBQyxDQUFDO0lBQ0wsQ0FBQztJQUVPLDBCQUEwQixDQUFDLFVBQXVDO1FBQ3hFLE1BQU0sYUFBYSxHQUFH
LHVDQUEwQixDQUFDLG1CQUFtQixDQUFDO1lBQ25FLE9BQU8sRUFBRSxJQUFJO1lBQ2IsR0FBRyxVQUFVO1NBQ2QsQ0FBQyxDQUFDO1FBRUgsS0FBSyxNQUFNLEtBQUssSUFBSSxJQUFJLENBQUMsSUFBSSxDQUFDLE9BQU8sRUFBRSxFQUFFLENBQUM7WUFDeEMsSUFBSSxLQUFLLFlBQVkscUJBQWMsRUFBRSxDQUFDO2dCQUNwQyxLQUFLLE1BQU0sQ0FBQyxHQUFHLEVBQUUsS0FBSyxDQUFDLElBQUksTUFBTSxDQUFDLE9BQU8sQ0FBQyxhQUFhLENBQUMsRUFBRSxDQUFDO29CQUN6RCxLQUFLLENBQUMsY0FBYyxDQUFDLEdBQUcsRUFBRSxLQUFLLENBQUMsQ0FBQztnQkFDbkMsQ0FBQztZQUNILENBQUM7UUFDSCxDQUFDO0lBQ0gsQ0FBQzs7QUE3REgsa0ZBOERDIiwic291cmNlc0NvbnRlbnQiOlsiaW1wb3J0ICogYXMgcGF0aCBmcm9tICdwYXRoJztcbmltcG9ydCB7IEZ1bmN0aW9uIGFzIExhbWJkYUZ1bmN0aW9uIH0gZnJvbSAnYXdzLWNkay1saWIvYXdzLWxhbWJkYSc7XG5pbXBvcnQgeyBMYW1iZGFJbnZva2UgfSBmcm9tICdhd3MtY2RrLWxpYi9hd3Mtc3RlcGZ1bmN0aW9ucy10YXNrcyc7XG5pbXBvcnQgeyBDb25zdHJ1Y3QgfSBmcm9tICdjb25zdHJ1Y3RzJztcbmltcG9ydCB7IEFnZW50aWNEb2N1bWVudFByb2Nlc3NpbmcsIEFnZW50aWNEb2N1bWVudFByb2Nlc3NpbmdQcm9wcyB9IGZyb20gJy4vYWdlbnRpYy1kb2N1bWVudC1wcm9jZXNzaW5nJztcbmltcG9ydCB7IERvY3VtZW50UHJvY2Vzc2luZ1N0ZXBUeXBlIH0gZnJvbSAnLi9iYXNlLWRvY3VtZW50LXByb2Nlc3NpbmcnO1xuaW1wb3J0IHsgTG9jYWxTdGFja0JhdGNoQWdlbnQgfSBmcm9tICcuLi9mcmFtZXdvcmsvYWdlbnRzL2xvY2Fsc3RhY2stYmF0Y2gtYWdlbnQnO1xuaW1wb3J0IHsgTG9jYWxTdGFja0VuZHBvaW50T3ZlcnJpZGVzLCBMb2NhbFN0YWNrSW50ZWdyYXRpb25VdGlscyB9IGZyb20gJy4uL2ZyYW1ld29yay9sb2NhbHN0YWNrJztcblxuZXhwb3J0IGludGVyZmFjZSBMb2NhbFN0YWNrQWdlbnRpY0RvY3VtZW50UHJvY2Vzc2luZ1Byb3BzIGV4dGVuZHMgQWdlbnRpY0RvY3VtZW50UHJvY2Vzc2luZ1Byb3BzIHtcbiAgLyoqXG4gICAqIExvY2FsU3RhY2sgZW5kcG9pbnQgcm91dGluZyBjb25maWd1cmF0aW9uIGZvciBMYW1iZGEgcnVudGltZSBTREsgY2FsbHMuXG4gICAqXG4gICAqIEBkZWZhdWx0IHsgZW5hYmxlZDogdHJ1ZSB9XG4gICAqL1xuICByZWFkb25seSBsb2NhbFN0YWNrPzogTG9jYWxTdGFja0VuZHBvaW50T3ZlcnJpZGVzO1xufVxuXG5leHBvcnQgY2xhc3MgTG9jYWxTdGFja0FnZW50aWNEb2N1bWVudFByb2Nlc3NpbmcgZXh0ZW5kcyBBZ2VudGljRG9jdW1lbnRQcm9jZXNzaW5nIHtcbiAgcHJpdmF0ZSBfbG9jYWxTdGFja0JhdGNoQWdlbnQ/OiBMb2NhbFN0YWNrQmF0Y2hBZ2VudDtcbiAgcHJpdmF0ZSBfbG9jYWxTdGFja1Byb2Nlc3NpbmdTdGVwQ291bnRlcj86IG51bWJlcjtcblxuICBjb25zdHJ1Y3RvcihzY29wZTogQ29uc3RydWN0LCBpZDogc3Ry
aW5nLCBwcm9wczogTG9jYWxTdGFja0FnZW50aWNEb2N1bWVudFByb2Nlc3NpbmdQcm9wcykge1xuICAgIHN1cGVyKHNjb3BlLCBpZCwge1xuICAgICAgLi4ucHJvcHMsXG4gICAgICBfc2tpcEJlZHJvY2tWcGNFbmRwb2ludHM6IHRydWUsXG4gICAgfSBhcyBBZ2VudGljRG9jdW1lbnRQcm9jZXNzaW5nUHJvcHMpO1xuICAgIHRoaXMuYXBwbHlMb2NhbFN0YWNrRW52aXJvbm1lbnQocHJvcHMubG9jYWxTdGFjayB8fCB7fSk7XG4gIH1cblxuICBwcm90ZWN0ZWQgcmVzb2x2ZUJlZHJvY2tJbnZva2VFbnRyeSgpOiBzdHJpbmcge1xuICAgIHJldHVybiBwYXRoLmpvaW4oX19kaXJuYW1lLCAncmVzb3VyY2VzL2RlZmF1bHQtbG9jYWxzdGFjay1pbnZva2UnKTtcbiAgfVxuXG4gIHByb3RlY3RlZCBwcm9jZXNzaW5nU3RlcCgpOiBEb2N1bWVudFByb2Nlc3NpbmdTdGVwVHlwZSB7XG4gICAgaWYgKCF0aGlzLl9sb2NhbFN0YWNrQmF0Y2hBZ2VudCkge1xuICAgICAgY29uc3QgYWdlbnRQcm9wcyA9IHRoaXMuYmVkcm9ja0RvY3VtZW50UHJvY2Vzc2luZ1Byb3BzIGFzIExvY2FsU3RhY2tBZ2VudGljRG9jdW1lbnRQcm9jZXNzaW5nUHJvcHM7XG4gICAgICBjb25zdCBwcm9jZXNzaW5nQWdlbnRQcm9wcyA9IGFnZW50UHJvcHMucHJvY2Vzc2luZ0FnZW50UGFyYW1ldGVycztcblxuICAgICAgdGhpcy5fbG9jYWxTdGFja0JhdGNoQWdlbnQgPSBuZXcgTG9jYWxTdGFja0JhdGNoQWdlbnQodGhpcywgJ0lEUExvY2FsU3RhY2tCYXRjaEFnZW50Jywge1xuICAgICAgICAuLi5wcm9jZXNzaW5nQWdlbnRQcm9wcyxcbiAgICAgICAgbG9jYWxTdGFjazogYWdlbnRQcm9wcy5sb2NhbFN0YWNrLFxuICAgICAgfSk7XG5cbiAgICAgIGNvbnN0IGFkYXB0ZXJQb2xpY3lTdGF0ZW1lbnRzID0gdGhpcy5pbmdyZXNzQWRhcHRlci5nZW5lcmF0ZUFkYXB0ZXJJQU1Qb2xpY2llcygpO1xuICAgICAgZm9yIChjb25zdCBzdGF0ZW1lbnQgb2YgYWRhcHRlclBvbGljeVN0YXRlbWVudHMpIHtcbiAgICAgICAgdGhpcy5fbG9jYWxTdGFja0JhdGNoQWdlbnQuYWdlbnRSb2xlLmFkZFRvUHJpbmNpcGFsUG9saWN5KHN0YXRlbWVudCk7XG4gICAgICB9XG4gICAgfVxuXG4gICAgaWYgKHRoaXMuX2xvY2FsU3RhY2tQcm9jZXNzaW5nU3RlcENvdW50ZXIgPT09IHVuZGVmaW5lZCkge1xuICAgICAgdGhpcy5fbG9jYWxTdGFja1Byb2Nlc3NpbmdTdGVwQ291bnRlciA9IDA7XG4gICAgfVxuXG4gICAgY29uc3Qgc3RlcElkID0gYFByb2Nlc3NpbmdTdGVwLSR7dGhpcy5fbG9jYWxTdGFja1Byb2Nlc3NpbmdTdGVwQ291bnRlcn1gO1xuICAgIHRoaXMuX2xvY2FsU3RhY2tQcm9jZXNzaW5nU3RlcENvdW50ZXIrKztcblxuICAgIHJldHVybiBuZXcgTGFtYmRhSW52b2tlKHRoaXMsIHN0ZXBJZCwge1xuICAgICAgbGFtYmRhRnVuY3Rpb246IHRoaXMuX2xvY2FsU3RhY2tCYXRjaEFnZW50LmFnZW50RnVuY3Rpb24hLFxuICAgICAgcmVzdWx0UGF0aDogJyQucHJvY2Vzc2luZ1Jlc3VsdCcsXG4gICAg
ICByZXN1bHRTZWxlY3Rvcjoge1xuICAgICAgICAncmVzdWx0LiQnOiAnJC5QYXlsb2FkLnJlc3VsdCcsXG4gICAgICB9LFxuICAgIH0pO1xuICB9XG5cbiAgcHJpdmF0ZSBhcHBseUxvY2FsU3RhY2tFbnZpcm9ubWVudChsb2NhbFN0YWNrOiBMb2NhbFN0YWNrRW5kcG9pbnRPdmVycmlkZXMpOiB2b2lkIHtcbiAgICBjb25zdCBsb2NhbFN0YWNrRW52ID0gTG9jYWxTdGFja0ludGVncmF0aW9uVXRpbHMudG9MYW1iZGFFbnZpcm9ubWVudCh7XG4gICAgICBlbmFibGVkOiB0cnVlLFxuICAgICAgLi4ubG9jYWxTdGFjayxcbiAgICB9KTtcblxuICAgIGZvciAoY29uc3QgY2hpbGQgb2YgdGhpcy5ub2RlLmZpbmRBbGwoKSkge1xuICAgICAgaWYgKGNoaWxkIGluc3RhbmNlb2YgTGFtYmRhRnVuY3Rpb24pIHtcbiAgICAgICAgZm9yIChjb25zdCBba2V5LCB2YWx1ZV0gb2YgT2JqZWN0LmVudHJpZXMobG9jYWxTdGFja0VudikpIHtcbiAgICAgICAgICBjaGlsZC5hZGRFbnZpcm9ubWVudChrZXksIHZhbHVlKTtcbiAgICAgICAgfVxuICAgICAgfVxuICAgIH1cbiAgfVxufVxuIl19
@@ -0,0 +1,16 @@
1
+ import { Construct } from 'constructs';
2
+ import { BedrockDocumentProcessing, BedrockDocumentProcessingProps } from './bedrock-document-processing';
3
+ import { LocalStackEndpointOverrides } from '../framework/localstack';
4
/**
 * Construction properties for LocalStackBedrockDocumentProcessing.
 *
 * Adds one optional LocalStack routing block on top of the standard
 * BedrockDocumentProcessingProps.
 */
export interface LocalStackBedrockDocumentProcessingProps extends BedrockDocumentProcessingProps {
    /**
     * LocalStack endpoint routing configuration for Lambda runtime SDK calls.
     *
     * @default { enabled: true }
     */
    readonly localStack?: LocalStackEndpointOverrides;
}
12
/**
 * BedrockDocumentProcessing variant intended for LocalStack deployments.
 */
export declare class LocalStackBedrockDocumentProcessing extends BedrockDocumentProcessing {
    /**
     * @param scope parent construct
     * @param id construct id
     * @param props standard Bedrock document-processing props plus the optional `localStack` block
     */
    constructor(scope: Construct, id: string, props: LocalStackBedrockDocumentProcessingProps);
    /** Returns the path of the invoke Lambda source directory used by this variant. */
    protected resolveBedrockInvokeEntry(): string;
    /** Internal helper; the name suggests it applies LocalStack environment settings (implementation not visible in this declaration). */
    private applyLocalStackEnvironment;
}
@@ -0,0 +1,38 @@
1
+ "use strict";
2
+ var _a;
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ exports.LocalStackBedrockDocumentProcessing = void 0;
5
+ const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
6
+ const path = require("path");
7
+ const aws_lambda_1 = require("aws-cdk-lib/aws-lambda");
8
+ const bedrock_document_processing_1 = require("./bedrock-document-processing");
9
+ const localstack_1 = require("../framework/localstack");
10
/**
 * Bedrock document-processing construct configured for LocalStack.
 *
 * Forwards the caller's props to the parent with `_skipBedrockVpcEndpoints`
 * forced on, points the invoke Lambda at the LocalStack-aware source
 * directory, and copies the LocalStack environment variables onto every
 * Lambda function found beneath this construct.
 */
class LocalStackBedrockDocumentProcessing extends bedrock_document_processing_1.BedrockDocumentProcessing {
    constructor(scope, id, props) {
        const parentProps = { ...props, _skipBedrockVpcEndpoints: true };
        super(scope, id, parentProps);
        this.applyLocalStackEnvironment(props.localStack);
    }

    /** Directory containing the LocalStack-aware invoke Lambda source. */
    resolveBedrockInvokeEntry() {
        const entryDirectory = 'resources/default-localstack-invoke';
        return path.join(__dirname, entryDirectory);
    }

    /**
     * Add the LocalStack environment variables (with `enabled: true` as the
     * default, overridable by `localStack`) to every Lambda function in the tree.
     * `localStack` may be undefined; spreading it then contributes nothing.
     */
    applyLocalStackEnvironment(localStack) {
        const environment = localstack_1.LocalStackIntegrationUtils.toLambdaEnvironment({
            enabled: true,
            ...localStack,
        });
        const lambdaFunctions = this.node.findAll()
            .filter((construct) => construct instanceof aws_lambda_1.Function);
        for (const lambdaFunction of lambdaFunctions) {
            for (const [name, value] of Object.entries(environment)) {
                lambdaFunction.addEnvironment(name, value);
            }
        }
    }
}
35
+ exports.LocalStackBedrockDocumentProcessing = LocalStackBedrockDocumentProcessing;
36
+ _a = JSII_RTTI_SYMBOL_1;
37
+ LocalStackBedrockDocumentProcessing[_a] = { fqn: "@cdklabs/cdk-appmod-catalog-blueprints.LocalStackBedrockDocumentProcessing", version: "1.17.0" };
38
+ //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoibG9jYWxzdGFjay1iZWRyb2NrLWRvY3VtZW50LXByb2Nlc3NpbmcuanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi91c2UtY2FzZXMvZG9jdW1lbnQtcHJvY2Vzc2luZy9sb2NhbHN0YWNrLWJlZHJvY2stZG9jdW1lbnQtcHJvY2Vzc2luZy50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiOzs7OztBQUFBLDZCQUE2QjtBQUM3Qix1REFBb0U7QUFFcEUsK0VBQTBHO0FBQzFHLHdEQUFrRztBQVdsRyxNQUFhLG1DQUFvQyxTQUFRLHVEQUF5QjtJQUNoRixZQUFZLEtBQWdCLEVBQUUsRUFBVSxFQUFFLEtBQStDO1FBQ3ZGLEtBQUssQ0FBQyxLQUFLLEVBQUUsRUFBRSxFQUFFO1lBQ2YsR0FBRyxLQUFLO1lBQ1Isd0JBQXdCLEVBQUUsSUFBSTtTQUNHLENBQUMsQ0FBQztRQUNyQyxJQUFJLENBQUMsMEJBQTBCLENBQUMsS0FBSyxDQUFDLFVBQVUsQ0FBQyxDQUFDO0lBQ3BELENBQUM7SUFFUyx5QkFBeUI7UUFDakMsT0FBTyxJQUFJLENBQUMsSUFBSSxDQUFDLFNBQVMsRUFBRSxxQ0FBcUMsQ0FBQyxDQUFDO0lBQ3JFLENBQUM7SUFFTywwQkFBMEIsQ0FBQyxVQUF3QztRQUN6RSxNQUFNLGFBQWEsR0FBRyx1Q0FBMEIsQ0FBQyxtQkFBbUIsQ0FBQztZQUNuRSxPQUFPLEVBQUUsSUFBSTtZQUNiLEdBQUcsVUFBVTtTQUNkLENBQUMsQ0FBQztRQUVILEtBQUssTUFBTSxLQUFLLElBQUksSUFBSSxDQUFDLElBQUksQ0FBQyxPQUFPLEVBQUUsRUFBRSxDQUFDO1lBQ3hDLElBQUksS0FBSyxZQUFZLHFCQUFjLEVBQUUsQ0FBQztnQkFDcEMsS0FBSyxNQUFNLENBQUMsR0FBRyxFQUFFLEtBQUssQ0FBQyxJQUFJLE1BQU0sQ0FBQyxPQUFPLENBQUMsYUFBYSxDQUFDLEVBQUUsQ0FBQztvQkFDekQsS0FBSyxDQUFDLGNBQWMsQ0FBQyxHQUFHLEVBQUUsS0FBSyxDQUFDLENBQUM7Z0JBQ25DLENBQUM7WUFDSCxDQUFDO1FBQ0gsQ0FBQztJQUNILENBQUM7O0FBMUJILGtGQTJCQyIsInNvdXJjZXNDb250ZW50IjpbImltcG9ydCAqIGFzIHBhdGggZnJvbSAncGF0aCc7XG5pbXBvcnQgeyBGdW5jdGlvbiBhcyBMYW1iZGFGdW5jdGlvbiB9IGZyb20gJ2F3cy1jZGstbGliL2F3cy1sYW1iZGEnO1xuaW1wb3J0IHsgQ29uc3RydWN0IH0gZnJvbSAnY29uc3RydWN0cyc7XG5pbXBvcnQgeyBCZWRyb2NrRG9jdW1lbnRQcm9jZXNzaW5nLCBCZWRyb2NrRG9jdW1lbnRQcm9jZXNzaW5nUHJvcHMgfSBmcm9tICcuL2JlZHJvY2stZG9jdW1lbnQtcHJvY2Vzc2luZyc7XG5pbXBvcnQgeyBMb2NhbFN0YWNrRW5kcG9pbnRPdmVycmlkZXMsIExvY2FsU3RhY2tJbnRlZ3JhdGlvblV0aWxzIH0gZnJvbSAnLi4vZnJhbWV3b3JrL2xvY2Fsc3RhY2snO1xuXG5leHBvcnQgaW50ZXJmYWNlIExvY2FsU3RhY2tCZWRyb2NrRG9jdW1lbnRQcm9jZXNzaW5nUHJvcHMgZXh0ZW5kcyBCZWRyb2NrRG9jdW1lbnRQcm9jZXNzaW5nUHJvcHMge1xuICAvKipcbiAgICogTG9jYWxTdGFj
ayBlbmRwb2ludCByb3V0aW5nIGNvbmZpZ3VyYXRpb24gZm9yIExhbWJkYSBydW50aW1lIFNESyBjYWxscy5cbiAgICpcbiAgICogQGRlZmF1bHQgeyBlbmFibGVkOiB0cnVlIH1cbiAgICovXG4gIHJlYWRvbmx5IGxvY2FsU3RhY2s/OiBMb2NhbFN0YWNrRW5kcG9pbnRPdmVycmlkZXM7XG59XG5cbmV4cG9ydCBjbGFzcyBMb2NhbFN0YWNrQmVkcm9ja0RvY3VtZW50UHJvY2Vzc2luZyBleHRlbmRzIEJlZHJvY2tEb2N1bWVudFByb2Nlc3Npbmcge1xuICBjb25zdHJ1Y3RvcihzY29wZTogQ29uc3RydWN0LCBpZDogc3RyaW5nLCBwcm9wczogTG9jYWxTdGFja0JlZHJvY2tEb2N1bWVudFByb2Nlc3NpbmdQcm9wcykge1xuICAgIHN1cGVyKHNjb3BlLCBpZCwge1xuICAgICAgLi4ucHJvcHMsXG4gICAgICBfc2tpcEJlZHJvY2tWcGNFbmRwb2ludHM6IHRydWUsXG4gICAgfSBhcyBCZWRyb2NrRG9jdW1lbnRQcm9jZXNzaW5nUHJvcHMpO1xuICAgIHRoaXMuYXBwbHlMb2NhbFN0YWNrRW52aXJvbm1lbnQocHJvcHMubG9jYWxTdGFjayk7XG4gIH1cblxuICBwcm90ZWN0ZWQgcmVzb2x2ZUJlZHJvY2tJbnZva2VFbnRyeSgpOiBzdHJpbmcge1xuICAgIHJldHVybiBwYXRoLmpvaW4oX19kaXJuYW1lLCAncmVzb3VyY2VzL2RlZmF1bHQtbG9jYWxzdGFjay1pbnZva2UnKTtcbiAgfVxuXG4gIHByaXZhdGUgYXBwbHlMb2NhbFN0YWNrRW52aXJvbm1lbnQobG9jYWxTdGFjaz86IExvY2FsU3RhY2tFbmRwb2ludE92ZXJyaWRlcyk6IHZvaWQge1xuICAgIGNvbnN0IGxvY2FsU3RhY2tFbnYgPSBMb2NhbFN0YWNrSW50ZWdyYXRpb25VdGlscy50b0xhbWJkYUVudmlyb25tZW50KHtcbiAgICAgIGVuYWJsZWQ6IHRydWUsXG4gICAgICAuLi5sb2NhbFN0YWNrLFxuICAgIH0pO1xuXG4gICAgZm9yIChjb25zdCBjaGlsZCBvZiB0aGlzLm5vZGUuZmluZEFsbCgpKSB7XG4gICAgICBpZiAoY2hpbGQgaW5zdGFuY2VvZiBMYW1iZGFGdW5jdGlvbikge1xuICAgICAgICBmb3IgKGNvbnN0IFtrZXksIHZhbHVlXSBvZiBPYmplY3QuZW50cmllcyhsb2NhbFN0YWNrRW52KSkge1xuICAgICAgICAgIGNoaWxkLmFkZEVudmlyb25tZW50KGtleSwgdmFsdWUpO1xuICAgICAgICB9XG4gICAgICB9XG4gICAgfVxuICB9XG59XG4iXX0=
@@ -36,8 +36,23 @@ except ImportError:
36
36
  logger = logging.getLogger()
37
37
  logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
38
38
 
39
+ def _resolve_endpoint_url(*service_env_keys: str) -> str | None:
40
+ for key in service_env_keys:
41
+ endpoint = os.getenv(key)
42
+ if endpoint:
43
+ return endpoint
44
+ return None
45
+
46
+
47
def _create_boto3_client(service_name: str, *service_env_keys: str):
    """Create a boto3 client, honouring an endpoint override when one is set.

    The service-specific variables in ``service_env_keys`` are checked first,
    then the generic ``AWS_ENDPOINT_URL``. With no override configured the
    client is created with boto3's default endpoint resolution.
    """
    client_options = {}
    override = _resolve_endpoint_url(*service_env_keys, 'AWS_ENDPOINT_URL')
    if override:
        client_options['endpoint_url'] = override
    return boto3.client(service_name, **client_options)
52
+
53
+
39
54
  # Initialize S3 client
40
- s3_client = boto3.client('s3')
55
+ s3_client = _create_boto3_client('s3', 'AWS_ENDPOINT_URL_S3')
41
56
 
42
57
 
43
58
  def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
@@ -0,0 +1,184 @@
1
+ import base64
2
+ import logging
3
+ import os
4
+ from aws_lambda_powertools import Metrics, Tracer
5
+ from aws_lambda_powertools.metrics import MetricUnit
6
+
7
+ from provider_runtime import (
8
+ MODEL_PROVIDER_OLLAMA,
9
+ create_boto3_client,
10
+ extract_pdf_text_for_ollama,
11
+ invoke_model,
12
+ normalize_model_response,
13
+ resolve_model_provider,
14
+ )
15
+
16
# Module-level clients and telemetry helpers, created once when the module is
# imported. Each client is given the name of a service-specific environment
# variable that create_boto3_client consults for an endpoint override.
s3 = create_boto3_client('s3', 'AWS_ENDPOINT_URL_S3')
bedrock = create_boto3_client('bedrock-runtime', 'AWS_ENDPOINT_URL_BEDROCK_RUNTIME')
metrics = Metrics()
tracer = Tracer()
# Root logger; its level comes from LOG_LEVEL (upper-cased), defaulting to INFO.
logger = logging.getLogger()
logger.setLevel(os.getenv('LOG_LEVEL', 'INFO').upper())
22
+
23
+
24
def parse_chunk_metadata(event):
    """Parse optional chunk metadata from the event payload.

    Two input shapes are supported:

    * ``event['chunkMetadata']`` - returned unchanged.
    * ``event['chunk']`` (a Map State item) - normalised into a dictionary
      with ``chunkIndex``, ``totalChunks``, ``startPage``, ``endPage``,
      ``pageCount``, ``estimatedTokens`` and ``overlapPages``. When the chunk
      carries its own ``bucket`` / ``key``, those are carried through as
      well, so the caller can fetch the chunk object rather than the whole
      original document.

    Args:
        event: The invocation payload (a dictionary).

    Returns:
        The chunk metadata dictionary, or ``None`` when the event describes
        an unchunked document.
    """
    # Direct metadata wins over the nested chunk object.
    if 'chunkMetadata' in event:
        return event['chunkMetadata']

    if 'chunk' not in event:
        return None

    chunk = event['chunk']
    metadata = {
        'chunkIndex': chunk.get('chunkIndex', event.get('chunkIndex', 0)),
        'totalChunks': event.get('totalChunks', 1),
        'startPage': chunk.get('startPage', 0),
        'endPage': chunk.get('endPage', 0),
        'pageCount': chunk.get('pageCount', 0),
        'estimatedTokens': chunk.get('estimatedTokens', 0),
        'overlapPages': chunk.get('overlapPages', 0),
    }

    # Only add the location fields when they hold a value: the handler tests
    # for key presence, so a None placeholder would be mistaken for a location.
    for location_field in ('bucket', 'key'):
        location_value = chunk.get(location_field)
        if location_value:
            metadata[location_field] = location_value

    return metadata
49
+
50
+
51
def build_chunk_context_prompt(chunk_metadata):
    """Describe the chunk being processed as a prompt preamble.

    Args:
        chunk_metadata: Dictionary of chunk information; indices and page
            numbers are zero-based and are rendered one-based.

    Returns:
        The preamble ending in a blank line, or an empty string when there is
        no chunk metadata.
    """
    if not chunk_metadata:
        return ""

    lookup = chunk_metadata.get
    position = lookup('chunkIndex', 0)
    chunk_count = lookup('totalChunks', 1)
    first_page = lookup('startPage', 0)
    last_page = lookup('endPage', 0)
    shared_pages = lookup('overlapPages', 0)

    sentences = [
        f"You are analyzing chunk {position + 1} of {chunk_count} from pages {first_page + 1} to {last_page + 1}."
    ]

    # The first chunk has no predecessor, so it never gets the overlap note.
    has_overlap_note = shared_pages > 0 and position > 0
    if has_overlap_note:
        sentences.append(
            f"Note: This chunk includes {shared_pages} overlapping pages from the previous chunk for context."
        )

    return "\n".join(sentences) + "\n\n"
82
+
83
+
84
@metrics.log_metrics
@tracer.capture_lambda_handler
def handler(event, context):
    """Build a model prompt for one document (or chunk) and invoke the model.

    Reads ``INVOKE_TYPE``, ``PROMPT`` and ``MODEL_ID`` from the environment
    (``INVOKE_MAX_TOKENS`` is optional, default 1000). The event must carry
    ``documentId``, ``contentType`` and, for ``file`` / ``data`` content, a
    ``content`` object.

    For S3 files the object is downloaded to ``/tmp``, converted into prompt
    content, and the temporary copy is removed again before the model is
    called, so warm execution environments do not accumulate files.

    Returns:
        The dictionary produced by ``normalize_model_response``.

    Raises:
        KeyError: A required environment variable or event field is missing.
        ValueError: The S3 object has an unsupported file extension.
    """
    invoke_type = os.environ["INVOKE_TYPE"]
    tracer.put_annotation(key="invoke_type", value=invoke_type)
    tracer.put_annotation(key="documentId", value=event["documentId"])
    metrics.add_dimension(name="invoke_type", value=invoke_type)
    content_type = event["contentType"]

    # Parse optional chunk metadata
    chunk_metadata = parse_chunk_metadata(event)
    if chunk_metadata:
        tracer.put_annotation(key="chunkIndex", value=str(chunk_metadata.get('chunkIndex', 0)))
        tracer.put_annotation(key="totalChunks", value=str(chunk_metadata.get('totalChunks', 1)))
        metrics.add_dimension(name="is_chunked", value="true")
    else:
        metrics.add_dimension(name="is_chunked", value="false")

    # Format prompt if classification result exists
    prompt = os.environ['PROMPT']
    if 'classificationResult' in event:
        classification = event['classificationResult']['documentClassification']
        prompt = prompt.replace("[ACTUAL_CLASSIFICATION]", classification)

    # Add chunk context to prompt if processing a chunk
    chunk_context = build_chunk_context_prompt(chunk_metadata)
    if chunk_context:
        prompt = chunk_context + prompt

    model_provider = resolve_model_provider()

    # Build content based on file type
    content = [{'type': 'text', 'text': prompt}]
    if content_type == 'file':
        content_location = event['content']['location']

        if content_location == 's3':
            # Use chunk-specific S3 location if available, otherwise use original content
            if chunk_metadata and 'bucket' in chunk_metadata and 'key' in chunk_metadata:
                bucket = chunk_metadata['bucket']
                key = chunk_metadata['key']
            else:
                bucket = event['content']['bucket']
                key = event['content']['key']

            # Check file type
            ext = key.lower().split('.')[-1]
            if ext not in ['jpg', 'jpeg', 'png', 'pdf']:
                raise ValueError(f"Unsupported file type: {ext}")

            media_type = {'jpg': 'image/jpeg', 'jpeg': 'image/jpeg', 'png': 'image/png', 'pdf': 'application/pdf'}[ext]

            # Download file to /tmp
            local_path = f"/tmp/{key.split('/')[-1]}"
            try:
                s3.download_file(bucket, key, local_path)

                if model_provider == MODEL_PROVIDER_OLLAMA:
                    if ext == 'pdf':
                        extracted_text = extract_pdf_text_for_ollama(local_path)
                        if extracted_text:
                            content.append({
                                'type': 'text',
                                'text': f'Document text extract:\n\n{extracted_text}'
                            })
                        else:
                            content.append({
                                'type': 'text',
                                'text': (
                                    'Document is PDF, but text extraction returned no content. '
                                    'Classify based on any other provided context.'
                                )
                            })
                    else:
                        content.append({
                            'type': 'text',
                            'text': (
                                f'Input includes an image ({media_type}). '
                                'Direct Ollama mode does not attach image bytes in this workflow.'
                            )
                        })
                else:
                    # Read and encode file
                    with open(local_path, 'rb') as f:
                        file_data = base64.b64encode(f.read()).decode('utf-8')

                    if ext == 'pdf':
                        content.append({'type': 'document', 'source': {'type': 'base64', 'media_type': media_type, 'data': file_data}})
                    else:
                        content.append({'type': 'image', 'source': {'type': 'base64', 'media_type': media_type, 'data': file_data}})
            finally:
                # Everything needed is in `content` now, so drop the local copy.
                # Cleanup is best-effort: a failed or partial download may have
                # left nothing to delete, and that must not mask the real error.
                try:
                    os.remove(local_path)
                except OSError:
                    logger.debug('Temporary file %s could not be removed', local_path)

    elif content_type == 'data':
        content.append({
            'type': 'text',
            'text': event['content']['data']
        })

    model_id = os.environ['MODEL_ID']
    max_tokens = int(os.getenv('INVOKE_MAX_TOKENS', '1000'))
    response_text = invoke_model(content, max_tokens, model_id, bedrock, logger)
    metrics.add_metric(name="SuccessfulInvocation", unit=MetricUnit.Count, value=1)
    return normalize_model_response(response_text, invoke_type)
@@ -0,0 +1,251 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ import re
5
+ from typing import Any, Dict, Optional
6
+ from urllib import error as urllib_error
7
+ from urllib import request as urllib_request
8
+
9
+ import boto3
10
+
11
# Supported model back ends; Bedrock is used unless configured otherwise.
MODEL_PROVIDER_BEDROCK = 'bedrock'
MODEL_PROVIDER_OLLAMA = 'ollama'
DEFAULT_MODEL_PROVIDER = MODEL_PROVIDER_BEDROCK
DEFAULT_OLLAMA_BASE_URL = 'http://host.docker.internal:11434'
OLLAMA_CHAT_PATH = '/api/chat'

# A JSON object inside a Markdown code fence (optionally tagged "json").
JSON_CODE_BLOCK_PATTERN = re.compile(r'```(?:json)?\s*({.*?})\s*```', re.DOTALL)
# A bare JSON object containing at most one level of nested braces.
JSON_OBJECT_PATTERN = re.compile(r'({[^{}]*(?:{[^{}]*}[^{}]*)*})', re.DOTALL)
MAX_OLLAMA_DOCUMENT_CONTEXT_CHARS = 12000
DEFAULT_DEBUG_PAYLOAD_CHARS = 4000


def _read_debug_payload_limit() -> int:
    """Return the debug truncation limit from ``DEBUG_OLLAMA_MAX_CHARS``.

    Falls back to ``DEFAULT_DEBUG_PAYLOAD_CHARS`` when the variable is unset,
    blank, or not an integer, so a typo in a debug-only setting cannot stop
    the module from importing.
    """
    raw_limit = os.getenv('DEBUG_OLLAMA_MAX_CHARS', '').strip()
    if not raw_limit:
        return DEFAULT_DEBUG_PAYLOAD_CHARS
    try:
        return int(raw_limit)
    except ValueError:
        return DEFAULT_DEBUG_PAYLOAD_CHARS


MAX_DEBUG_PAYLOAD_CHARS = _read_debug_payload_limit()
21
+
22
+
23
def _resolve_endpoint_url(*service_env_keys: str) -> Optional[str]:
    """Pick the first environment variable in ``service_env_keys`` with a value.

    Lookup follows argument order. Unset or empty variables are ignored.
    ``None`` is returned when nothing is configured.
    """
    env_values = map(os.getenv, service_env_keys)
    return next(filter(None, env_values), None)
29
+
30
+
31
def create_boto3_client(service_name: str, *service_env_keys: str):
    """Create a boto3 client for ``service_name`` with an optional endpoint override.

    The override is the first non-empty value among ``service_env_keys``,
    with ``AWS_ENDPOINT_URL`` tried last. Without one, boto3 resolves the
    endpoint itself.
    """
    override = _resolve_endpoint_url(*service_env_keys, 'AWS_ENDPOINT_URL')
    extra_options = {'endpoint_url': override} if override else {}
    return boto3.client(service_name, **extra_options)
36
+
37
+
38
def _is_truthy_env(var_name: str) -> bool:
    """Tell whether environment variable ``var_name`` holds an affirmative value.

    The value is trimmed and compared case-insensitively against ``1``,
    ``true``, ``yes`` and ``on``. An unset variable counts as false.
    """
    affirmative = ('1', 'true', 'yes', 'on')
    raw_value = os.getenv(var_name, '')
    normalized = raw_value.strip().lower()
    return normalized in affirmative
40
+
41
+
42
def _debug_ollama_enabled() -> bool:
    """Report whether the ``DEBUG_OLLAMA_PAYLOAD`` flag is switched on."""
    debug_flag_name = 'DEBUG_OLLAMA_PAYLOAD'
    return _is_truthy_env(debug_flag_name)
44
+
45
+
46
def _truncate_for_debug(text: str, max_chars: int = MAX_DEBUG_PAYLOAD_CHARS) -> str:
    """Shorten ``text`` to ``max_chars`` characters for debug output.

    Text within the limit is returned untouched; longer text is cut and
    suffixed with ``...[truncated]``.
    """
    exceeds_limit = len(text) > max_chars
    return f'{text[:max_chars]}...[truncated]' if exceeds_limit else text
50
+
51
+
52
def resolve_model_provider() -> str:
    """Return the model provider selected through ``MODEL_PROVIDER``.

    The value is trimmed and lower-cased. Anything other than the two
    supported providers resolves to ``DEFAULT_MODEL_PROVIDER``.
    """
    supported_providers = (MODEL_PROVIDER_BEDROCK, MODEL_PROVIDER_OLLAMA)
    requested = os.getenv('MODEL_PROVIDER', DEFAULT_MODEL_PROVIDER).strip().lower()
    return requested if requested in supported_providers else DEFAULT_MODEL_PROVIDER
57
+
58
+
59
def _resolve_ollama_model_id(model_id: str) -> str:
    """Translate a configured model id into the name passed to Ollama.

    A non-empty ``OLLAMA_MODEL_ID`` environment variable takes precedence.
    Otherwise a leading ``ollama.`` or ``ollama/`` is stripped from
    ``model_id``, and ids without such a prefix are returned unchanged.
    """
    override = os.getenv('OLLAMA_MODEL_ID')
    if override:
        return override

    for prefix in ('ollama.', 'ollama/'):
        if model_id.startswith(prefix):
            return model_id[len(prefix):]
    return model_id
68
+
69
+
70
def _extract_json_object(response_text: str) -> Optional[Dict[str, Any]]:
    """Extract a JSON object from free-form model output.

    The whole (stripped) text is tried first. If that is not a JSON object,
    the text is scanned left to right and a decode is attempted at every
    ``{``. The last object that decodes successfully is returned, on the
    assumption that a model's final object is its answer.

    Using the JSON decoder rather than a brace-matching pattern means objects
    nested to any depth, and braces inside string values, are handled. An
    object inside a Markdown code fence is found the same way, because the
    scan starts at its opening brace.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The decoded dictionary, or ``None`` when no JSON object is present.
    """
    text = response_text.strip()
    if not text:
        return None

    try:
        whole = json.loads(text)
    except json.JSONDecodeError:
        whole = None
    if isinstance(whole, dict):
        return whole

    decoder = json.JSONDecoder()
    last_object: Optional[Dict[str, Any]] = None
    cursor = text.find('{')
    while cursor != -1:
        try:
            candidate, end = decoder.raw_decode(text, cursor)
        except json.JSONDecodeError:
            # Not a valid object at this brace; try the next one.
            cursor = text.find('{', cursor + 1)
            continue
        if isinstance(candidate, dict):
            last_object = candidate
        # Resume after the decoded object so its nested objects are not
        # treated as separate candidates.
        cursor = text.find('{', end)

    return last_object
104
+
105
+
106
def normalize_model_response(response_text: str, invoke_type: str) -> Dict[str, Any]:
    """Turn raw model text into the dictionary returned to the workflow.

    When no JSON object can be extracted, classification calls yield
    ``{'documentClassification': 'UNKNOWN'}`` and all other calls yield the
    text under ``raw_response``. For classification calls whose JSON lacks
    ``documentClassification``, the value is filled from ``classification``,
    ``document_type`` or ``type`` (first non-empty), else ``'UNKNOWN'``.
    """
    is_classification = invoke_type == 'classification'
    payload = _extract_json_object(response_text)

    if payload is None:
        if is_classification:
            return {'documentClassification': 'UNKNOWN'}
        return {'raw_response': response_text}

    if is_classification and 'documentClassification' not in payload:
        alias_keys = ('classification', 'document_type', 'type')
        alias_value = next((payload.get(alias) for alias in alias_keys if payload.get(alias)), None)
        payload['documentClassification'] = alias_value or 'UNKNOWN'

    return payload
118
+
119
+
120
def _build_ollama_prompt(content: list[Dict[str, Any]]) -> str:
    """Flatten a list of content items into one text prompt.

    ``text`` items contribute their ``text`` value. ``document`` and
    ``image`` items contribute a placeholder naming their media type
    (``application/octet-stream`` when none is given). Other item types
    and empty parts are dropped. Parts are joined with blank lines.
    """
    def _render(item: Dict[str, Any]) -> Any:
        kind = item.get('type')
        if kind == 'text':
            return item.get('text', '')
        if kind in {'document', 'image'}:
            media_type = item.get('source', {}).get('media_type', 'application/octet-stream')
            return f"[Attached {kind} with media type '{media_type}' omitted in direct Ollama mode.]"
        return ''

    # filter(None, ...) discards falsy parts before joining.
    return '\n\n'.join(filter(None, map(_render, content)))
136
+
137
+
138
def extract_pdf_text_for_ollama(local_path: str) -> str:
    """Extract text from a PDF file for direct Ollama prompt mode.

    Pages are read in order until at least
    ``MAX_OLLAMA_DOCUMENT_CONTEXT_CHARS`` characters have been collected.
    Page texts are stripped and joined with blank lines. A result longer
    than the limit is cut to the limit and suffixed with a truncation marker.

    Args:
        local_path: Path handed to ``PyPDF2.PdfReader``.

    Returns:
        The extracted text, or ``''`` when PyPDF2 cannot be imported or
        reading the PDF raises.
    """
    try:
        from PyPDF2 import PdfReader
    except ImportError:
        return ''

    text_parts: list[str] = []
    # Running total of collected characters; avoids re-summing every part
    # after each page.
    collected_chars = 0
    try:
        reader = PdfReader(local_path)
        for page in reader.pages:
            page_text = (page.extract_text() or '').strip()
            if page_text:
                text_parts.append(page_text)
                collected_chars += len(page_text)
            if collected_chars >= MAX_OLLAMA_DOCUMENT_CONTEXT_CHARS:
                break
    except Exception:
        # Best-effort: any failure while opening or reading the PDF yields an
        # empty string, including text already collected from earlier pages.
        return ''

    joined = '\n\n'.join(text_parts)
    if len(joined) > MAX_OLLAMA_DOCUMENT_CONTEXT_CHARS:
        return f'{joined[:MAX_OLLAMA_DOCUMENT_CONTEXT_CHARS]}\n\n[Document text truncated]'
    return joined
161
+
162
+
163
def _invoke_bedrock_model(content: list[Dict[str, Any]], max_tokens: int, model_id: str, bedrock_client) -> str:
    """Invoke a Bedrock model with one user message and return its text.

    The request body uses ``anthropic_version`` ``bedrock-2023-05-31`` with
    *content* as the single user message. The text of the first entry in
    the response's ``content`` list is returned.
    """
    request_body = json.dumps({
        'anthropic_version': 'bedrock-2023-05-31',
        'max_tokens': max_tokens,
        'messages': [{'role': 'user', 'content': content}],
    })
    result = bedrock_client.invoke_model(modelId=model_id, body=request_body)
    # The response body is a readable stream containing JSON.
    completion = json.loads(result['body'].read())
    return completion['content'][0]['text']
175
+
176
+
177
def _invoke_ollama_model(content: list[Dict[str, Any]], max_tokens: int, model_id: str, logger: logging.Logger) -> str:
    """Send *content* to an Ollama chat endpoint and return the reply text.

    The URL is ``OLLAMA_BASE_URL`` (default ``DEFAULT_OLLAMA_BASE_URL``) with
    trailing ``/`` removed, followed by ``OLLAMA_CHAT_PATH``. The content
    list is flattened into a single user message by ``_build_ollama_prompt``.

    Args:
        content: Content items to flatten into the prompt.
        max_tokens: Sent as the ``num_predict`` option.
        model_id: Resolved through ``_resolve_ollama_model_id``.
        logger: Receives the invalid-``OLLAMA_NUM_CTX`` warning and, when
            ``_debug_ollama_enabled()`` is true, request/response debug logs.

    Returns:
        ``message.content`` from the response when it is a string, otherwise
        the top-level ``response`` field when that is a string.

    Raises:
        RuntimeError: When ``urlopen`` raises ``HTTPError`` or ``URLError``,
            or when the response contains neither expected field.

    Note:
        The ``json.loads`` call on the response body is not guarded, so a
        decoding failure propagates as raised.
    """
    ollama_base_url = os.getenv('OLLAMA_BASE_URL', DEFAULT_OLLAMA_BASE_URL).rstrip('/')
    ollama_model_id = _resolve_ollama_model_id(model_id)
    ollama_prompt = _build_ollama_prompt(content)
    ollama_num_ctx = os.getenv('OLLAMA_NUM_CTX')

    options: Dict[str, Any] = {
        'num_predict': max_tokens,
    }
    # num_ctx is only sent when OLLAMA_NUM_CTX parses as an integer.
    if ollama_num_ctx:
        try:
            options['num_ctx'] = int(ollama_num_ctx)
        except ValueError:
            logger.warning('Ignoring invalid OLLAMA_NUM_CTX value: %s', ollama_num_ctx)

    request_payload: Dict[str, Any] = {
        'model': ollama_model_id,
        'messages': [{'role': 'user', 'content': ollama_prompt}],
        'stream': False,
        'options': options,
    }
    request_body = json.dumps(request_payload).encode('utf-8')

    if _debug_ollama_enabled():
        logger.info(
            'Ollama request debug: model=%s base_url=%s prompt_len=%s request=%s',
            ollama_model_id,
            ollama_base_url,
            len(ollama_prompt),
            _truncate_for_debug(json.dumps(request_payload)),
        )

    request = urllib_request.Request(
        f'{ollama_base_url}{OLLAMA_CHAT_PATH}',
        data=request_body,
        headers={'Content-Type': 'application/json'},
        method='POST',
    )

    # HTTPError is a subclass of URLError, so it must be handled first to
    # keep the status code and response body in the error message.
    try:
        with urllib_request.urlopen(request, timeout=120) as response:
            raw_payload = response.read().decode('utf-8')
    except urllib_error.HTTPError as error:
        error_payload = error.read().decode('utf-8', errors='replace')
        raise RuntimeError(f'Ollama HTTP error {error.code}: {error_payload}') from error
    except urllib_error.URLError as error:
        raise RuntimeError(f'Failed to connect to Ollama endpoint: {error}') from error

    if _debug_ollama_enabled():
        logger.info(
            'Ollama raw response debug: %s',
            _truncate_for_debug(raw_payload),
        )

    payload = json.loads(raw_payload)
    # Preferred shape: {"message": {"content": "..."}}.
    message = payload.get('message', {})
    if isinstance(message, dict) and isinstance(message.get('content'), str):
        return message['content']

    # Fallback shape: {"response": "..."}.
    if isinstance(payload.get('response'), str):
        return payload['response']

    raise RuntimeError(f'Unexpected Ollama response format: {payload}')
240
+
241
+
242
def invoke_model(
    content: list[Dict[str, Any]],
    max_tokens: int,
    model_id: str,
    bedrock_client,
    logger: logging.Logger,
) -> str:
    """Route a model call to the configured provider and return its text.

    When ``resolve_model_provider()`` returns ``MODEL_PROVIDER_OLLAMA`` the
    call goes to ``_invoke_ollama_model`` (which uses *logger*). Otherwise
    it goes to ``_invoke_bedrock_model`` (which uses *bedrock_client*).
    """
    use_ollama = resolve_model_provider() == MODEL_PROVIDER_OLLAMA
    if not use_ollama:
        return _invoke_bedrock_model(content, max_tokens, model_id, bedrock_client)
    return _invoke_ollama_model(content, max_tokens, model_id, logger)
@@ -0,0 +1,5 @@
1
+ boto3>=1.26.0
2
+ aws-lambda-powertools
3
+ urllib3>=1.26.0,<2.0.0
4
+ aws-xray-sdk
5
+ PyPDF2>=3.0.0
@@ -7,7 +7,22 @@ import re
7
7
  from aws_lambda_powertools import Metrics, Tracer
8
8
  from aws_lambda_powertools.metrics import MetricUnit
9
9
 
10
- sfn_client = boto3.client('stepfunctions')
10
def _resolve_endpoint_url(*service_env_keys):
    """Return the first non-empty value among the named environment variables.

    The variables are checked in the order given. ``None`` is returned when
    none of them is set to a non-empty value.
    """
    configured = (os.getenv(name) for name in service_env_keys)
    return next((value for value in configured if value), None)
16
+
17
+
18
def _create_boto3_client(service_name, *service_env_keys):
    """Create a boto3 client, honouring endpoint-override environment variables.

    The given service-specific variables are consulted first, then
    ``AWS_ENDPOINT_URL``. When none is set, the client is created without an
    explicit endpoint.
    """
    endpoint_url = _resolve_endpoint_url(*service_env_keys, 'AWS_ENDPOINT_URL')
    if not endpoint_url:
        return boto3.client(service_name)
    return boto3.client(service_name, endpoint_url=endpoint_url)
23
+
24
+
25
+ sfn_client = _create_boto3_client('stepfunctions', 'AWS_ENDPOINT_URL_STEPFUNCTIONS')
11
26
  metrics = Metrics()
12
27
  tracer = Tracer()
13
28
 
@@ -113,4 +128,4 @@ def handler(event, context):
113
128
  'statusCode': 200,
114
129
  'processedCount': len(results),
115
130
  'results': results
116
- }
131
+ }
@@ -68,8 +68,23 @@ structured_logger = get_logger(__name__)
68
68
  logger = logging.getLogger()
69
69
  logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
70
70
 
71
def _resolve_endpoint_url(*service_env_keys: str) -> Optional[str]:
    """Return the first non-empty value among the named environment variables.

    The variables are checked in the order given. ``None`` is returned when
    none of them is set to a non-empty value.
    """
    configured = (os.getenv(name) for name in service_env_keys)
    return next((value for value in configured if value), None)
77
+
78
+
79
def _create_boto3_client(service_name: str, *service_env_keys: str):
    """Create a boto3 client, honouring endpoint-override environment variables.

    The given service-specific variables are consulted first, then
    ``AWS_ENDPOINT_URL``. When none is set, the client is created without an
    explicit endpoint.
    """
    endpoint_url = _resolve_endpoint_url(*service_env_keys, 'AWS_ENDPOINT_URL')
    if not endpoint_url:
        return boto3.client(service_name)
    return boto3.client(service_name, endpoint_url=endpoint_url)
84
+
85
+
71
86
  # Initialize AWS clients
72
- s3_client = boto3.client('s3')
87
+ s3_client = _create_boto3_client('s3', 'AWS_ENDPOINT_URL_S3')
73
88
 
74
89
  # Get Powertools metrics instance
75
90
  metrics = get_metrics()