@cdklabs/cdk-appmod-catalog-blueprints 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/.jsii +8644 -0
  2. package/LICENSE +202 -0
  3. package/README.md +212 -0
  4. package/lib/document-processing/agentic-document-processing.d.ts +16 -0
  5. package/lib/document-processing/agentic-document-processing.js +90 -0
  6. package/lib/document-processing/base-document-processing.d.ts +189 -0
  7. package/lib/document-processing/base-document-processing.js +509 -0
  8. package/lib/document-processing/bedrock-document-processing.d.ts +167 -0
  9. package/lib/document-processing/bedrock-document-processing.js +297 -0
  10. package/lib/document-processing/index.d.ts +3 -0
  11. package/lib/document-processing/index.js +20 -0
  12. package/lib/document-processing/resources/default-bedrock-invoke/index.py +63 -0
  13. package/lib/document-processing/resources/default-bedrock-invoke/requirements.txt +4 -0
  14. package/lib/document-processing/resources/default-doc-retrieval-lambda/index.mjs +92 -0
  15. package/lib/document-processing/resources/default-doc-retrieval-lambda/package.json +10 -0
  16. package/lib/document-processing/resources/default-error-handler/index.js +46 -0
  17. package/lib/document-processing/resources/default-error-handler/package.json +4 -0
  18. package/lib/document-processing/resources/default-image-processor/classifier.mjs +665 -0
  19. package/lib/document-processing/resources/default-image-processor/extractors.mjs +465 -0
  20. package/lib/document-processing/resources/default-image-processor/index.mjs +143 -0
  21. package/lib/document-processing/resources/default-image-processor/package-lock.json +12 -0
  22. package/lib/document-processing/resources/default-image-processor/package.json +4 -0
  23. package/lib/document-processing/resources/default-image-validator/index.mjs +76 -0
  24. package/lib/document-processing/resources/default-image-validator/package-lock.json +154 -0
  25. package/lib/document-processing/resources/default-image-validator/package.json +7 -0
  26. package/lib/document-processing/resources/default-pdf-processor/index.js +46 -0
  27. package/lib/document-processing/resources/default-pdf-validator/index.js +36 -0
  28. package/lib/document-processing/resources/default-sqs-consumer/index.py +111 -0
  29. package/lib/document-processing/resources/default-sqs-consumer/requirements.txt +4 -0
  30. package/lib/document-processing/resources/default-sqs-consumer/sample_payload.json +20 -0
  31. package/lib/document-processing/resources/default-sqs-consumer/sample_payload_multi.json +24 -0
  32. package/lib/document-processing/resources/default-strands-agent/index.py +111 -0
  33. package/lib/document-processing/resources/default-strands-agent/requirements.txt +6 -0
  34. package/lib/document-processing/tests/agentic-document-processing-nag.test.d.ts +1 -0
  35. package/lib/document-processing/tests/agentic-document-processing-nag.test.js +107 -0
  36. package/lib/document-processing/tests/agentic-document-processing.test.d.ts +1 -0
  37. package/lib/document-processing/tests/agentic-document-processing.test.js +125 -0
  38. package/lib/document-processing/tests/bedrock-document-processing-nag.test.d.ts +1 -0
  39. package/lib/document-processing/tests/bedrock-document-processing-nag.test.js +101 -0
  40. package/lib/document-processing/tests/bedrock-document-processing.test.d.ts +1 -0
  41. package/lib/document-processing/tests/bedrock-document-processing.test.js +79 -0
  42. package/lib/framework/custom-resource/default-runtimes.d.ts +21 -0
  43. package/lib/framework/custom-resource/default-runtimes.js +34 -0
  44. package/lib/framework/custom-resource/index.d.ts +1 -0
  45. package/lib/framework/custom-resource/index.js +18 -0
  46. package/lib/framework/foundation/access-log.d.ts +69 -0
  47. package/lib/framework/foundation/access-log.js +121 -0
  48. package/lib/framework/foundation/eventbridge-broker.d.ts +18 -0
  49. package/lib/framework/foundation/eventbridge-broker.js +42 -0
  50. package/lib/framework/foundation/index.d.ts +3 -0
  51. package/lib/framework/foundation/index.js +20 -0
  52. package/lib/framework/foundation/network.d.ts +19 -0
  53. package/lib/framework/foundation/network.js +83 -0
  54. package/lib/framework/index.d.ts +2 -0
  55. package/lib/framework/index.js +19 -0
  56. package/lib/framework/quickstart/base-quickstart.d.ts +30 -0
  57. package/lib/framework/quickstart/base-quickstart.js +30 -0
  58. package/lib/index.d.ts +4 -0
  59. package/lib/index.js +21 -0
  60. package/lib/tsconfig.tsbuildinfo +1 -0
  61. package/lib/utilities/cdk-nag-config.d.ts +42 -0
  62. package/lib/utilities/cdk-nag-config.js +194 -0
  63. package/lib/utilities/data-loader-lambda/index.py +282 -0
  64. package/lib/utilities/data-loader-lambda/requirements.txt +3 -0
  65. package/lib/utilities/data-loader.d.ts +173 -0
  66. package/lib/utilities/data-loader.js +447 -0
  67. package/lib/utilities/index.d.ts +3 -0
  68. package/lib/utilities/index.js +20 -0
  69. package/lib/utilities/lambda-iam-utils.d.ts +145 -0
  70. package/lib/utilities/lambda-iam-utils.js +235 -0
  71. package/lib/utilities/lambda_layers/data-masking/layer-construct.d.ts +42 -0
  72. package/lib/utilities/lambda_layers/data-masking/layer-construct.js +53 -0
  73. package/lib/utilities/lambda_layers/data-masking/layer-construct.ts +88 -0
  74. package/lib/utilities/observability/bedrock-observability.d.ts +18 -0
  75. package/lib/utilities/observability/bedrock-observability.js +131 -0
  76. package/lib/utilities/observability/cloudfront-distribution-observability-property-injector.d.ts +6 -0
  77. package/lib/utilities/observability/cloudfront-distribution-observability-property-injector.js +22 -0
  78. package/lib/utilities/observability/index.d.ts +6 -0
  79. package/lib/utilities/observability/index.js +25 -0
  80. package/lib/utilities/observability/lambda-observability-property-injector.d.ts +8 -0
  81. package/lib/utilities/observability/lambda-observability-property-injector.js +43 -0
  82. package/lib/utilities/observability/log-group-data-protection-props.d.ts +19 -0
  83. package/lib/utilities/observability/log-group-data-protection-props.js +5 -0
  84. package/lib/utilities/observability/observability.d.ts +83 -0
  85. package/lib/utilities/observability/observability.js +278 -0
  86. package/lib/utilities/observability/observable.d.ts +32 -0
  87. package/lib/utilities/observability/observable.js +3 -0
  88. package/lib/utilities/observability/powertools-config.d.ts +3 -0
  89. package/lib/utilities/observability/powertools-config.js +25 -0
  90. package/lib/utilities/observability/resources/bedrock-manage-logging-configuration/index.py +27 -0
  91. package/lib/utilities/observability/state-machine-observability-property-injector.d.ts +8 -0
  92. package/lib/utilities/observability/state-machine-observability-property-injector.js +49 -0
  93. package/lib/utilities/tests/data-loader-nag.test.d.ts +1 -0
  94. package/lib/utilities/tests/data-loader-nag.test.js +432 -0
  95. package/lib/utilities/tests/data-loader.test.d.ts +1 -0
  96. package/lib/utilities/tests/data-loader.test.js +284 -0
  97. package/lib/webapp/frontend-construct.d.ts +136 -0
  98. package/lib/webapp/frontend-construct.js +253 -0
  99. package/lib/webapp/index.d.ts +1 -0
  100. package/lib/webapp/index.js +18 -0
  101. package/lib/webapp/tests/frontend-construct-nag.test.d.ts +1 -0
  102. package/lib/webapp/tests/frontend-construct-nag.test.js +266 -0
  103. package/lib/webapp/tests/frontend-construct.test.d.ts +1 -0
  104. package/lib/webapp/tests/frontend-construct.test.js +385 -0
  105. package/package.json +183 -0
@@ -0,0 +1,465 @@
1
+ /**
2
+ * Extractors for different document types from Textract API results
3
+ */
4
+
5
/**
 * Extract key information from identity documents (AnalyzeID results).
 *
 * Only the first entry of `IdentityDocuments` is inspected. Every field that has
 * both a type and a non-empty detected value is stored under
 * `identityDocumentFields[<type>] = { value, confidence }`, and a set of
 * well-known field types is additionally copied to camelCase shortcut
 * properties (`firstName`, `dateOfBirth`, ...) on the returned object.
 *
 * @param {Object} analyzeIdResult - Result from Textract AnalyzeID API
 * @returns {Object} Extracted identity information. When the input is missing or
 *   contains no identity documents, the empty skeleton is returned.
 */
export function extractIdentityDocumentInfo(analyzeIdResult) {
  const extractedInfo = {
    documentType: 'IDENTITY_DOCUMENT',
    identityDocumentFields: {},
    identityDocumentType: null
  };

  if (!analyzeIdResult || !analyzeIdResult.IdentityDocuments || analyzeIdResult.IdentityDocuments.length === 0) {
    return extractedInfo;
  }

  const idDoc = analyzeIdResult.IdentityDocuments[0];
  const detectedFields = extractedInfo.identityDocumentFields;

  // Process identity document fields; fields without a detected value are skipped
  if (idDoc.IdentityDocumentFields) {
    idDoc.IdentityDocumentFields.forEach(field => {
      if (field.Type && field.Type.Text && field.ValueDetection && field.ValueDetection.Text) {
        detectedFields[field.Type.Text] = {
          value: field.ValueDetection.Text,
          confidence: field.ValueDetection.Confidence
        };
      }
    });
  }

  // AnalyzeID reports the kind of document through the ID_TYPE field, so fall
  // back to it when the document carries no explicit DocumentType property.
  extractedInfo.identityDocumentType =
    idDoc.DocumentType || (detectedFields.ID_TYPE ? detectedFields.ID_TYPE.value : null);

  // Field type -> shortcut property. Later entries override earlier ones, so
  // EXPIRATION_DATE (the type AnalyzeID emits) is listed before the legacy
  // DATE_OF_EXPIRY name to keep the previous precedence when both are present.
  const commonFields = [
    ['FIRST_NAME', 'firstName'],
    ['LAST_NAME', 'lastName'],
    ['MIDDLE_NAME', 'middleName'],
    ['FULL_NAME', 'fullName'],
    ['DATE_OF_BIRTH', 'dateOfBirth'],
    ['DATE_OF_ISSUE', 'dateOfIssue'],
    ['EXPIRATION_DATE', 'dateOfExpiry'],
    ['DATE_OF_EXPIRY', 'dateOfExpiry'],
    ['DOCUMENT_NUMBER', 'documentNumber'],
    ['DOCUMENT_ID', 'documentId'],
    ['ADDRESS', 'address'],
    ['ISSUED_BY', 'issuedBy'],
    ['GENDER', 'gender'],
    ['NATIONALITY', 'nationality'],
    ['COUNTRY', 'country'],
    ['PLACE_OF_BIRTH', 'placeOfBirth']
  ];

  // Extract common fields for easier access
  commonFields.forEach(([apiField, normalizedField]) => {
    if (detectedFields[apiField]) {
      extractedInfo[normalizedField] = detectedFields[apiField].value;
    }
  });

  return extractedInfo;
}
67
+
68
/**
 * Extract key information from expense documents (AnalyzeExpense results).
 *
 * Only the first entry of `ExpenseDocuments` is inspected. Each summary field is
 * recorded under `summaryFields[<type>] = { value, confidence }`; a handful of
 * well-known types are also copied to top-level shortcuts (`total`, `vendor`,
 * `dates.documentDate`, ...). Line items become plain objects keyed by the
 * lower-cased, underscore-separated field type.
 *
 * @param {Object} analyzeExpenseResult - Result from Textract AnalyzeExpense API
 * @returns {Object} Extracted expense information
 */
export function extractExpenseInfo(analyzeExpenseResult) {
  const extractedInfo = {
    documentType: 'EXPENSE_DOCUMENT',
    expenseDocumentType: null, // INVOICE or RECEIPT
    summaryFields: {},
    lineItems: [],
    vendor: null,
    total: null,
    subtotal: null,
    tax: null,
    paymentInfo: {},
    dates: {}
  };

  const documents = analyzeExpenseResult && analyzeExpenseResult.ExpenseDocuments;
  if (!documents || documents.length === 0) {
    return extractedInfo;
  }

  const expenseDoc = documents[0];

  // Determine if it's an invoice or receipt
  if (expenseDoc.Type) {
    extractedInfo.expenseDocumentType = expenseDoc.Type.Text;
  }

  // Summary field type -> setter for the matching shortcut property
  const setTax = value => { extractedInfo.tax = value; };
  const setDocumentDate = value => { extractedInfo.dates.documentDate = value; };
  const setDueDate = value => { extractedInfo.dates.dueDate = value; };
  const shortcutSetters = new Map([
    ['TOTAL', value => { extractedInfo.total = value; }],
    ['SUBTOTAL', value => { extractedInfo.subtotal = value; }],
    ['TAX', setTax],
    ['TOTAL_TAX', setTax],
    ['VENDOR_NAME', value => { extractedInfo.vendor = value; }],
    ['INVOICE_RECEIPT_DATE', setDocumentDate],
    ['RECEIPT_DATE', setDocumentDate],
    ['INVOICE_DATE', setDocumentDate],
    ['PAYMENT_DATE', setDueDate],
    ['DUE_DATE', setDueDate],
    ['PAYMENT_TERMS', value => { extractedInfo.paymentInfo.terms = value; }],
    ['PAYMENT_METHOD', value => { extractedInfo.paymentInfo.method = value; }]
  ]);

  // Process summary fields
  if (expenseDoc.SummaryFields) {
    expenseDoc.SummaryFields.forEach(({ Type, ValueDetection }) => {
      if (!(Type && Type.Text && ValueDetection)) {
        return;
      }

      const name = Type.Text;
      const value = ValueDetection.Text;

      extractedInfo.summaryFields[name] = {
        value,
        confidence: ValueDetection.Confidence
      };

      // Mirror well-known fields onto the shortcut properties
      const applyShortcut = shortcutSetters.get(name);
      if (applyShortcut) {
        applyShortcut(value);
      }
    });
  }

  // Process line items
  if (expenseDoc.LineItemGroups) {
    expenseDoc.LineItemGroups.forEach(group => {
      if (!group.LineItems) {
        return;
      }

      group.LineItems.forEach(item => {
        const entry = {};

        if (item.LineItemExpenseFields) {
          item.LineItemExpenseFields.forEach(expenseField => {
            const hasLabel = expenseField.Type && expenseField.Type.Text;
            if (hasLabel && expenseField.ValueDetection) {
              const column = expenseField.Type.Text.toLowerCase().replace(/\s+/g, '_');
              entry[column] = expenseField.ValueDetection.Text;
            }
          });
        }

        // Items that produced no columns are dropped
        if (Object.keys(entry).length > 0) {
          extractedInfo.lineItems.push(entry);
        }
      });
    });
  }

  return extractedInfo;
}
172
+
173
/**
 * Extract key information from lending documents (AnalyzeLending results).
 *
 * Only the first entry of `LendingDocuments` is inspected. Every extracted field
 * is stored under `extractedFields[<type>] = { value, confidence }` and is also
 * filed into the first section whose keyword occurs in the field type, checked
 * in this order: LOAN, PROPERTY, BORROWER, LENDER, PAYMENT.
 *
 * @param {Object} analyzeLendingResult - Result from Textract AnalyzeLending API
 * @returns {Object} Extracted lending information
 */
export function extractLendingInfo(analyzeLendingResult) {
  const extractedInfo = {
    documentType: 'LENDING_DOCUMENT',
    lendingType: null,
    extractedFields: {},
    signatures: [],
    paymentInfo: {},
    loanInfo: {},
    propertyInfo: {},
    borrowerInfo: {},
    lenderInfo: {}
  };

  const documents = analyzeLendingResult && analyzeLendingResult.LendingDocuments;
  if (!documents || documents.length === 0) {
    return extractedInfo;
  }

  const lendingDoc = documents[0];

  // Get lending type
  if (lendingDoc.Type) {
    extractedInfo.lendingType = lendingDoc.Type.Text;
  }

  // Keyword -> target section; order matters because the first match wins
  const sections = [
    ['LOAN', extractedInfo.loanInfo],
    ['PROPERTY', extractedInfo.propertyInfo],
    ['BORROWER', extractedInfo.borrowerInfo],
    ['LENDER', extractedInfo.lenderInfo],
    ['PAYMENT', extractedInfo.paymentInfo]
  ];

  // Process extracted fields
  if (lendingDoc.ExtractedFields) {
    lendingDoc.ExtractedFields.forEach(({ Type, ValueDetection }) => {
      if (!(Type && Type.Text && ValueDetection)) {
        return;
      }

      const name = Type.Text;
      const value = ValueDetection.Text;

      extractedInfo.extractedFields[name] = {
        value,
        confidence: ValueDetection.Confidence
      };

      // Categorize the field into the appropriate section, if any
      const section = sections.find(([keyword]) => name.includes(keyword));
      if (section) {
        section[1][name] = value;
      }
    });
  }

  // Process signatures
  if (lendingDoc.Signatures) {
    lendingDoc.Signatures.forEach(({ ValueDetection, Page }) => {
      if (!ValueDetection) {
        return;
      }

      extractedInfo.signatures.push({
        value: ValueDetection.Text,
        confidence: ValueDetection.Confidence,
        page: Page
      });
    });
  }

  return extractedInfo;
}
246
+
247
/**
 * Collect the ids referenced by every relationship of the given type on a block.
 * Blocks without relationships, and relationships without ids, contribute nothing.
 */
function relatedIds(block, relationshipType) {
  return (block.Relationships || [])
    .filter(relationship => relationship.Type === relationshipType)
    .flatMap(relationship => relationship.Ids || []);
}

/**
 * Resolve the CHILD ids of `parent` to blocks of `blockType`, returned in the
 * order those blocks appear in the Blocks array.
 */
function resolveChildren(parent, blockType, blockIndex) {
  const hits = [];
  for (const id of new Set(relatedIds(parent, 'CHILD'))) {
    const hit = blockIndex.get(id);
    if (hit && hit.block.BlockType === blockType) {
      hits.push(hit);
    }
  }
  return hits
    .sort((a, b) => a.position - b.position)
    .map(hit => hit.block);
}

/** Build the `{ keyText: valueText }` map from KEY_VALUE_SET and WORD blocks. */
function extractFormFields(blocks) {
  const keys = new Map();
  const values = new Map();

  // Single pass over KEY_VALUE_SET blocks: remember each key's value id and the
  // word ids that make up the key / value text.
  for (const block of blocks) {
    if (block.BlockType !== 'KEY_VALUE_SET' || !block.EntityTypes) {
      continue;
    }
    if (block.EntityTypes.includes('KEY')) {
      const valueIds = relatedIds(block, 'VALUE');
      keys.set(block.Id, {
        valueId: valueIds[valueIds.length - 1], // last referenced value wins
        childIds: relatedIds(block, 'CHILD'),
        text: ''
      });
    } else if (block.EntityTypes.includes('VALUE')) {
      values.set(block.Id, { childIds: relatedIds(block, 'CHILD'), text: '' });
    }
  }

  // Index word id -> entries that own it, so text can be assembled in one pass
  // over the blocks instead of scanning every key and value per word.
  const ownersByWordId = new Map();
  for (const entry of [...keys.values(), ...values.values()]) {
    for (const wordId of new Set(entry.childIds)) {
      const owners = ownersByWordId.get(wordId);
      if (owners) {
        owners.push(entry);
      } else {
        ownersByWordId.set(wordId, [entry]);
      }
    }
  }

  // Words are appended in Blocks order, each preceded by a space (trimmed below)
  for (const block of blocks) {
    if (block.BlockType !== 'WORD') {
      continue;
    }
    for (const entry of ownersByWordId.get(block.Id) || []) {
      entry.text = `${entry.text} ${block.Text}`;
    }
  }

  // Map keys to values; pairs with an empty key or value text are dropped
  const formFields = {};
  for (const key of keys.values()) {
    const value = values.get(key.valueId);
    if (!value) {
      continue;
    }
    const keyText = key.text.trim();
    const valueText = value.text.trim();
    if (keyText && valueText) {
      formFields[keyText] = valueText;
    }
  }
  return formFields;
}

/** Build one `{ rows, rowCount, columnCount, page }` entry per TABLE block. */
function extractTables(blocks) {
  const blockIndex = new Map();
  blocks.forEach((block, position) => {
    blockIndex.set(block.Id, { block, position });
  });

  return blocks
    .filter(block => block.BlockType === 'TABLE')
    .map(tableBlock => {
      // Group cells by row
      const cellsByRow = new Map();
      for (const cell of resolveChildren(tableBlock, 'CELL', blockIndex)) {
        const rowCells = cellsByRow.get(cell.RowIndex);
        if (rowCells) {
          rowCells.push(cell);
        } else {
          cellsByRow.set(cell.RowIndex, [cell]);
        }
      }

      // Rows ordered by RowIndex, cells within a row ordered by ColumnIndex
      const rows = [...cellsByRow.entries()]
        .sort(([rowA], [rowB]) => rowA - rowB)
        .map(([, rowCells]) => rowCells
          .sort((a, b) => a.ColumnIndex - b.ColumnIndex)
          .map(cell => resolveChildren(cell, 'WORD', blockIndex)
            .map(word => word.Text)
            .join(' ')
            .trim()));

      return {
        rows,
        rowCount: tableBlock.RowCount || 0,
        columnCount: tableBlock.ColumnCount || 0,
        page: tableBlock.Page || 1
      };
    });
}

/** Join the text of all LINE blocks, ordered by page and then by vertical position. */
function extractLineText(blocks) {
  // Blocks may lack Page or Geometry; treat those as page 1 / top of page
  // instead of throwing or producing NaN comparisons.
  const pageOf = block => block.Page || 1;
  const topOf = block =>
    (block.Geometry && block.Geometry.BoundingBox && block.Geometry.BoundingBox.Top) || 0;

  return blocks
    .filter(block => block.BlockType === 'LINE')
    .sort((a, b) => (pageOf(a) - pageOf(b)) || (topOf(a) - topOf(b)))
    .map(block => block.Text)
    .join('\n');
}

/**
 * Extract key information from generic documents (AnalyzeDocument results).
 *
 * Produces the page count (number of PAGE blocks), form fields (KEY_VALUE_SET
 * pairs resolved to their WORD text), tables (CELL text arranged by row and
 * column) and the full text (LINE blocks joined with newlines).
 *
 * @param {Object} analyzeDocumentResult - Result from Textract AnalyzeDocument API
 * @returns {Object} Extracted document information. When the input is missing or
 *   has no `Blocks`, the empty skeleton is returned.
 */
export function extractDocumentInfo(analyzeDocumentResult) {
  const extractedInfo = {
    documentType: 'GENERIC_DOCUMENT',
    formFields: {},
    tables: [],
    text: '',
    pages: 0
  };

  if (!analyzeDocumentResult || !analyzeDocumentResult.Blocks) {
    return extractedInfo;
  }

  const blocks = analyzeDocumentResult.Blocks;

  extractedInfo.pages = blocks.filter(block => block.BlockType === 'PAGE').length;
  extractedInfo.formFields = extractFormFields(blocks);
  extractedInfo.tables = extractTables(blocks);
  extractedInfo.text = extractLineText(blocks);

  return extractedInfo;
}
439
+
440
/**
 * Route a Textract analysis result to the extractor matching the API that
 * produced it. Any API name other than AnalyzeID, AnalyzeExpense or
 * AnalyzeLending (including AnalyzeDocument) is handled by the generic
 * document extractor.
 *
 * @param {Object} analysisResult - Result from Textract API
 * @param {string} apiUsed - The Textract API that was used
 * @returns {Object} Extracted information, or `{ error }` when no result is given
 */
export function processAnalysisResult(analysisResult, apiUsed) {
  if (!analysisResult) {
    return { error: 'No analysis result provided' };
  }

  if (apiUsed === 'AnalyzeID') {
    return extractIdentityDocumentInfo(analysisResult);
  }

  if (apiUsed === 'AnalyzeExpense') {
    return extractExpenseInfo(analysisResult);
  }

  if (apiUsed === 'AnalyzeLending') {
    return extractLendingInfo(analysisResult);
  }

  // 'AnalyzeDocument' and every unrecognised API name
  return extractDocumentInfo(analysisResult);
}
@@ -0,0 +1,143 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ import { S3Client, CopyObjectCommand, DeleteObjectCommand } from '@aws-sdk/client-s3';
4
+ import { DynamoDBClient, PutItemCommand } from '@aws-sdk/client-dynamodb';
5
+ import { processDocument } from './classifier.mjs';
6
+
7
+ const s3Client = new S3Client({region: process.env.AWS_REGION});
8
+ const ddbClient = new DynamoDBClient({region: process.env.AWS_REGION});
9
+
10
/**
 * Drain an event-emitting stream into a single Buffer.
 *
 * @param {Object} stream - Emitter that fires 'data', 'error' and 'end' events
 * @returns {Promise<Buffer>} Resolves on 'end' with all received chunks
 *   concatenated in arrival order; rejects with the emitted error on 'error'.
 */
async function streamToBuffer(stream) {
  const received = [];

  return new Promise((resolve, reject) => {
    const onData = (piece) => {
      // Each chunk is converted with Buffer.from before being stored
      received.push(Buffer.from(piece));
    };
    const onError = (failure) => {
      reject(failure);
    };
    const onEnd = () => {
      resolve(Buffer.concat(received));
    };

    stream.on('data', onData);
    stream.on('error', onError);
    stream.on('end', onEnd);
  });
}
18
+
19
/**
 * Move an object within a bucket to the prefix that matches its processing status.
 *
 * The destination prefix is read from OUTPUT_PREFIX when `status` is
 * "PROCESSED" and from FAILED_PREFIX for any other status. The object keeps its
 * file name (the last '/'-separated segment of `sourceKey`), which is appended
 * directly to the prefix. The move is a copy followed by a delete of the original.
 *
 * @param {string} bucket - Bucket holding the object
 * @param {string} sourceKey - Current key of the object
 * @param {string} status - "PROCESSED" or any other value for the failed prefix
 * @returns {Promise<string>} Key of the object after the move
 * @throws {Error} When the required prefix variable is unset, or when the copy
 *   or delete request fails (the error is logged and rethrown)
 */
async function moveFile(bucket, sourceKey, status) {
  try {
    // Get the appropriate prefix based on status
    const isProcessed = status === "PROCESSED";
    const destinationPrefix = isProcessed
      ? process.env.OUTPUT_PREFIX
      : process.env.FAILED_PREFIX;

    if (!destinationPrefix) {
      throw new Error(`${isProcessed ? 'OUTPUT_PREFIX' : 'FAILED_PREFIX'} environment variable is not set`);
    }

    // Extract filename from the source key
    const fileName = sourceKey.split('/').pop();
    const destinationKey = `${destinationPrefix}${fileName}`;

    // Already where it belongs: copying an object onto itself is rejected by
    // S3, and there is nothing to delete.
    if (destinationKey === sourceKey) {
      return destinationKey;
    }

    // CopySource must be URL-encoded; encode each path segment so keys with
    // spaces or other special characters work while '/' separators are kept.
    const encodedSourceKey = sourceKey
      .split('/')
      .map(encodeURIComponent)
      .join('/');

    // Copy the object to new location
    await s3Client.send(new CopyObjectCommand({
      Bucket: bucket,
      CopySource: `${bucket}/${encodedSourceKey}`,
      Key: destinationKey
    }));

    // Delete the original object
    await s3Client.send(new DeleteObjectCommand({
      Bucket: bucket,
      Key: sourceKey
    }));

    return destinationKey;
  } catch (error) {
    console.error('Error moving file:', error);
    throw error;
  }
}
53
+
54
/**
 * Lambda entry point: classify and extract a document, move it to the processed
 * prefix and record the outcome in the metadata table.
 *
 * Reads INPUT_BUCKET, METADATA_TABLE and AWS_REGION from the environment.
 *
 * @param {Object} event - Invocation payload
 * @param {string} event.documentId - Identifier stored with the metadata item
 * @param {string} event.key - Key of the document inside INPUT_BUCKET
 * @returns {Promise<Object>} `{ status: "PROCESSED", message, metadata, newLocation,
 *   classificationMethod, genaiUsed }` on success, otherwise
 *   `{ status: "FAILED", error, newLocation }`. `newLocation` is null when the
 *   document could not be moved to the failed prefix.
 */
export const handler = async (event) => {
  console.log('Event:', JSON.stringify(event, null, 2));
  const bucket = process.env.INPUT_BUCKET;
  const documentId = event ? event.documentId : undefined;
  const key = event ? event.key : undefined;

  // Tracks where the document currently lives so a late failure (for example
  // the metadata write) moves it from its actual location, not the stale one.
  let currentKey = key;

  // DynamoDB string attributes cannot be undefined; JSON.stringify(undefined)
  // returns undefined, so serialise missing values as "null" instead.
  const toJsonString = (value) => JSON.stringify(value === undefined ? null : value);

  try {
    if (!documentId || !bucket || !key) {
      throw new Error('Missing required fields: documentId, bucket, or key');
    }

    // Validate configuration before doing any work, so a missing table name
    // does not leave a document processed but unrecorded.
    const tableName = process.env.METADATA_TABLE;
    if (!tableName) {
      throw new Error('METADATA_TABLE environment variable is not set');
    }

    // GenAI configuration removed - will be handled by Step Functions integration
    console.log('Using traditional Lambda processing (Textract + rules)');

    const processResults = await processDocument(bucket, key, {
      region: process.env.AWS_REGION,
      processWithRecommendedApi: true,
      genaiConfig: null // No GenAI in Lambda
    });

    console.log('Classification Method:', processResults.classificationMethod);
    console.log('GenAI Used:', processResults.genaiUsed);
    // NOTE(review): extracted entities may contain personal data from the
    // document - confirm that writing them to the logs is intended.
    console.log(processResults.extractedEntities);

    const newLocation = await moveFile(bucket, key, "PROCESSED");
    currentKey = newLocation;

    const metadata = {
      s3Location: newLocation,
      timestamp: new Date().toISOString()
    };

    // Store metadata in DynamoDB
    await ddbClient.send(
      new PutItemCommand({
        TableName: tableName,
        Item: {
          documentId: {
            S: documentId
          },
          s3Location: {
            S: metadata.s3Location
          },
          createdAt: {
            S: metadata.timestamp
          },
          classification: {
            S: toJsonString(processResults.classification)
          },
          classificationMethod: {
            S: processResults.classificationMethod || 'rule-based'
          },
          genaiUsed: {
            BOOL: processResults.genaiUsed || false
          },
          textractApiUsed: {
            S: toJsonString(processResults.apiUsed)
          },
          entities: {
            S: toJsonString(processResults.extractedEntities)
          }
        }
      })
    );

    // Return the metadata
    return {
      status: "PROCESSED",
      message: 'Document processed successfully',
      metadata: metadata,
      newLocation,
      classificationMethod: processResults.classificationMethod,
      genaiUsed: processResults.genaiUsed
    };

  } catch (error) {
    console.error('Error processing document:', error);

    // Best effort: a failure to relocate the document must not hide the
    // original error or turn the FAILED result into an unhandled rejection.
    let newLocation = null;
    if (bucket && currentKey) {
      try {
        newLocation = await moveFile(bucket, currentKey, "FAILED");
      } catch (moveError) {
        console.error('Unable to move document to the failed prefix:', moveError);
      }
    }

    return {
      status: "FAILED",
      error: error.message || 'Unknown error',
      newLocation
    };
  }
};
@@ -0,0 +1,12 @@
1
+ {
2
+ "name": "@document-processor/default-image-processor",
3
+ "version": "0.0.1",
4
+ "lockfileVersion": 3,
5
+ "requires": true,
6
+ "packages": {
7
+ "": {
8
+ "name": "@document-processor/default-image-processor",
9
+ "version": "0.0.1"
10
+ }
11
+ }
12
+ }
@@ -0,0 +1,4 @@
1
+ {
2
+ "name": "@document-processor/default-image-processor",
3
+ "version": "0.0.1"
4
+ }
@@ -0,0 +1,76 @@
1
+ import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";
2
+ import { fileTypeFromBuffer } from 'file-type';
3
+
4
+ const s3Client = new S3Client({});
5
+
6
/**
 * Lambda entry point: verify that an uploaded object is a supported image and
 * that its content matches the extension in its key.
 *
 * The claimed extension comes from the text after the last '.' in the key; the
 * actual type is detected from the object's bytes. Both are normalised before
 * comparison, because content detection reports JPEG as `jpg` and TIFF as
 * `tif` while file names commonly use `jpeg` and `tiff`.
 *
 * Reads INPUT_BUCKET from the environment.
 *
 * @param {Object} event - Invocation payload
 * @param {string} event.documentId - Identifier echoed back in the result
 * @param {string} event.key - Key of the object inside INPUT_BUCKET
 * @returns {Promise<Object>} `{ documentId, bucket, key, status: 'VALID', metadata,
 *   timestamp }` when the image is accepted, otherwise
 *   `{ documentId, bucket, key, error, status: 'INVALID', timestamp }`.
 */
export const handler = async (event) => {
  console.log('Received event:', JSON.stringify(event, null, 2));
  // Tolerate a missing event so the failure is reported as INVALID below
  const { documentId, key } = event || {};
  const bucket = process.env.INPUT_BUCKET;

  // Accepted extension spellings -> canonical form used for comparison
  const canonicalExtensions = new Map([
    ['jpg', 'jpg'],
    ['jpeg', 'jpg'],
    ['png', 'png'],
    ['tif', 'tif'],
    ['tiff', 'tif']
  ]);

  try {
    if (!documentId || !bucket || !key) {
      throw new Error('Missing required fields: documentId, bucket, or key');
    }

    // Get the claimed extension from the filename
    const claimedExt = key.split('.').pop().toLowerCase();
    const expectedExt = canonicalExtensions.get(claimedExt);

    if (!expectedExt) {
      throw new Error(`Invalid file format. Supported formats: JPG, PNG, TIFF. Received: ${claimedExt}`);
    }

    // Get the file from S3
    const getObjectResponse = await s3Client.send(
      new GetObjectCommand({
        Bucket: bucket,
        Key: key,
      })
    );

    // Read the file content
    const chunks = [];
    for await (const chunk of getObjectResponse.Body) {
      chunks.push(chunk);
    }
    const buffer = Buffer.concat(chunks);

    // Detect actual file type from content
    const fileType = await fileTypeFromBuffer(buffer);

    if (!fileType) {
      throw new Error('Could not determine file type from content');
    }

    // Validate that actual content type matches claimed extension, comparing
    // canonical forms so jpg/jpeg and tif/tiff are treated as the same format
    const actualExt = fileType.ext.toLowerCase();
    const detectedExt = canonicalExtensions.get(actualExt) || actualExt;
    if (detectedExt !== expectedExt) {
      throw new Error(`File extension mismatch. Claimed: ${claimedExt}, Actual: ${actualExt}`);
    }

    return {
      documentId,
      bucket,
      key,
      status: 'VALID',
      metadata: {
        validatedAt: new Date().toISOString(),
        format: actualExt,
        mimeType: fileType.mime
      },
      timestamp: new Date().toISOString()
    };

  } catch (error) {
    console.error('Validation error:', error);
    return {
      documentId,
      bucket,
      key,
      error: error.message,
      status: 'INVALID',
      timestamp: new Date().toISOString()
    };
  }
};