quidproquo-actionprocessor-awslambda 0.0.256 → 0.0.258

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/lib/commonjs/getActionProcessor/core/file/getFileGenerateTemporaryUploadSecureUrlActionProcessor.d.ts +2 -0
  2. package/lib/commonjs/getActionProcessor/core/file/getFileGenerateTemporaryUploadSecureUrlActionProcessor.js +35 -0
  3. package/lib/commonjs/getActionProcessor/core/file/index.js +2 -1
  4. package/lib/commonjs/getActionProcessor/webserver/extract/getExtractExpenseActionProcessor.d.ts +2 -0
  5. package/lib/commonjs/getActionProcessor/webserver/extract/getExtractExpenseActionProcessor.js +56 -0
  6. package/lib/commonjs/getActionProcessor/webserver/extract/index.d.ts +2 -0
  7. package/lib/commonjs/getActionProcessor/webserver/extract/index.js +17 -0
  8. package/lib/commonjs/getActionProcessor/webserver/index.d.ts +1 -0
  9. package/lib/commonjs/getActionProcessor/webserver/index.js +3 -1
  10. package/lib/commonjs/logic/dynamo/qpqDynamoOrm/buildDynamoUpdate.d.ts +1 -2
  11. package/lib/commonjs/logic/dynamo/qpqDynamoOrm/buildDynamoUpdate.js +43 -21
  12. package/lib/commonjs/logic/s3/generatePresignedUploadUrl.d.ts +1 -0
  13. package/lib/commonjs/logic/s3/generatePresignedUploadUrl.js +59 -0
  14. package/lib/commonjs/logic/textract/analyzeExpense.d.ts +51 -0
  15. package/lib/commonjs/logic/textract/analyzeExpense.js +31 -0
  16. package/lib/commonjs/logic/textract/index.d.ts +2 -0
  17. package/lib/commonjs/logic/textract/index.js +18 -0
  18. package/lib/commonjs/logic/textract/transformExpenseResponse.d.ts +29 -0
  19. package/lib/commonjs/logic/textract/transformExpenseResponse.js +180 -0
  20. package/lib/esm/getActionProcessor/core/file/getFileGenerateTemporaryUploadSecureUrlActionProcessor.d.ts +2 -0
  21. package/lib/esm/getActionProcessor/core/file/getFileGenerateTemporaryUploadSecureUrlActionProcessor.js +20 -0
  22. package/lib/esm/getActionProcessor/core/file/index.js +2 -0
  23. package/lib/esm/getActionProcessor/webserver/extract/getExtractExpenseActionProcessor.d.ts +2 -0
  24. package/lib/esm/getActionProcessor/webserver/extract/getExtractExpenseActionProcessor.js +40 -0
  25. package/lib/esm/getActionProcessor/webserver/extract/index.d.ts +2 -0
  26. package/lib/esm/getActionProcessor/webserver/extract/index.js +4 -0
  27. package/lib/esm/getActionProcessor/webserver/index.d.ts +1 -0
  28. package/lib/esm/getActionProcessor/webserver/index.js +3 -0
  29. package/lib/esm/logic/dynamo/qpqDynamoOrm/buildDynamoUpdate.d.ts +1 -2
  30. package/lib/esm/logic/dynamo/qpqDynamoOrm/buildDynamoUpdate.js +42 -19
  31. package/lib/esm/logic/s3/generatePresignedUploadUrl.d.ts +1 -0
  32. package/lib/esm/logic/s3/generatePresignedUploadUrl.js +49 -0
  33. package/lib/esm/logic/textract/analyzeExpense.d.ts +51 -0
  34. package/lib/esm/logic/textract/analyzeExpense.js +18 -0
  35. package/lib/esm/logic/textract/index.d.ts +2 -0
  36. package/lib/esm/logic/textract/index.js +2 -0
  37. package/lib/esm/logic/textract/transformExpenseResponse.d.ts +29 -0
  38. package/lib/esm/logic/textract/transformExpenseResponse.js +173 -0
  39. package/package.json +8 -6
@@ -0,0 +1,173 @@
1
/**
 * Lookup table from upper-cased summary-field type names to the metadata key
 * each one is stored under. Several type names can share a key (for example
 * TOTAL and AMOUNT_DUE both map to `total`).
 *
 * Frozen so that the shared table cannot be modified by accident at runtime.
 */
const FIELD_TYPE_MAPPINGS = Object.freeze({
    // Merchant identity
    VENDOR_NAME: 'merchantName',
    NAME: 'merchantName',
    MERCHANT_NAME: 'merchantName',
    VENDOR_ADDRESS: 'merchantAddress',
    ADDRESS: 'merchantAddress',
    MERCHANT_ADDRESS: 'merchantAddress',
    // Document date, currency and payment
    INVOICE_RECEIPT_DATE: 'date',
    DATE: 'date',
    CURRENCY_CODE: 'currency',
    PAYMENT_METHOD: 'paymentMethod',
    // Amounts
    SUBTOTAL: 'subtotal',
    TAX: 'tax',
    TOTAL: 'total',
    AMOUNT_DUE: 'total',
    AMOUNT_PAID: 'amountPaid',
    // Document and vendor identifiers
    INVOICE_RECEIPT_ID: 'receiptNumber',
    INVOICE_ID: 'invoiceNumber',
    RECEIPT_ID: 'receiptNumber',
    TAX_PAYER_ID: 'taxId',
    VENDOR_ABN_NUMBER: 'vendorAbn',
    VENDOR_PHONE: 'vendorPhone',
    VENDOR_URL: 'vendorUrl',
});
25
/**
 * Lookup table from upper-cased line-item field type names to the property
 * each one is stored under on a line item. Several type names can share a
 * property (ITEM, DESCRIPTION and PRODUCT_NAME all map to `description`).
 *
 * Frozen so that the shared table cannot be modified by accident at runtime.
 */
const LINE_ITEM_FIELD_MAPPINGS = Object.freeze({
    ITEM: 'description',
    DESCRIPTION: 'description',
    PRODUCT_NAME: 'description',
    QUANTITY: 'quantity',
    UNIT_PRICE: 'unitPrice',
    PRICE: 'total',
    AMOUNT: 'total',
});
34
/**
 * Reads the value of a single expense field.
 *
 * The text is taken from `field.ValueDetection.Text`, falling back to
 * `field.Type.Text` when the value text is missing or empty.
 *
 * Unless `forceString` is set, text that looks like a plain amount is
 * converted to a number: digits with an optional decimal part, an optional
 * leading or trailing `$`, and commas only where they form valid thousands
 * groups (`1,234.56`). Anything else is returned unchanged, so dates such as
 * `2024-01-02` or `01/02/2024` stay strings.
 *
 * @param {object} [field] - Expense field; may be null or undefined.
 * @param {boolean} [forceString=false] - Return the text as-is without
 *   attempting numeric conversion (for dates, identifiers, and similar).
 * @returns {string|number|undefined} Parsed number, the original text, or
 *   `undefined` when the field carries no text.
 */
function extractFieldValue(field, forceString = false) {
    const text = field?.ValueDetection?.Text || field?.Type?.Text;
    if (!text) {
        return undefined;
    }
    if (forceString) {
        return text;
    }
    // Commas are accepted only as thousands separators. Blindly deleting every
    // comma would turn a decimal-comma value such as "12,50" into 1250, so
    // text with irregular grouping is left as a string instead of guessed at.
    const amountPattern = /^\$?\s*(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d*)?\s*\$?$/;
    const candidate = text.trim();
    if (amountPattern.test(candidate)) {
        // The pattern guarantees that only digits and at most one dot remain.
        return Number.parseFloat(candidate.replace(/[$,\s]/g, ''));
    }
    return text;
}
51
/**
 * Builds a flat metadata object from a list of expense summary fields.
 *
 * Field types found in FIELD_TYPE_MAPPINGS are stored under their mapped key.
 * When several fields map to the same key, the one with the highest
 * `Type.Confidence` wins and ties keep the first one seen. Other field types
 * are stored under their lower-cased name with underscores removed, except
 * for a small set of address and row types and any unmapped `VENDOR_*` or
 * `AMOUNT_*` type, which are skipped.
 *
 * @param {Array<object>} [summaryFields] - Summary fields; may be missing.
 * @returns {object} Metadata keyed by mapped or cleaned field name. Empty
 *   when there are no usable fields.
 */
function extractSummaryFields(summaryFields) {
    const metadata = {};
    if (!summaryFields) {
        return metadata;
    }
    // Field types whose values are always kept as strings. Names and addresses
    // are included so that a purely numeric merchant name (for example "711")
    // is not converted to a number, and ZIP_CODE so leading zeros survive.
    const stringFields = new Set([
        'DATE', 'INVOICE_RECEIPT_DATE', 'CURRENCY_CODE', 'PAYMENT_METHOD',
        'INVOICE_RECEIPT_ID', 'INVOICE_ID', 'RECEIPT_ID', 'TAX_PAYER_ID',
        'VENDOR_ABN_NUMBER', 'VENDOR_PHONE', 'VENDOR_URL',
        'VENDOR_NAME', 'NAME', 'MERCHANT_NAME',
        'VENDOR_ADDRESS', 'ADDRESS', 'MERCHANT_ADDRESS',
        'ZIP_CODE',
    ]);
    // Substrings that mark a field type as textual.
    const stringMarkers = ['DATE', 'NUMBER', 'PHONE', 'URL'];
    // Cleaned names of unmapped field types that are never stored.
    const skippedKeys = new Set(['street', 'city', 'addressblock', 'expenserow', 'other']);
    // Best confidence seen so far for each mapped key.
    const bestConfidence = new Map();
    for (const field of summaryFields) {
        const fieldType = field?.Type?.Text?.toUpperCase();
        if (!fieldType) {
            continue;
        }
        const confidence = field?.Type?.Confidence ?? 0;
        // "ID" is matched as a whole underscore-separated word: a substring
        // test would also hit AMOUNT_PAID ("PA-ID") and force it to a string.
        const forceString = stringFields.has(fieldType)
            || fieldType.split('_').includes('ID')
            || stringMarkers.some((marker) => fieldType.includes(marker));
        const mappedKey = FIELD_TYPE_MAPPINGS[fieldType];
        if (mappedKey) {
            const value = extractFieldValue(field, forceString);
            if (value === undefined) {
                continue;
            }
            // The first value for a key is always stored, even when its
            // confidence is missing or zero; later ones must beat it.
            if (!bestConfidence.has(mappedKey) || confidence > bestConfidence.get(mappedKey)) {
                metadata[mappedKey] = value;
                bestConfidence.set(mappedKey, confidence);
            }
            continue;
        }
        const fieldKey = fieldType.toLowerCase().replace(/_/g, '');
        if (skippedKeys.has(fieldKey)) {
            continue;
        }
        // The prefix test runs on the original type name: fieldKey has had its
        // underscores removed and can never start with "vendor_" or "amount_".
        if (fieldType.startsWith('VENDOR_') || fieldType.startsWith('AMOUNT_')) {
            continue;
        }
        // Keep any other unmapped field for extensibility.
        const value = extractFieldValue(field, forceString);
        if (value !== undefined) {
            metadata[fieldKey] = value;
        }
    }
    // Keep only the first line of a multi-line merchant name.
    if (typeof metadata.merchantName === 'string' && metadata.merchantName.includes('\n')) {
        metadata.merchantName = metadata.merchantName.split('\n')[0].trim();
    }
    return metadata;
}
103
/**
 * Converts line-item groups into a flat list of line-item objects.
 *
 * Each line item starts as `{ description: '' }`. Field types found in
 * LINE_ITEM_FIELD_MAPPINGS are stored under their mapped property. Other
 * types are stored under their lower-cased name with underscores removed,
 * except EXPENSE_ROW, which is dropped. A line item is kept only when it has
 * a non-empty description or a truthy `total` or `quantity`.
 *
 * @param {Array<object>} [lineItemGroups] - Groups, each with a `LineItems`
 *   array whose entries carry `LineItemExpenseFields`.
 * @returns {Array<object>|undefined} The extracted items, or `undefined` when
 *   there are none.
 */
function extractLineItems(lineItemGroups) {
    if (!lineItemGroups || lineItemGroups.length === 0) {
        return undefined;
    }
    // Field types that are never converted to numbers. PRODUCT_CODE is an
    // identifier: parsing "00123" as a number would drop its leading zeros.
    const textFieldTypes = new Set(['ITEM', 'DESCRIPTION', 'PRODUCT_NAME', 'PRODUCT_CODE']);
    const items = [];
    for (const group of lineItemGroups) {
        // Tolerate null or undefined group entries as well as missing LineItems.
        if (!group?.LineItems) {
            continue;
        }
        for (const lineItem of group.LineItems) {
            if (!lineItem?.LineItemExpenseFields) {
                continue;
            }
            const item = { description: '' };
            for (const field of lineItem.LineItemExpenseFields) {
                const fieldType = field?.Type?.Text?.toUpperCase();
                // EXPENSE_ROW is skipped entirely (treated here as the raw row text).
                if (!fieldType || fieldType === 'EXPENSE_ROW') {
                    continue;
                }
                const value = extractFieldValue(field, textFieldTypes.has(fieldType));
                if (value === undefined) {
                    continue;
                }
                const key = LINE_ITEM_FIELD_MAPPINGS[fieldType]
                    || fieldType.toLowerCase().replace(/_/g, '');
                item[key] = value;
            }
            // Keep only rows with a description or some meaningful amount data.
            if (item.description || item.total || item.quantity) {
                items.push(item);
            }
        }
    }
    return items.length > 0 ? items : undefined;
}
144
/**
 * Joins the text of every LINE block into one newline-separated string.
 *
 * @param {Array<object>} [blocks] - Blocks carrying `BlockType` and `Text`.
 * @returns {string|undefined} The joined text, or `undefined` when `blocks`
 *   is missing or produces no text.
 */
function extractRawText(blocks) {
    if (!blocks) {
        return undefined;
    }
    const lineTexts = blocks.reduce((collected, block) => {
        // Only LINE blocks that actually carry text contribute.
        if (block.BlockType === 'LINE' && block.Text) {
            collected.push(block.Text);
        }
        return collected;
    }, []);
    const joined = lineTexts.join('\n');
    return joined || undefined;
}
153
/**
 * Transforms an expense-analysis response into a simplified result object.
 *
 * Only the first entry of `textractResponse.ExpenseDocuments` is processed.
 *
 * @param {object} textractResponse - Response containing `ExpenseDocuments`.
 * @param {*} storageDrive - Echoed back as `source.storageDrive`.
 * @param {*} filePath - Echoed back as `source.filePath`.
 * @param {boolean} [includeRaw=true] - When truthy, the untouched response is
 *   attached as `_raw` (useful for debugging).
 * @returns {object} `{ metadata, lineItems, rawText, source }`, plus `_raw`
 *   when requested.
 */
export function transformTextractExpenseResponse(textractResponse, storageDrive, filePath, includeRaw = true) {
    const firstDocument = textractResponse.ExpenseDocuments?.[0];
    const transformed = {
        metadata: extractSummaryFields(firstDocument?.SummaryFields),
        lineItems: extractLineItems(firstDocument?.LineItemGroups),
        rawText: extractRawText(firstDocument?.Blocks),
        source: { storageDrive, filePath },
    };
    // The raw response is appended last so it follows the derived fields.
    return includeRaw ? { ...transformed, _raw: textractResponse } : transformed;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "quidproquo-actionprocessor-awslambda",
3
- "version": "0.0.256",
3
+ "version": "0.0.258",
4
4
  "description": "",
5
5
  "main": "./lib/commonjs/index.js",
6
6
  "module": "./lib/esm/index.js",
@@ -46,7 +46,10 @@
46
46
  "@aws-sdk/client-sns": "^3.744.0",
47
47
  "@aws-sdk/client-sqs": "^3.744.0",
48
48
  "@aws-sdk/client-ssm": "^3.744.0",
49
+ "@aws-sdk/client-textract": "^3.883.0",
50
+ "@aws-sdk/s3-presigned-post": "^3.744.0",
49
51
  "@aws-sdk/s3-request-presigner": "^3.744.0",
52
+ "@types/aws-lambda": "^8.10.109",
50
53
  "aws-jwt-verify": "^3.4.0",
51
54
  "aws-sdk": "^2.1322.0",
52
55
  "busboy": "^1.6.0",
@@ -55,17 +58,16 @@
55
58
  "lodash": "^4.17.21",
56
59
  "node-cache": "^5.1.2",
57
60
  "node-match-path": "^0.6.3",
58
- "quidproquo-config-aws": "0.0.256",
59
- "quidproquo-core": "0.0.256",
60
- "quidproquo-webserver": "0.0.256",
61
- "@types/aws-lambda": "^8.10.109"
61
+ "quidproquo-config-aws": "0.0.258",
62
+ "quidproquo-core": "0.0.258",
63
+ "quidproquo-webserver": "0.0.258"
62
64
  },
63
65
  "devDependencies": {
64
66
  "@types/busboy": "^1.5.0",
65
67
  "@types/jsonwebtoken": "^9.0.2",
66
68
  "@types/lodash": "^4.14.194",
67
69
  "@types/node": "^22.13.13",
68
- "quidproquo-tsconfig": "0.0.256",
70
+ "quidproquo-tsconfig": "0.0.258",
69
71
  "typescript": "^5.8.2"
70
72
  }
71
73
  }