@realtimex/folio 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/.env.example +20 -0
  2. package/README.md +63 -0
  3. package/api/server.ts +130 -0
  4. package/api/src/config/index.ts +96 -0
  5. package/api/src/middleware/auth.ts +128 -0
  6. package/api/src/middleware/errorHandler.ts +88 -0
  7. package/api/src/middleware/index.ts +4 -0
  8. package/api/src/middleware/rateLimit.ts +71 -0
  9. package/api/src/middleware/validation.ts +58 -0
  10. package/api/src/routes/accounts.ts +142 -0
  11. package/api/src/routes/baseline-config.ts +124 -0
  12. package/api/src/routes/chat.ts +154 -0
  13. package/api/src/routes/health.ts +61 -0
  14. package/api/src/routes/index.ts +35 -0
  15. package/api/src/routes/ingestions.ts +275 -0
  16. package/api/src/routes/migrate.ts +112 -0
  17. package/api/src/routes/policies.ts +121 -0
  18. package/api/src/routes/processing.ts +90 -0
  19. package/api/src/routes/rules.ts +11 -0
  20. package/api/src/routes/sdk.ts +100 -0
  21. package/api/src/routes/settings.ts +80 -0
  22. package/api/src/routes/setup.ts +389 -0
  23. package/api/src/routes/stats.ts +81 -0
  24. package/api/src/routes/tts.ts +190 -0
  25. package/api/src/services/BaselineConfigService.ts +208 -0
  26. package/api/src/services/ChatService.ts +204 -0
  27. package/api/src/services/GoogleDriveService.ts +331 -0
  28. package/api/src/services/GoogleSheetsService.ts +1107 -0
  29. package/api/src/services/IngestionService.ts +1187 -0
  30. package/api/src/services/ModelCapabilityService.ts +248 -0
  31. package/api/src/services/PolicyEngine.ts +1625 -0
  32. package/api/src/services/PolicyLearningService.ts +527 -0
  33. package/api/src/services/PolicyLoader.ts +249 -0
  34. package/api/src/services/RAGService.ts +391 -0
  35. package/api/src/services/SDKService.ts +249 -0
  36. package/api/src/services/supabase.ts +113 -0
  37. package/api/src/utils/Actuator.ts +284 -0
  38. package/api/src/utils/actions/ActionHandler.ts +34 -0
  39. package/api/src/utils/actions/AppendToGSheetAction.ts +260 -0
  40. package/api/src/utils/actions/AutoRenameAction.ts +58 -0
  41. package/api/src/utils/actions/CopyAction.ts +120 -0
  42. package/api/src/utils/actions/CopyToGDriveAction.ts +64 -0
  43. package/api/src/utils/actions/LogCsvAction.ts +48 -0
  44. package/api/src/utils/actions/NotifyAction.ts +39 -0
  45. package/api/src/utils/actions/RenameAction.ts +57 -0
  46. package/api/src/utils/actions/WebhookAction.ts +58 -0
  47. package/api/src/utils/actions/utils.ts +293 -0
  48. package/api/src/utils/llmResponse.ts +61 -0
  49. package/api/src/utils/logger.ts +67 -0
  50. package/bin/folio-deploy.js +12 -0
  51. package/bin/folio-setup.js +45 -0
  52. package/bin/folio.js +65 -0
  53. package/dist/api/server.js +106 -0
  54. package/dist/api/src/config/index.js +81 -0
  55. package/dist/api/src/middleware/auth.js +93 -0
  56. package/dist/api/src/middleware/errorHandler.js +73 -0
  57. package/dist/api/src/middleware/index.js +4 -0
  58. package/dist/api/src/middleware/rateLimit.js +43 -0
  59. package/dist/api/src/middleware/validation.js +54 -0
  60. package/dist/api/src/routes/accounts.js +110 -0
  61. package/dist/api/src/routes/baseline-config.js +91 -0
  62. package/dist/api/src/routes/chat.js +114 -0
  63. package/dist/api/src/routes/health.js +52 -0
  64. package/dist/api/src/routes/index.js +31 -0
  65. package/dist/api/src/routes/ingestions.js +207 -0
  66. package/dist/api/src/routes/migrate.js +91 -0
  67. package/dist/api/src/routes/policies.js +86 -0
  68. package/dist/api/src/routes/processing.js +75 -0
  69. package/dist/api/src/routes/rules.js +8 -0
  70. package/dist/api/src/routes/sdk.js +80 -0
  71. package/dist/api/src/routes/settings.js +68 -0
  72. package/dist/api/src/routes/setup.js +315 -0
  73. package/dist/api/src/routes/stats.js +62 -0
  74. package/dist/api/src/routes/tts.js +178 -0
  75. package/dist/api/src/services/BaselineConfigService.js +168 -0
  76. package/dist/api/src/services/ChatService.js +166 -0
  77. package/dist/api/src/services/GoogleDriveService.js +280 -0
  78. package/dist/api/src/services/GoogleSheetsService.js +795 -0
  79. package/dist/api/src/services/IngestionService.js +990 -0
  80. package/dist/api/src/services/ModelCapabilityService.js +179 -0
  81. package/dist/api/src/services/PolicyEngine.js +1353 -0
  82. package/dist/api/src/services/PolicyLearningService.js +397 -0
  83. package/dist/api/src/services/PolicyLoader.js +159 -0
  84. package/dist/api/src/services/RAGService.js +295 -0
  85. package/dist/api/src/services/SDKService.js +212 -0
  86. package/dist/api/src/services/supabase.js +72 -0
  87. package/dist/api/src/utils/Actuator.js +225 -0
  88. package/dist/api/src/utils/actions/ActionHandler.js +1 -0
  89. package/dist/api/src/utils/actions/AppendToGSheetAction.js +191 -0
  90. package/dist/api/src/utils/actions/AutoRenameAction.js +49 -0
  91. package/dist/api/src/utils/actions/CopyAction.js +112 -0
  92. package/dist/api/src/utils/actions/CopyToGDriveAction.js +55 -0
  93. package/dist/api/src/utils/actions/LogCsvAction.js +42 -0
  94. package/dist/api/src/utils/actions/NotifyAction.js +32 -0
  95. package/dist/api/src/utils/actions/RenameAction.js +51 -0
  96. package/dist/api/src/utils/actions/WebhookAction.js +51 -0
  97. package/dist/api/src/utils/actions/utils.js +237 -0
  98. package/dist/api/src/utils/llmResponse.js +63 -0
  99. package/dist/api/src/utils/logger.js +51 -0
  100. package/dist/assets/index-DzN8-j-e.css +1 -0
  101. package/dist/assets/index-Uy-ai3Dh.js +113 -0
  102. package/dist/favicon.svg +31 -0
  103. package/dist/folio-logo.svg +46 -0
  104. package/dist/index.html +14 -0
  105. package/docs-dev/FPE-spec.md +196 -0
  106. package/docs-dev/folio-prd.md +47 -0
  107. package/docs-dev/foundation-checklist.md +30 -0
  108. package/docs-dev/hybrid-routing-architecture.md +205 -0
  109. package/docs-dev/ingestion-engine.md +69 -0
  110. package/docs-dev/port-from-email-automator.md +32 -0
  111. package/docs-dev/tech-spec.md +98 -0
  112. package/index.html +13 -0
  113. package/package.json +101 -0
  114. package/public/favicon.svg +31 -0
  115. package/public/folio-logo.svg +46 -0
  116. package/scripts/dev-task.mjs +51 -0
  117. package/scripts/get-latest-migration-timestamp.mjs +34 -0
  118. package/scripts/migrate.sh +91 -0
  119. package/supabase/.temp/cli-latest +1 -0
  120. package/supabase/.temp/gotrue-version +1 -0
  121. package/supabase/.temp/pooler-url +1 -0
  122. package/supabase/.temp/postgres-version +1 -0
  123. package/supabase/.temp/project-ref +1 -0
  124. package/supabase/.temp/rest-version +1 -0
  125. package/supabase/.temp/storage-migration +1 -0
  126. package/supabase/.temp/storage-version +1 -0
  127. package/supabase/config.toml +64 -0
  128. package/supabase/functions/_shared/auth.ts +35 -0
  129. package/supabase/functions/_shared/cors.ts +12 -0
  130. package/supabase/functions/_shared/supabaseAdmin.ts +17 -0
  131. package/supabase/functions/api-v1-settings/index.ts +66 -0
  132. package/supabase/functions/setup/index.ts +91 -0
  133. package/supabase/migrations/20260223000000_initial_foundation.sql +136 -0
  134. package/supabase/migrations/20260223000001_add_migration_rpc.sql +10 -0
  135. package/supabase/migrations/20260224000002_add_init_state_view.sql +20 -0
  136. package/supabase/migrations/20260224000003_port_user_creation_parity.sql +139 -0
  137. package/supabase/migrations/20260224000004_add_avatars_storage.sql +26 -0
  138. package/supabase/migrations/20260224000005_add_tts_and_embed_settings.sql +24 -0
  139. package/supabase/migrations/20260224000006_add_policies_table.sql +48 -0
  140. package/supabase/migrations/20260224000007_fix_migration_rpc.sql +9 -0
  141. package/supabase/migrations/20260224000008_add_ingestions_table.sql +42 -0
  142. package/supabase/migrations/20260225000000_setup_compatible_mode.sql +119 -0
  143. package/supabase/migrations/20260225000001_restore_ingestions.sql +49 -0
  144. package/supabase/migrations/20260225000002_add_ingestion_trace.sql +2 -0
  145. package/supabase/migrations/20260225000003_add_baseline_configs.sql +35 -0
  146. package/supabase/migrations/20260226000000_add_processing_events.sql +26 -0
  147. package/supabase/migrations/20260226000001_add_ingestion_file_hash.sql +10 -0
  148. package/supabase/migrations/20260226000002_add_dynamic_rag.sql +150 -0
  149. package/supabase/migrations/20260226000003_add_ingestion_summary.sql +4 -0
  150. package/supabase/migrations/20260226000004_add_ingestion_tags.sql +7 -0
  151. package/supabase/migrations/20260226000005_add_chat_tables.sql +60 -0
  152. package/supabase/migrations/20260227000000_harden_chat_messages_rls.sql +25 -0
  153. package/supabase/migrations/20260228000000_add_vision_model_capabilities.sql +8 -0
  154. package/supabase/migrations/20260228000001_add_policy_match_feedback.sql +51 -0
  155. package/supabase/migrations/29991231235959_test_migration.sql +0 -0
  156. package/supabase/templates/confirmation.html +76 -0
  157. package/supabase/templates/email-change.html +76 -0
  158. package/supabase/templates/invite.html +72 -0
  159. package/supabase/templates/magic-link.html +68 -0
  160. package/supabase/templates/recovery.html +82 -0
  161. package/tsconfig.api.json +16 -0
  162. package/tsconfig.json +25 -0
  163. package/vite.config.ts +146 -0
@@ -0,0 +1,1353 @@
1
+ import { createLogger } from "../utils/logger.js";
2
+ import { SDKService } from "./SDKService.js";
3
+ import { PolicyLoader } from "./PolicyLoader.js";
4
+ import { GoogleSheetsService } from "./GoogleSheetsService.js";
5
+ import { PolicyLearningService } from "./PolicyLearningService.js";
6
+ import { Actuator } from "../utils/Actuator.js";
7
+ import { extractLlmResponse, normalizeLlmContent, previewLlmText } from "../utils/llmResponse.js";
8
+ import { DEFAULT_BASELINE_FIELDS } from "./BaselineConfigService.js";
9
+ const logger = createLogger("PolicyEngine");
10
/**
 * Looks for the first `[VLM_IMAGE_DATA:<data URL>]` marker embedded in `text`.
 *
 * @param {string} text - Document text that may contain an image marker.
 * @returns {{ imageDataUrl: string, supplementalText: string } | null}
 *   The base64 data URL captured from the marker together with the rest of
 *   the text (marker removed, trimmed, capped at 4000 characters), or `null`
 *   when the text contains no marker.
 */
function extractVlmPayload(text) {
    const found = text.match(/\[VLM_IMAGE_DATA:(data:[^;]+;base64,[^\]]+)\]/);
    if (found === null) {
        return null;
    }
    const [wholeMarker, imageDataUrl] = found;
    // A string pattern removes only the first occurrence, i.e. the matched marker.
    const remainder = text.replace(wholeMarker, "");
    return {
        imageDataUrl,
        supplementalText: remainder.trim().slice(0, 4000),
    };
}
26
/**
 * Builds the `content` of a chat message from a prompt and document text.
 *
 * When `extractVlmPayload` finds an image marker in `text`, the result is a
 * two-part array: a text part (the prompt, plus any supplemental text found
 * next to the marker) followed by an `image_url` part carrying the data URL.
 * Otherwise the result is a single string combining the prompt with the
 * trimmed document text, capped at 8000 characters.
 *
 * @param {string} prompt - Instruction for the model.
 * @param {string} text - Document text, possibly containing an image marker.
 * @param {boolean} [textFirst=false] - For plain-text documents only: put the
 *   document text before the prompt instead of after it.
 * @returns {string | Array<object>} Message content.
 */
function buildMessageContent(prompt, text, textFirst = false) {
    const vision = extractVlmPayload(text);
    if (!vision) {
        // Standard text payload
        const excerpt = text.trim().slice(0, 8000);
        if (textFirst) {
            return `Document text:\n\n${excerpt}\n\n${prompt}`;
        }
        return `${prompt}\n\nDocument text:\n${excerpt}`;
    }
    let textPart = prompt;
    if (vision.supplementalText) {
        textPart = `${prompt}\n\nSupplemental extracted fields:\n${vision.supplementalText}`;
    }
    return [
        { type: "text", text: textPart },
        { type: "image_url", image_url: { url: vision.imageDataUrl } },
    ];
}
43
/**
 * Extracts and parses a JSON value from an LLM response.
 *
 * The response is first passed through `normalizeLlmContent` and trimmed.
 * The JSON candidate is, in order of preference: the body of the first
 * ``` fenced block (optionally tagged `json`), the span from the first `{`
 * to the last `}`, or the whole text.
 *
 * The candidate is parsed verbatim first. Only if that fails is it retried
 * with typographic quotes replaced by ASCII quotes. Sanitising up front
 * would turn curly double quotes that legitimately appear inside string
 * values into unescaped `"` characters and break otherwise valid JSON.
 *
 * @param {unknown} raw - Raw LLM response content.
 * @returns {any | null} The parsed JSON value, or `null` when nothing
 *   parseable is found.
 */
function parseLlmJson(raw) {
    const rawText = normalizeLlmContent(raw).trim();
    if (!rawText) {
        return null;
    }
    const jsonMatch = rawText.match(/```(?:json)?\s*([\s\S]*?)```/) ?? rawText.match(/(\{[\s\S]*\})/);
    const candidate = jsonMatch ? (jsonMatch[1] ?? jsonMatch[0]) : rawText;
    if (!candidate) {
        return null;
    }
    try {
        return JSON.parse(candidate);
    }
    catch {
        // Not valid as-is; fall through to the quote-sanitised retry below.
    }
    // Sanitize smart quotes to standard quotes (models sometimes use them as delimiters)
    const sanitized = candidate
        .replace(/[\u2018\u2019]/g, "'")
        .replace(/[\u201C\u201D\u201E\u201F\u2033\u2036]/g, '"');
    if (sanitized === candidate) {
        return null;
    }
    try {
        return JSON.parse(sanitized);
    }
    catch {
        return null;
    }
}
64
/**
 * Tells whether `value` is a string containing at least one
 * non-whitespace character.
 *
 * @param {unknown} value - Any value.
 * @returns {boolean} `false` for non-strings and for blank strings.
 */
function hasText(value) {
    if (typeof value !== "string") {
        return false;
    }
    return value.trim() !== "";
}
67
/**
 * Removes any run of `)`, `,`, `.` or `;` characters from the end of a string.
 *
 * @param {string} value - Text to clean.
 * @returns {string} `value` without the trailing punctuation run.
 */
function stripTrailingPunctuation(value) {
    const trailingPunctuation = /[),.;]+$/g;
    return value.replace(trailingPunctuation, "");
}
70
/**
 * Tells whether a sheet reference carries a numeric `gid` in its query
 * string or fragment (`?gid=123` or `#gid=123`, case-insensitive).
 *
 * @param {unknown} value - Candidate sheet reference.
 * @returns {boolean} `true` only for non-blank strings containing such a gid.
 */
function hasGidInReference(value) {
    if (!hasText(value)) {
        return false;
    }
    return /(?:[?#]gid=\d+)/i.test(value);
}
73
/**
 * Tells whether an A1-style range targets the sheet named "Sheet1"
 * (compared case-insensitively).
 *
 * Only the part before the first `!` is inspected. A sheet name wrapped in
 * single quotes is unquoted (with `''` unescaped to `'`) and trimmed before
 * the comparison.
 *
 * @param {unknown} value - Candidate range such as `Sheet1!A1:C` or `'Sheet1'`.
 * @returns {boolean} `true` when the referenced sheet is "Sheet1".
 */
function isDefaultSheetOneRange(value) {
    if (!hasText(value)) {
        return false;
    }
    const sheetRef = value.trim().split("!")[0].trim();
    let sheetName = sheetRef;
    if (/^'.*'$/.test(sheetRef)) {
        sheetName = sheetRef.slice(1, -1).replace(/''/g, "'").trim();
    }
    return sheetName.toLowerCase() === "sheet1";
}
84
/**
 * Decides whether the range found in the synthesis hints should be used.
 *
 * A range is kept when it is non-blank, unless the sheet reference carries
 * a `gid` and the range merely points at "Sheet1". In that case the range
 * is discarded.
 *
 * @param {{ range?: string, sheetReference?: string }} hints - Target hints.
 * @returns {boolean} `true` when `hints.range` should be preserved.
 */
function shouldPreserveRangeHint(hints) {
    if (!hasText(hints.range)) {
        return false;
    }
    const referenceHasGid = hasGidInReference(hints.sheetReference);
    return !(referenceHasGid && isDefaultSheetOneRange(hints.range));
}
92
/**
 * Converts a header or field name into a snake_case key.
 *
 * The value is lower-cased, every run of characters outside `a-z0-9`
 * becomes a single `_`, and leading/trailing underscores are removed.
 * If nothing is left, the key falls back to `"value"`.
 *
 * @param {string} value - Header or field name.
 * @returns {string} The normalized, never-empty key.
 */
function normalizeTemplateFieldKey(value) {
    const slug = value.toLowerCase().replace(/[^a-z0-9]+/g, "_");
    const trimmed = slug.replace(/^_+|_+$/g, "");
    return trimmed === "" ? "value" : trimmed;
}
99
/**
 * Guesses an extraction type from a column header.
 *
 * The header is normalized with `normalizeTemplateFieldKey` and tested, in
 * order, against whole-word (underscore-delimited) vocabularies for dates,
 * money amounts and quantities. The first hit wins.
 *
 * @param {string} header - Column header text.
 * @returns {"date" | "currency" | "number" | "string"} The inferred type;
 *   `"string"` when no vocabulary matches.
 */
function inferTemplateFieldType(header) {
    const key = normalizeTemplateFieldKey(header);
    // Order matters: a header such as "Paid Date" must resolve to "date".
    const rules = [
        ["date", /(^|_)(date|due_date|invoice_date|receipt_date|service_date|posted_date)(_|$)/],
        ["currency", /(^|_)(amount|total|subtotal|tax|price|cost|balance|fee|vat|discount|paid)(_|$)/],
        ["number", /(^|_)(qty|quantity|count|units)(_|$)/],
    ];
    const hit = rules.find(([, pattern]) => pattern.test(key));
    return hit ? hit[0] : "string";
}
112
/**
 * Derives one extraction field hint per sheet header, in header order.
 *
 * Each hint carries the original header, a normalized key and an inferred
 * type. Keys are guaranteed unique: the first header producing a given key
 * keeps it, and repeats get a numeric suffix (`total`, `total_2`,
 * `total_3`, ...). The suffix keeps increasing while the candidate is
 * already taken, so a repeated "Total" can never collide with a real
 * "Total 2" column.
 *
 * @param {string[]} headers - Column headers.
 * @returns {Array<{ header: string, key: string, type: string }>} Field hints.
 */
function buildTemplateFieldHints(headers) {
    // Highest ordinal handed out so far per base key.
    const ordinals = new Map();
    // Every key already emitted, whatever base key it came from.
    const usedKeys = new Set();
    return headers.map((header) => {
        const baseKey = normalizeTemplateFieldKey(header);
        let ordinal = (ordinals.get(baseKey) ?? 0) + 1;
        let key = ordinal === 1 ? baseKey : `${baseKey}_${ordinal}`;
        while (usedKeys.has(key)) {
            ordinal += 1;
            key = `${baseKey}_${ordinal}`;
        }
        ordinals.set(baseKey, ordinal);
        usedKeys.add(key);
        return {
            header,
            key,
            type: inferTemplateFieldType(header),
        };
    });
}
126
/**
 * Resolves the header row of the Google Sheet named in the synthesis hints
 * and turns it into a template context.
 *
 * @param {{ sheetReference?: string, range?: string }} hints - Target hints.
 * @param {{ userId?: string, supabase?: unknown }} opts - Identity and
 *   client passed through to `GoogleSheetsService.resolveTemplate`.
 * @returns {Promise<{ context?: object, warning?: string }>}
 *   - `{}` when there is no sheet reference or no user id;
 *   - `{ warning }` when the template cannot be resolved or has no
 *     non-blank headers;
 *   - `{ context }` with the reference, spreadsheet id, range, trimmed
 *     headers and derived field hints otherwise.
 */
async function resolveSheetTemplateContext(hints, opts) {
    const { sheetReference } = hints;
    if (!(hasText(sheetReference) && hasText(opts.userId))) {
        return {};
    }
    const preferredRange = shouldPreserveRangeHint(hints) ? hints.range : undefined;
    const resolved = await GoogleSheetsService.resolveTemplate(opts.userId, sheetReference, preferredRange, opts.supabase);
    if (!resolved.success) {
        return { warning: resolved.error || "Failed to resolve Google Sheet template headers." };
    }
    const headers = (resolved.headers ?? [])
        .map((header) => header.trim())
        .filter((header) => header.length > 0);
    if (headers.length === 0) {
        return { warning: "Google Sheet template has no headers in row 1." };
    }
    const context = {
        spreadsheetReference: sheetReference,
        spreadsheetId: resolved.spreadsheetId ?? sheetReference,
        range: resolved.range ?? preferredRange ?? "Sheet1",
        headers,
        fields: buildTemplateFieldHints(headers),
    };
    return { context };
}
150
/**
 * Aligns a policy with a resolved Google Sheet template.
 *
 * The policy is modified in place and also returned:
 * - an `append_to_google_sheet` action is ensured. An existing one gets the
 *   template's spreadsheet reference when it has neither id nor URL, and
 *   the template's range when its own range is blank or points at "Sheet1".
 * - `spec.extract` is rebuilt so that template fields come first, in
 *   template order. Matching existing fields are reused, keeping their own
 *   type and description when set. Existing fields that match no template
 *   column are appended after them.
 *
 * Entries of `spec.extract` that are not objects with a string `key` are
 * dropped. They cannot be matched to a column and previously made this
 * function throw.
 *
 * @param {object} policy - Policy whose `spec` is updated. Returned untouched
 *   when it has no object `spec`.
 * @param {{ spreadsheetReference: string, range: string,
 *   fields: Array<{ header: string, key: string, type: string }> } | undefined} template
 *   Template context. Nothing happens when it is falsy.
 * @returns {object} The same `policy` object.
 */
function applySheetTemplateContext(policy, template) {
    if (!template || !policy || typeof policy !== "object" || !policy.spec || typeof policy.spec !== "object") {
        return policy;
    }
    const spec = policy.spec;
    if (!Array.isArray(spec.actions)) {
        spec.actions = [];
    }
    const appendAction = spec.actions.find((action) => action?.type === "append_to_google_sheet");
    if (appendAction) {
        if (!hasText(appendAction.spreadsheet_id) && !hasText(appendAction.spreadsheet_url)) {
            appendAction.spreadsheet_id = template.spreadsheetReference;
        }
        if (!hasText(appendAction.range) || isDefaultSheetOneRange(appendAction.range)) {
            appendAction.range = template.range;
        }
    }
    else {
        spec.actions.push({
            type: "append_to_google_sheet",
            spreadsheet_id: template.spreadsheetReference,
            range: template.range,
        });
    }
    // Validate once so every later step can rely on `field.key` being a string.
    const existingExtract = (Array.isArray(spec.extract) ? spec.extract : [])
        .filter((field) => Boolean(field) && typeof field.key === "string");
    // First field wins when several normalize to the same key.
    const existingByKey = new Map();
    for (const field of existingExtract) {
        const key = normalizeTemplateFieldKey(field.key);
        if (!existingByKey.has(key)) {
            existingByKey.set(key, field);
        }
    }
    const usedKeys = new Set();
    const orderedExtract = template.fields.map((templateField) => {
        const fallbackDescription = `Extract value for Google Sheet column "${templateField.header}".`;
        usedKeys.add(templateField.key);
        const existing = existingByKey.get(templateField.key);
        if (!existing) {
            return {
                key: templateField.key,
                type: templateField.type,
                description: fallbackDescription,
            };
        }
        return {
            ...existing,
            key: templateField.key,
            type: existing.type ?? templateField.type,
            // hasText also shields against non-string descriptions.
            description: hasText(existing.description) ? existing.description.trim() : fallbackDescription,
        };
    });
    const extras = existingExtract.filter((field) => !usedKeys.has(normalizeTemplateFieldKey(field.key)));
    spec.extract = [...orderedExtract, ...extras];
    return policy;
}
207
/**
 * Scans a free-text policy description for output targets.
 *
 * Recognised hints:
 * - `sheetReference`: a docs.google.com spreadsheet URL (trailing
 *   punctuation stripped), or failing that an id of 20+ characters
 *   following "google sheet(s)". When the description also names a numeric
 *   gid (`gid=`, `sheetid:`, `sheet_id=`), it is attached as `#gid=` unless
 *   the URL already carries one.
 * - `range`: an A1 range with a sheet name after the word "range".
 * - `driveFolderId`: an id of 20+ characters after "folder".
 *
 * @param {string} description - Natural-language description.
 * @returns {{ sheetReference?: string, range?: string, driveFolderId?: string }}
 *   Only the hints that were found.
 */
function extractSynthesisTargetHints(description) {
    const hints = {};
    const gid = description.match(/\b(?:gid|sheetid|sheet_id)\s*[:=]\s*(\d+)\b/i)?.[1];
    const sheetUrl = description.match(/https?:\/\/docs\.google\.com\/spreadsheets\/d\/[^\s)]+/i)?.[0];
    if (sheetUrl) {
        const cleaned = stripTrailingPunctuation(sheetUrl);
        // eslint-disable-next-line no-useless-escape
        const needsGid = Boolean(gid) && !/[\?#]gid=\d+/i.test(cleaned);
        hints.sheetReference = needsGid ? `${cleaned}#gid=${gid}` : cleaned;
    }
    else {
        const bareSheetId = description.match(/google\s*sheet(?:s)?(?:\s*(?:id|:))?[^A-Za-z0-9_-]*([A-Za-z0-9-_]{20,})/i)?.[1];
        if (bareSheetId) {
            hints.sheetReference = gid
                ? `https://docs.google.com/spreadsheets/d/${bareSheetId}/edit#gid=${gid}`
                : bareSheetId;
        }
    }
    const range = description.match(/\brange\s*(?:is|=|:)?\s*([A-Za-z0-9_'"-]+![A-Za-z]+\d*(?::[A-Za-z]+\d*)?)/i)?.[1];
    if (range) {
        hints.range = range.trim();
    }
    const driveFolderId = description.match(/\b(?:google\s*drive|gdrive|gdriver)?\s*folder(?:\s*id)?\s*(?:is|=|:)?\s*([A-Za-z0-9_-]{20,})\b/i)?.[1];
    if (driveFolderId) {
        hints.driveFolderId = driveFolderId;
    }
    return hints;
}
240
/**
 * Fills target details from description hints into a policy's actions.
 *
 * The policy is modified in place and also returned. Values already set on
 * an existing action are never overwritten:
 * - a Drive folder hint sets the `destination` of the `copy_to_gdrive`
 *   action, adding the action when missing;
 * - a sheet hint sets the `spreadsheet_id` of the `append_to_google_sheet`
 *   action (unless it already has an id or URL) and, when the range hint is
 *   worth preserving, its `range`. The action is added when missing.
 *
 * @param {object} policy - Policy to update. Returned untouched when it has
 *   no object `spec`.
 * @param {{ sheetReference?: string, range?: string, driveFolderId?: string }} hints
 *   Hints extracted from the description.
 * @returns {object} The same `policy` object.
 */
function applySynthesisTargetHints(policy, hints) {
    const spec = policy && typeof policy === "object" ? policy.spec : undefined;
    if (!spec || typeof spec !== "object") {
        return policy;
    }
    if (!Array.isArray(spec.actions)) {
        spec.actions = [];
    }
    const { actions } = spec;
    const findAction = (type) => actions.find((action) => action?.type === type);
    if (hasText(hints.driveFolderId)) {
        const driveAction = findAction("copy_to_gdrive");
        if (!driveAction) {
            actions.push({
                type: "copy_to_gdrive",
                destination: hints.driveFolderId,
            });
        }
        else if (!hasText(driveAction.destination)) {
            driveAction.destination = hints.driveFolderId;
        }
    }
    if (hasText(hints.sheetReference)) {
        const sheetAction = findAction("append_to_google_sheet");
        const keepRange = shouldPreserveRangeHint(hints);
        if (!sheetAction) {
            const added = {
                type: "append_to_google_sheet",
                spreadsheet_id: hints.sheetReference,
            };
            if (keepRange) {
                added.range = hints.range;
            }
            actions.push(added);
        }
        else {
            if (!hasText(sheetAction.spreadsheet_id) && !hasText(sheetAction.spreadsheet_url)) {
                sheetAction.spreadsheet_id = hints.sheetReference;
            }
            if (keepRange && !hasText(sheetAction.range)) {
                sheetAction.range = hints.range;
            }
        }
    }
    return policy;
}
285
+ // ─── Matcher ────────────────────────────────────────────────────────────────
286
/**
 * Evaluates a single policy match condition against a document.
 *
 * Supported condition types:
 * - `keyword`: any configured value occurs in `doc.text`.
 * - `filename`: any configured value occurs in `doc.filePath`.
 * - `file_type` / `mime_type`: the file extension equals a configured
 *   extension (`pdf`, `.pdf`) or the subtype of a configured MIME type
 *   (`application/pdf`, with `text/plain` -> `txt` and markdown -> `md`).
 * - `llm_verify` / `semantic`: the chat model is asked the condition's
 *   prompt (or its value) about the document, and must answer
 *   `{ result: true }` with a confidence of at least
 *   `condition.confidence_threshold` (default 0.8; a missing confidence
 *   counts as 1). Requires an SDK, and any failure yields `false`.
 *
 * Configured values are coerced to strings, and empty or missing values are
 * ignored. An empty needle would be "contained" in every text, so a
 * misconfigured condition matches nothing rather than every document.
 * `keyword` and `filename` comparisons ignore case unless
 * `condition.case_sensitive` is set.
 *
 * @param {object} condition - Condition with `type`, `value` and options.
 * @param {object} doc - Document context (`text`, `filePath`, `ingestionId`,
 *   `userId`, `supabase`).
 * @param {Array<object>} trace - Trace log; LLM steps are appended to it.
 * @param {object} [settings={}] - Settings passed to
 *   `SDKService.resolveChatProvider`.
 * @returns {Promise<boolean>} Whether the condition holds. Unknown condition
 *   types log a warning and yield `false`.
 */
async function evaluateCondition(condition, doc, trace, settings = {}) {
    const sdk = SDKService.getSDK();
    // Configured values as a list of non-empty strings.
    const conditionValues = () => (Array.isArray(condition.value) ? condition.value : [condition.value])
        .filter((v) => v != null)
        .map((v) => String(v))
        .filter((v) => v.length > 0);
    const foldCase = (s) => (condition.case_sensitive ? s : s.toLowerCase());
    if (condition.type === "keyword") {
        const text = foldCase(doc.text);
        return conditionValues().some((v) => text.includes(foldCase(v)));
    }
    if (condition.type === "filename") {
        const name = foldCase(doc.filePath);
        return conditionValues().some((v) => name.includes(foldCase(v)));
    }
    if (condition.type === "file_type" || condition.type === "mime_type") {
        // Take the extension from the last path segment only, so that
        // "Makefile" or "/my.dir/file" are not mistaken for extensions.
        const baseName = doc.filePath.split(/[\\/]/).pop() ?? "";
        const dot = baseName.lastIndexOf(".");
        const ext = dot >= 0 ? baseName.slice(dot + 1).toLowerCase() : "";
        if (!ext)
            return false;
        // MIME subtype → extension exceptions where they differ
        const MIME_TO_EXT = { plain: "txt", markdown: "md", "x-markdown": "md" };
        return conditionValues().some((v) => {
            const normalized = v.toLowerCase().replace(/^\./, "");
            // Direct extension match: "pdf" or ".pdf"
            if (normalized === ext)
                return true;
            // MIME type match: "application/pdf" → subtype "pdf"
            if (normalized.includes("/")) {
                const subtype = normalized.split("/").pop() ?? "";
                return (MIME_TO_EXT[subtype] ?? subtype) === ext;
            }
            return false;
        });
    }
    if (condition.type === "llm_verify" || condition.type === "semantic") {
        if (!sdk)
            return false;
        // For semantic conditions, treat the value(s) as the verification prompt if no explicit prompt is set
        const prompt = condition.prompt
            ?? (Array.isArray(condition.value) ? condition.value.join("; ") : condition.value)
            ?? "";
        if (!prompt)
            return false;
        trace.push({ timestamp: new Date().toISOString(), step: `Evaluating ${condition.type} condition`, details: { prompt } });
        Actuator.logEvent(doc.ingestionId, doc.userId, "info", "Policy Matching", { action: `Evaluating ${condition.type} condition`, prompt }, doc.supabase);
        try {
            const { provider, model } = await SDKService.resolveChatProvider(settings);
            trace.push({
                timestamp: new Date().toISOString(),
                step: "LLM request (condition verify)",
                details: {
                    provider,
                    model,
                    condition_type: condition.type,
                    prompt_preview: prompt.slice(0, 180),
                    vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
                }
            });
            Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Policy Matching", {
                action: "LLM request (condition verify)",
                provider,
                model,
                condition_type: condition.type,
                prompt_preview: prompt.slice(0, 180),
                vision_payload: doc.text.includes("[VLM_IMAGE_DATA:")
            }, doc.supabase);
            const result = await sdk.llm.chat([
                {
                    role: "system",
                    content: "You are a document classifier. Answer with a single JSON object: { \"result\": true/false, \"confidence\": 0.0-1.0 }"
                },
                {
                    role: "user",
                    content: buildMessageContent(`Question: ${prompt}`, doc.text, true)
                }
            ], { provider, model });
            const raw = extractLlmResponse(result);
            trace.push({
                timestamp: new Date().toISOString(),
                step: "LLM response (condition verify)",
                details: {
                    provider,
                    model,
                    raw_length: raw.length,
                    raw_preview: previewLlmText(raw),
                }
            });
            Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Policy Matching", {
                action: "LLM response (condition verify)",
                provider,
                model,
                raw_length: raw.length,
                raw_preview: previewLlmText(raw),
            }, doc.supabase);
            const parsed = parseLlmJson(raw);
            if (parsed && typeof parsed === "object" && typeof parsed.result === "boolean") {
                const threshold = condition.confidence_threshold ?? 0.8;
                const passed = parsed.result === true && (parsed.confidence ?? 1) >= threshold;
                trace.push({ timestamp: new Date().toISOString(), step: `${condition.type} result`, details: { parsed, passed } });
                Actuator.logEvent(doc.ingestionId, doc.userId, "info", "Policy Matching", { action: `${condition.type} result`, parsed, passed }, doc.supabase);
                return passed;
            }
        }
        catch (err) {
            // Best effort: a failing model call means "condition not met".
            logger.warn(`${condition.type} condition failed`, { err });
        }
        return false;
    }
    logger.warn(`Unknown condition type "${condition.type}" — skipping`);
    return false;
}
402
/**
 * Evaluates a policy's match rules against a document.
 *
 * With strategy `"ALL"` every condition must hold, and evaluation stops at
 * the first failure. Any other strategy is treated as "ANY": evaluation
 * stops at the first condition that holds. Conditions are evaluated one at
 * a time, in order. Each decision is appended to `trace` and reported
 * through `Actuator.logEvent`.
 *
 * @param {object} policy - Policy with `metadata.id` and
 *   `spec.match.{strategy, conditions}`.
 * @param {object} doc - Document context handed to `evaluateCondition`.
 * @param {Array<object>} trace - Trace log that receives one entry per step.
 * @param {object} [settings={}] - Settings forwarded to `evaluateCondition`.
 * @returns {Promise<boolean>} Whether the policy matches the document.
 */
async function matchPolicy(policy, doc, trace, settings = {}) {
    const { strategy, conditions } = policy.spec.match;
    const policyId = policy.metadata.id;
    // Records one step in the trace and mirrors it to the event log.
    const record = (step, details) => {
        const entry = { timestamp: new Date().toISOString(), step };
        if (details !== undefined) {
            entry.details = details;
        }
        trace.push(entry);
        Actuator.logEvent(doc.ingestionId, doc.userId, "info", "Policy Matching", { action: step, ...details }, doc.supabase);
    };
    record("Evaluating policy rules", { policyId, strategy, conditionsCount: conditions.length });
    const requireAll = strategy === "ALL";
    for (const cond of conditions) {
        const holds = await evaluateCondition(cond, doc, trace, settings);
        if (requireAll && !holds) {
            record("Match failed on condition", { condition: cond });
            return false;
        }
        if (!requireAll && holds) {
            record("Condition matched (ANY strategy)", { condition: cond });
            return true;
        }
    }
    if (requireAll) {
        record("All conditions matched", { policyId });
        return true;
    }
    record("No conditions matched (ANY strategy)");
    return false;
}
430
+ // ─── Extractor ───────────────────────────────────────────────────────────────
431
/**
 * Asks the chat model to extract the given fields from a document.
 *
 * The model is instructed to answer with a single JSON object keyed by
 * field key. Documents that carry an embedded image marker are sent as one
 * user message, with the system instruction folded into the prompt. Plain
 * text documents are sent as a system + user pair. Each step is appended to
 * `trace` and reported through `Actuator.logEvent`.
 *
 * The image marker is detected with `extractVlmPayload`, the same check
 * `buildMessageContent` uses to decide whether to emit an image part. This
 * keeps the message layout and the logged `vision_payload` flag in
 * agreement with the content actually sent, even when the marker is not at
 * the very start of the text.
 *
 * @param {Array<{ key: string, type: string, description: string, required?: boolean }>} fields
 *   Fields to extract.
 * @param {object} doc - Document context (`text`, `ingestionId`, `userId`,
 *   `supabase`).
 * @param {Array<object>} trace - Trace log that receives one entry per step.
 * @param {object} [settings={}] - Settings passed to
 *   `SDKService.resolveChatProvider`.
 * @returns {Promise<object>} The extracted values. `{}` when there is no
 *   SDK, no fields, the model call fails, or the answer is not a JSON
 *   object (arrays and primitives are rejected).
 */
async function extractData(fields, doc, trace, settings = {}) {
    const sdk = SDKService.getSDK();
    if (!sdk || fields.length === 0)
        return {};
    trace.push({ timestamp: new Date().toISOString(), step: "Starting data extraction", details: { fieldsCount: fields.length } });
    Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Data Extraction", { action: "Starting data extraction", fieldsCount: fields.length }, doc.supabase);
    const { provider, model } = await SDKService.resolveChatProvider(settings);
    const fieldDescriptions = fields
        .map((f) => `- "${f.key}" (${f.type}): ${f.description}${f.required ? " [REQUIRED]" : ""}`)
        .join("\n");
    const prompt = `Extract the following fields from the document. Return ONLY a valid JSON object with the field keys and their extracted values. Use null for fields that cannot be found.\n\nFields to extract:\n${fieldDescriptions}`;
    try {
        const isVlmPayload = extractVlmPayload(doc.text) !== null;
        const mixedPrompt = isVlmPayload
            ? `You are a precise data extraction engine. Return only valid JSON.\n\n${prompt}`
            : prompt;
        trace.push({
            timestamp: new Date().toISOString(),
            step: "LLM request (data extraction)",
            details: {
                provider,
                model,
                fields_count: fields.length,
                vision_payload: isVlmPayload,
            }
        });
        Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Data Extraction", {
            action: "LLM request (data extraction)",
            provider,
            model,
            fields_count: fields.length,
            vision_payload: isVlmPayload,
        }, doc.supabase);
        const messages = isVlmPayload
            ? [{ role: "user", content: buildMessageContent(mixedPrompt, doc.text) }]
            : [
                { role: "system", content: "You are a precise data extraction engine. Return only valid JSON." },
                { role: "user", content: buildMessageContent(prompt, doc.text) }
            ];
        const result = await sdk.llm.chat(messages, { provider, model });
        const raw = extractLlmResponse(result);
        trace.push({
            timestamp: new Date().toISOString(),
            step: "LLM response (data extraction)",
            details: {
                provider,
                model,
                raw_length: raw.length,
                raw_preview: previewLlmText(raw),
            }
        });
        Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Data Extraction", {
            action: "LLM response (data extraction)",
            provider,
            model,
            raw_length: raw.length,
            raw_preview: previewLlmText(raw),
        }, doc.supabase);
        const parsed = parseLlmJson(raw);
        // Only a plain JSON object is usable as a field -> value mapping.
        if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
            trace.push({ timestamp: new Date().toISOString(), step: "Data extracted successfully", details: { extractedKeys: Object.keys(parsed) } });
            Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Data Extraction", { action: "Data extracted successfully", extractedKeys: Object.keys(parsed), raw_response: parsed }, doc.supabase);
            return parsed;
        }
        else {
            logger.warn("Data extraction returned unparseable JSON", { raw: raw.slice(0, 300) });
            trace.push({ timestamp: new Date().toISOString(), step: "Data extraction failed", details: { error: "Unparseable JSON from model" } });
            Actuator.logEvent(doc.ingestionId, doc.userId, "error", "Data Extraction", { action: "Data extraction unparseable", raw_response: raw.slice(0, 300) }, doc.supabase);
        }
    }
    catch (err) {
        logger.error("Data extraction failed", { err });
        trace.push({ timestamp: new Date().toISOString(), step: "Data extraction failed", details: { error: String(err) } });
        Actuator.logEvent(doc.ingestionId, doc.userId, "error", "Data Extraction", { action: "Data extraction failed", error: String(err) }, doc.supabase);
    }
    return {};
}
512
/**
 * Render an arbitrary extracted value as text for embedding in an LLM prompt.
 *
 * - null / undefined   -> the literal text "null"
 * - strings            -> returned unchanged
 * - numbers / booleans -> String(value)
 * - anything else      -> its JSON text, falling back to String(value)
 *
 * @param {unknown} value - Value to render.
 * @returns {string} Always a string, so callers may apply string methods to the result.
 */
function valueToPromptPreview(value) {
    if (value == null) {
        return "null";
    }
    if (typeof value === "string") {
        return value;
    }
    if (typeof value === "number" || typeof value === "boolean") {
        return String(value);
    }
    try {
        // JSON.stringify yields undefined (without throwing) for functions, symbols and
        // objects whose toJSON() returns undefined; fall back so the result stays a string.
        return JSON.stringify(value) ?? String(value);
    }
    catch {
        // Circular structures and BigInt values make JSON.stringify throw.
        return String(value);
    }
}
526
/**
 * Decide whether a value carries any information worth keeping.
 *
 * null/undefined, blank strings and non-finite numbers are not meaningful.
 * Booleans always are. Arrays and objects are meaningful when at least one
 * element / property value is (checked recursively). Any other type counts
 * as meaningful.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} true when the value (or something nested in it) is meaningful.
 */
function hasMeaningfulValue(value) {
    if (value === null || value === undefined) {
        return false;
    }
    switch (typeof value) {
        case "string":
            return value.trim().length > 0;
        case "number":
            return Number.isFinite(value);
        case "boolean":
            return true;
        case "object": {
            // Arrays are inspected element-wise, other objects by their own enumerable values.
            const children = Array.isArray(value) ? value : Object.values(value);
            return children.some((child) => hasMeaningfulValue(child));
        }
        default:
            return true;
    }
}
541
/**
 * Filter an enrichment object down to fields that are new and non-empty.
 *
 * A field is dropped when its key, after normalizeTemplateFieldKey, equals the
 * normalized form of any key in `contractData`, or when hasMeaningfulValue
 * rejects its value. Surviving fields keep their original key and order.
 *
 * @param {object} enrichment - Candidate extra fields (keys come from model output).
 * @param {object} contractData - Fields that were already extracted.
 * @returns {object} A new object holding only the surviving fields.
 */
function removeDuplicateOrEmptyEnrichmentFields(enrichment, contractData) {
    const existingKeys = new Set(Object.keys(contractData).map((key) => normalizeTemplateFieldKey(key)));
    const keptEntries = Object.entries(enrichment).filter(([key, value]) => {
        if (existingKeys.has(normalizeTemplateFieldKey(key))) {
            return false;
        }
        return hasMeaningfulValue(value);
    });
    // Object.fromEntries defines own data properties, so a model-supplied key such as
    // "__proto__" is stored as a regular field instead of replacing the result's prototype
    // (which is what plain `cleaned[key] = value` assignment would do).
    return Object.fromEntries(keptEntries);
}
554
/**
 * Run a second LLM pass that looks for useful fields beyond those already extracted.
 *
 * Returns {} immediately when the SDK is unavailable or `contractData` has no keys.
 * The model's answer is parsed as JSON; anything that is not a plain object is
 * discarded, and the remainder is filtered by removeDuplicateOrEmptyEnrichmentFields.
 * Each stage is appended to `trace` and mirrored to Actuator.logEvent. Errors raised
 * during the LLM round-trip are recorded and turned into an empty result.
 *
 * @param {object} doc - Document being processed (reads text, ingestionId, userId, supabase).
 * @param {object} contractData - Fields already extracted for the matched policy.
 * @param {Array<object>} trace - Trace list, mutated in place.
 * @param {object} [settings] - Forwarded to SDKService.resolveChatProvider.
 * @returns {Promise<object>} Extra fields, or {} when nothing usable was produced.
 */
async function extractEnrichmentData(doc, contractData, trace, settings = {}) {
    const sdk = SDKService.getSDK();
    if (!sdk || Object.keys(contractData).length === 0) {
        return {};
    }
    const { provider, model } = await SDKService.resolveChatProvider(settings);

    // Writes one step to both the in-memory trace and the Actuator event log.
    const record = (level, step, details) => {
        trace.push({ timestamp: new Date().toISOString(), step, details });
        Actuator.logEvent(doc.ingestionId, doc.userId, level, "Data Extraction", { action: step, ...details }, doc.supabase);
    };

    // Summarise what is already known: at most 80 fields, each preview capped at 300 chars.
    const knownLines = [];
    for (const [key, value] of Object.entries(contractData).slice(0, 80)) {
        knownLines.push(`- "${key}": ${valueToPromptPreview(value).slice(0, 300)}`);
    }
    const knownBlock = knownLines.join("\n");

    const prompt = [
        "You already extracted the core template fields below.",
        "Return ONLY a valid JSON object with ADDITIONAL useful fields found in the document (do not repeat existing keys).",
        "",
        "Already extracted fields:",
        knownBlock || "- none",
        "",
        "Add only high-confidence extra signals useful for downstream automation or analytics, such as:",
        "- line_items: array of { description, quantity, unit_price, total }",
        "- tax_breakdown, discounts, payment_details",
        "- merchant_metadata, customer_metadata",
        "- notes or compliance identifiers",
        "",
        "Rules:",
        "- Return {} if no reliable extras exist.",
        "- Keep keys in snake_case.",
        "- Do not overwrite or repeat existing keys.",
        "- JSON only. No markdown or explanations.",
    ].join("\n");

    try {
        record("analysis", "LLM request (enrichment extraction)", {
            provider,
            model,
            known_fields_count: Object.keys(contractData).length,
        });

        const systemInstruction = "You are a precise data extraction engine. Return only valid JSON.";
        const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
        // For image payloads the system instruction is folded into a single user message;
        // otherwise it is sent as a separate system message.
        const messages = isVlmPayload
            ? [{ role: "user", content: buildMessageContent(`${systemInstruction}\n\n${prompt}`, doc.text) }]
            : [
                { role: "system", content: systemInstruction },
                { role: "user", content: buildMessageContent(prompt, doc.text) },
            ];
        const result = await sdk.llm.chat(messages, { provider, model });
        const raw = extractLlmResponse(result);

        record("analysis", "LLM response (enrichment extraction)", {
            provider,
            model,
            raw_length: raw.length,
            raw_preview: previewLlmText(raw),
        });

        const parsed = parseLlmJson(raw);
        const isPlainObject = Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
        if (!isPlainObject) {
            // This outcome is recorded in the trace only; no Actuator event is emitted for it.
            trace.push({
                timestamp: new Date().toISOString(),
                step: "Enrichment extraction skipped",
                details: { reason: "Unparseable enrichment JSON" },
            });
            return {};
        }

        const cleaned = removeDuplicateOrEmptyEnrichmentFields(parsed, contractData);
        const extractedKeys = Object.keys(cleaned);
        record("analysis", "Enrichment extraction complete", {
            extracted_keys: extractedKeys,
            extracted_count: extractedKeys.length,
        });
        return cleaned;
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        record("error", "Enrichment extraction failed", { error: msg });
        return {};
    }
}
665
/**
 * Combine extracted fields with optional enrichment data.
 *
 * @param {object} contractData - Core extracted fields.
 * @param {object} enrichment - Extra fields; may have no keys.
 * @returns {object} `contractData` itself when `enrichment` has no keys, otherwise a
 *   shallow copy of `contractData` with the enrichment stored under `_enrichment`.
 */
function attachEnrichment(contractData, enrichment) {
    const hasEnrichment = Object.keys(enrichment).length > 0;
    return hasEnrichment
        ? { ...contractData, _enrichment: enrichment }
        : contractData;
}
674
/**
 * Flatten a data object into scalar values suitable for action execution.
 *
 * Per value: null/undefined become null; strings and numbers pass through;
 * booleans become the strings "true"/"false"; everything else is converted with
 * JSON.stringify, or String(value) when serialization throws.
 *
 * @param {object} data - Source key/value pairs.
 * @returns {object} New object with the same keys and flattened values.
 */
function toActionData(data) {
    const flatten = (value) => {
        if (value == null) {
            return null;
        }
        switch (typeof value) {
            case "string":
            case "number":
                return value;
            case "boolean":
                return value ? "true" : "false";
            default:
                try {
                    return JSON.stringify(value);
                }
                catch {
                    return String(value);
                }
        }
    };
    const normalized = {};
    Object.entries(data).forEach(([key, value]) => {
        normalized[key] = flatten(value);
    });
    return normalized;
}
698
/**
 * Run extraction and actions for a policy that has already been selected.
 *
 * Steps, in order: record the selection, extract the policy's fields, optionally
 * run enrichment extraction (only when the policy has an "append_to_google_sheet"
 * action), verify required fields, then hand the actions to Actuator.execute.
 * Enrichment is attached to the returned `extractedData` only; the data passed to
 * the actuator is `baselineEntities` overlaid with the extracted fields.
 *
 * @param {object} policy - Policy with `metadata` and `spec`.
 * @param {object} doc - Document being processed.
 * @param {Array<object>} trace - Trace list, mutated in place and returned in the result.
 * @param {object} settings - Forwarded to the extraction helpers.
 * @param {object} baselineEntities - Baseline entities merged under the extracted data.
 * @param {string} [matchSource="rules"] - How the policy was chosen; recorded for auditing.
 * @returns {Promise<object>} Result with status "error" when required fields are
 *   missing, otherwise status "matched" plus the first actuator error, if any.
 */
async function executeMatchedPolicy(policy, doc, trace, settings, baselineEntities, matchSource = "rules") {
    const { id: policyId, name: policyName } = policy.metadata;
    const selection = { policyId, policyName, matchSource };
    trace.push({
        timestamp: new Date().toISOString(),
        step: "Matched policy selected",
        details: selection,
    });
    Actuator.logEvent(doc.ingestionId, doc.userId, "info", "Policy Matching", {
        action: "Matched policy selected",
        ...selection,
    }, doc.supabase);

    const extractedData = await extractData(policy.spec.extract ?? [], doc, trace, settings);

    // Enrichment costs an extra LLM call, so it only runs for policies that append to a sheet.
    const appendsToSheet = (policy.spec.actions ?? []).some((action) => action.type === "append_to_google_sheet");
    let enrichmentData = {};
    if (appendsToSheet) {
        enrichmentData = await extractEnrichmentData(doc, extractedData, trace, settings);
    }
    const extractedForStorage = attachEnrichment(extractedData, enrichmentData);

    const missingRequired = [];
    for (const field of policy.spec.extract ?? []) {
        if (field.required && extractedData[field.key] == null) {
            missingRequired.push(field.key);
        }
    }

    // Fields shared by both possible outcomes.
    const outcome = {
        filePath: doc.filePath,
        matchedPolicy: policyId,
        extractedData: extractedForStorage,
    };

    if (missingRequired.length > 0) {
        trace.push({ timestamp: new Date().toISOString(), step: "Missing required fields", details: { missingRequired } });
        Actuator.logEvent(doc.ingestionId, doc.userId, "error", "Data Extraction", { action: "Missing required fields", missingRequired }, doc.supabase);
        return {
            ...outcome,
            actionsExecuted: [],
            status: "error",
            error: `Missing required fields: ${missingRequired.join(", ")}`,
            trace,
        };
    }

    // File name is the last "/"-separated segment, falling back to the full path.
    const fileName = doc.filePath.split('/').pop() || doc.filePath;
    const actuatorResult = await Actuator.execute(
        doc.ingestionId,
        doc.userId,
        policy.spec.actions ?? [],
        toActionData({ ...baselineEntities, ...extractedData }),
        { path: doc.filePath, name: fileName },
        policy.spec.extract ?? [],
        doc.supabase,
    );
    trace.push(...actuatorResult.trace);
    return {
        ...outcome,
        actionsExecuted: actuatorResult.actionsExecuted,
        status: "matched",
        error: actuatorResult.errors[0],
        trace,
    };
}
748
+ // ─── Engine ──────────────────────────────────────────────────────────────────
749
+ export class PolicyEngine {
750
+ /**
751
+ * Run a document through the policy pipeline.
752
+ * Returns the first matched policy result, or the fallback.
753
+ */
754
+ static async process(doc, settings = {}, baselineEntities = {}) {
755
+ logger.info(`Processing document: ${doc.filePath}`);
756
+ const policies = await PolicyLoader.load();
757
+ const globalTrace = [{ timestamp: new Date().toISOString(), step: "Loaded policies", details: { count: policies.length } }];
758
+ Actuator.logEvent(doc.ingestionId, doc.userId, "info", "Triage", { action: "Loaded policies", count: policies.length }, doc.supabase);
759
+ for (const policy of policies) {
760
+ try {
761
+ const matched = await matchPolicy(policy, doc, globalTrace, settings);
762
+ if (!matched)
763
+ continue;
764
+ logger.info(`Matched policy: ${policy.metadata.id} (priority: ${policy.metadata.priority})`);
765
+ return executeMatchedPolicy(policy, doc, globalTrace, settings, baselineEntities, "rules");
766
+ }
767
+ catch (err) {
768
+ logger.error(`Error evaluating policy ${policy.metadata.id}`, { err });
769
+ }
770
+ }
771
+ // Fallback: Inbox Zero
772
+ globalTrace.push({ timestamp: new Date().toISOString(), step: "No policy matched - routed to fallback" });
773
+ Actuator.logEvent(doc.ingestionId, doc.userId, "info", "Triage", { action: "No policy matched - routed to fallback" }, doc.supabase);
774
+ logger.info(`No policy matched — routing to fallback`);
775
+ return {
776
+ filePath: doc.filePath,
777
+ matchedPolicy: null,
778
+ extractedData: {},
779
+ actionsExecuted: ["Moved to /_Needs_Review"],
780
+ status: "fallback",
781
+ trace: globalTrace
782
+ };
783
+ }
784
+ /**
785
+ * Same as process() but uses a pre-loaded list of policies.
786
+ * Used by IngestionService so user-scoped policies are evaluated.
787
+ */
788
+ static async processWithPolicies(doc, policies, settings = {}, baselineEntities = {}, opts = {}) {
789
+ logger.info(`Processing document with ${policies.length} policies: ${doc.filePath}`);
790
+ const globalTrace = [{ timestamp: new Date().toISOString(), step: "Loaded user policies", details: { count: policies.length } }];
791
+ Actuator.logEvent(doc.ingestionId, doc.userId, "info", "Triage", { action: "Loaded user policies", count: policies.length }, doc.supabase);
792
+ const forcedPolicyId = opts.forcedPolicyId?.trim();
793
+ for (const policy of policies) {
794
+ if (forcedPolicyId && policy.metadata.id !== forcedPolicyId) {
795
+ continue;
796
+ }
797
+ try {
798
+ const isManualOverride = forcedPolicyId === policy.metadata.id;
799
+ const matched = isManualOverride ? true : await matchPolicy(policy, doc, globalTrace, settings);
800
+ if (!matched)
801
+ continue;
802
+ if (isManualOverride) {
803
+ globalTrace.push({
804
+ timestamp: new Date().toISOString(),
805
+ step: "Manual override policy selected",
806
+ details: { policyId: policy.metadata.id, policyName: policy.metadata.name },
807
+ });
808
+ Actuator.logEvent(doc.ingestionId, doc.userId, "info", "Policy Matching", {
809
+ action: "Manual override policy selected",
810
+ policyId: policy.metadata.id,
811
+ policyName: policy.metadata.name,
812
+ }, doc.supabase);
813
+ }
814
+ logger.info(`Matched policy: ${policy.metadata.id}${isManualOverride ? " (manual override)" : ""}`);
815
+ return executeMatchedPolicy(policy, doc, globalTrace, settings, baselineEntities, isManualOverride ? "manual_override" : "rules");
816
+ }
817
+ catch (err) {
818
+ logger.error(`Error evaluating policy ${policy.metadata.id}`, { err });
819
+ }
820
+ }
821
+ const allowLearnedFallback = opts.allowLearnedFallback !== false && !forcedPolicyId;
822
+ if (allowLearnedFallback && doc.supabase && policies.length > 0) {
823
+ try {
824
+ const learningText = doc.text.replace(/\[VLM_IMAGE_DATA:[^\]]+\]/g, "");
825
+ const learned = await PolicyLearningService.resolveLearnedCandidate({
826
+ supabase: doc.supabase,
827
+ userId: doc.userId,
828
+ policyIds: policies.map((policy) => policy.metadata.id),
829
+ filePath: doc.filePath,
830
+ baselineEntities,
831
+ documentText: learningText,
832
+ });
833
+ if (learned.candidate) {
834
+ const learnedPolicy = policies.find((policy) => policy.metadata.id === learned.candidate?.policyId);
835
+ if (learnedPolicy) {
836
+ globalTrace.push({
837
+ timestamp: new Date().toISOString(),
838
+ step: "Learned policy candidate selected",
839
+ details: {
840
+ policyId: learned.candidate.policyId,
841
+ score: learned.candidate.score,
842
+ support: learned.candidate.support,
843
+ topCandidates: learned.diagnostics.topCandidates,
844
+ },
845
+ });
846
+ Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Policy Matching", {
847
+ action: "Learned policy candidate selected",
848
+ policyId: learned.candidate.policyId,
849
+ score: learned.candidate.score,
850
+ support: learned.candidate.support,
851
+ topCandidates: learned.diagnostics.topCandidates,
852
+ }, doc.supabase);
853
+ logger.info(`Matched policy via learned fallback: ${learned.candidate.policyId} (score=${learned.candidate.score.toFixed(3)}, support=${learned.candidate.support})`);
854
+ return executeMatchedPolicy(learnedPolicy, doc, globalTrace, settings, baselineEntities, "learned");
855
+ }
856
+ }
857
+ globalTrace.push({
858
+ timestamp: new Date().toISOString(),
859
+ step: "Learned fallback analyzed",
860
+ details: learned.diagnostics,
861
+ });
862
+ Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Policy Matching", {
863
+ action: "Learned fallback analyzed",
864
+ ...learned.diagnostics,
865
+ }, doc.supabase);
866
+ }
867
+ catch (learningError) {
868
+ logger.warn("Learned fallback lookup failed", { error: learningError });
869
+ }
870
+ }
871
+ globalTrace.push({ timestamp: new Date().toISOString(), step: "No policy matched - routed to fallback" });
872
+ Actuator.logEvent(doc.ingestionId, doc.userId, "info", "Triage", { action: "No policy matched - routed to fallback" }, doc.supabase);
873
+ return {
874
+ filePath: doc.filePath,
875
+ matchedPolicy: null,
876
+ extractedData: {},
877
+ actionsExecuted: [],
878
+ status: "fallback",
879
+ trace: globalTrace
880
+ };
881
+ }
882
+ /**
883
+ * Stage 1 of the optimised pipeline: extract baseline entities from a document
884
+ * using the user's active baseline config (or the built-in defaults).
885
+ *
886
+ * The result is always persisted on the ingestion record regardless of whether
887
+ * any policy ultimately matches — every document leaves the Fast Path with
888
+ * structured entities attached.
889
+ *
890
+ * Returns the extracted entity map plus a list of field keys the model flagged
891
+ * as uncertain or absent, which are later used by the confidence-gating logic
892
+ * to decide whether a targeted deep call is worth firing.
893
+ */
894
+ static async extractBaseline(doc, config, settings = {}) {
895
+ const sdk = SDKService.getSDK();
896
+ if (!sdk) {
897
+ logger.warn("SDK unavailable — skipping baseline extraction");
898
+ return { entities: {}, uncertain_fields: [], tags: [] };
899
+ }
900
+ let fields = (config.fields ?? DEFAULT_BASELINE_FIELDS).filter((f) => f.enabled);
901
+ // Always include suggested_filename so auto_rename actions have an AI-generated name
902
+ // even when the user's saved baseline config predates this field.
903
+ if (!fields.some((f) => f.key === "suggested_filename")) {
904
+ const suggestedField = DEFAULT_BASELINE_FIELDS.find((f) => f.key === "suggested_filename");
905
+ if (suggestedField)
906
+ fields = [...fields, suggestedField];
907
+ }
908
+ if (fields.length === 0)
909
+ return { entities: {}, uncertain_fields: [], tags: [] };
910
+ const { provider, model } = await SDKService.resolveChatProvider(settings);
911
+ const fieldList = fields
912
+ .map((f) => `- "${f.key}" (${f.type}): ${f.description}`)
913
+ .join("\n");
914
+ const contextBlock = config.context?.trim()
915
+ ? `\nAdditional context about this user's documents:\n${config.context.trim()}\n`
916
+ : "";
917
+ const systemPrompt = `You are a precise document entity extractor.${contextBlock}\n` +
918
+ `Return ONLY a valid JSON object with three keys:\n` +
919
+ ` "entities": an object containing each requested field (use null for absent fields),\n` +
920
+ ` "uncertain_fields": an array of field keys you are not confident about,\n` +
921
+ ` "tags": an array of 3-6 lowercase semantic labels that best classify this document ` +
922
+ `(e.g. "invoice", "utility", "tax-deductible", "receipt", "2025", "insurance"). ` +
923
+ `Include the calendar year if clearly present. Prefer hyphenated multi-word tags.\n` +
924
+ `No markdown, no explanation — only the JSON object.`;
925
+ const userPrompt = `Extract the following fields from the document:\n${fieldList}`;
926
+ const isVlmPayload = doc.text.startsWith("[VLM_IMAGE_DATA:");
927
+ const mixedPrompt = isVlmPayload ? `${systemPrompt}\n\n${userPrompt}` : userPrompt;
928
+ try {
929
+ Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Baseline Extraction", {
930
+ action: "LLM request (baseline extraction)",
931
+ provider,
932
+ model,
933
+ fields_count: fields.length,
934
+ vision_payload: isVlmPayload,
935
+ }, doc.supabase);
936
+ const result = await sdk.llm.chat(isVlmPayload
937
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
938
+ ? [{ role: "user", content: buildMessageContent(mixedPrompt, doc.text) }]
939
+ : [
940
+ { role: "system", content: systemPrompt },
941
+ { role: "user", content: buildMessageContent(userPrompt, doc.text) },
942
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
943
+ ], { provider, model });
944
+ const raw = extractLlmResponse(result);
945
+ Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Baseline Extraction", {
946
+ action: "LLM response (baseline extraction)",
947
+ provider,
948
+ model,
949
+ raw_length: raw.length,
950
+ raw_preview: previewLlmText(raw),
951
+ }, doc.supabase);
952
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
953
+ const parsed = parseLlmJson(raw);
954
+ if (!parsed) {
955
+ logger.warn("Baseline extraction returned unparseable JSON", { raw: raw.slice(0, 300) });
956
+ Actuator.logEvent(doc.ingestionId, doc.userId, "error", "Baseline Extraction", { action: "Baseline extraction unparseable", raw_response: raw.slice(0, 300) }, doc.supabase);
957
+ return { entities: {}, uncertain_fields: [], tags: [] };
958
+ }
959
+ const entities = parsed.entities ?? parsed;
960
+ const uncertain_fields = Array.isArray(parsed.uncertain_fields)
961
+ ? parsed.uncertain_fields
962
+ : [];
963
+ const tags = Array.isArray(parsed.tags)
964
+ ? parsed.tags.map((t) => String(t).toLowerCase().trim()).filter(Boolean)
965
+ : [];
966
+ logger.info(`Baseline extraction complete — ${Object.keys(entities).length} fields, ${uncertain_fields.length} uncertain, ${tags.length} tags`);
967
+ Actuator.logEvent(doc.ingestionId, doc.userId, "analysis", "Baseline Extraction", { action: "Baseline extraction complete", fields: Object.keys(entities).length, uncertain: uncertain_fields.length, tags, extracted: entities }, doc.supabase);
968
+ return { entities, uncertain_fields, tags };
969
+ }
970
+ catch (err) {
971
+ logger.error("Baseline extraction failed", { err });
972
+ Actuator.logEvent(doc.ingestionId, doc.userId, "error", "Baseline Extraction", { action: "Baseline extraction failed", error: String(err) }, doc.supabase);
973
+ return { entities: {}, uncertain_fields: [], tags: [] };
974
+ }
975
+ }
976
+ /**
977
+ * Suggest a baseline config (context + fields) from a workflow description.
978
+ * Returns a draft { context, fields } the user can review before saving.
979
+ */
980
+ static async suggestBaseline(description, currentFields, opts = {}) {
981
+ const sdk = SDKService.getSDK();
982
+ if (!sdk) {
983
+ const msg = "SDK not available for baseline suggestion";
984
+ logger.warn(msg);
985
+ return { suggestion: null, error: msg };
986
+ }
987
+ const defaults = await SDKService.getDefaultChatProvider();
988
+ const provider = opts.provider || defaults.provider;
989
+ const model = opts.model || defaults.model;
990
+ logger.info(`Suggesting baseline config via ${provider}/${model}`);
991
+ // Summarise the current field keys so the LLM knows what already exists
992
+ const existingKeys = currentFields.map((f) => f.key).join(", ");
993
+ const systemPrompt = `You are a document intelligence expert helping configure a baseline extraction schema for Folio, a local document automation tool.
994
+
995
+ Given a description of the user's workflow, return ONLY a valid JSON object with this exact shape (no markdown, no backticks, no explanation):
996
+ {
997
+ "context": "one or two sentences injected into the LLM extraction prompt — describe document types, languages, vendors, or any domain detail that helps the model",
998
+ "fields": [
999
+ { "key": "snake_case_key", "type": "string|number|date|currency|string[]", "description": "what to extract and why", "enabled": true, "is_default": false }
1000
+ ]
1001
+ }
1002
+
1003
+ Rules:
1004
+ - "context" must be a single concise string (≤ 3 sentences). Focus on what makes these documents distinctive.
1005
+ - "fields" must only contain CUSTOM fields the user should ADD — do not repeat any of the existing field keys: ${existingKeys}
1006
+ - Each custom field needs a clear key (snake_case, no spaces), the most precise type, and a description that doubles as a hint to the extraction model.
1007
+ - Suggest between 2 and 6 custom fields — quality over quantity.
1008
+ - Return an empty fields array if no meaningful custom fields apply.`;
1009
+ try {
1010
+ if (opts.userId) {
1011
+ Actuator.logEvent(null, opts.userId, "analysis", "Configuration", {
1012
+ action: "LLM request (baseline suggestion)",
1013
+ provider,
1014
+ model,
1015
+ description_preview: description.slice(0, 180),
1016
+ current_fields_count: currentFields.length,
1017
+ }, opts.supabase);
1018
+ }
1019
+ const result = await sdk.llm.chat([
1020
+ { role: "system", content: systemPrompt },
1021
+ { role: "user", content: `My workflow: ${description}` }
1022
+ ], { provider, model });
1023
+ const raw = extractLlmResponse(result);
1024
+ if (opts.userId) {
1025
+ Actuator.logEvent(null, opts.userId, "analysis", "Configuration", {
1026
+ action: "LLM response (baseline suggestion)",
1027
+ provider,
1028
+ model,
1029
+ raw_length: raw.length,
1030
+ raw_preview: previewLlmText(raw),
1031
+ }, opts.supabase);
1032
+ }
1033
+ if (!raw)
1034
+ return { suggestion: null, error: "LLM returned empty response" };
1035
+ const parsed = parseLlmJson(raw);
1036
+ if (!parsed) {
1037
+ logger.error("JSON parse failed for baseline suggestion", { raw: raw.slice(0, 300) });
1038
+ return { suggestion: null, error: "LLM response was not valid JSON" };
1039
+ }
1040
+ if (typeof parsed.context !== "string" || !Array.isArray(parsed.fields)) {
1041
+ return { suggestion: null, error: "LLM response did not match expected shape" };
1042
+ }
1043
+ // Ensure all suggested fields are marked as custom
1044
+ parsed.fields = parsed.fields.map((f) => ({ ...f, is_default: false, enabled: true }));
1045
+ logger.info(`Baseline suggestion: context length=${parsed.context.length}, custom fields=${parsed.fields.length}`);
1046
+ return { suggestion: parsed };
1047
+ }
1048
+ catch (err) {
1049
+ const msg = err instanceof Error ? err.message : String(err);
1050
+ logger.error("Baseline suggestion failed", { err });
1051
+ if (opts.userId) {
1052
+ Actuator.logEvent(null, opts.userId, "error", "Configuration", {
1053
+ action: "LLM baseline suggestion failed",
1054
+ provider,
1055
+ model,
1056
+ error: msg,
1057
+ }, opts.supabase);
1058
+ }
1059
+ return { suggestion: null, error: msg };
1060
+ }
1061
+ }
1062
+ /**
1063
+ * Suggest a refinement draft for an existing policy based on a specific
1064
+ * ingestion that the user expected to match.
1065
+ */
1066
+ static async suggestPolicyRefinement(currentPolicy, docContext, opts = {}) {
1067
+ const sdk = SDKService.getSDK();
1068
+ if (!sdk) {
1069
+ const msg = "SDK not available for policy refinement";
1070
+ logger.warn(msg);
1071
+ return { policy: null, rationale: [], error: msg };
1072
+ }
1073
+ const defaults = await SDKService.getDefaultChatProvider();
1074
+ const provider = opts.provider || defaults.provider;
1075
+ const model = opts.model || defaults.model;
1076
+ const compactTrace = (docContext.trace ?? [])
1077
+ .slice(-12)
1078
+ .map((entry) => ({
1079
+ step: entry.step,
1080
+ details: entry.details ?? null,
1081
+ }));
1082
+ const evidence = {
1083
+ filename: docContext.filename,
1084
+ mime_type: docContext.mimeType ?? null,
1085
+ status: docContext.status ?? null,
1086
+ summary: docContext.summary ?? null,
1087
+ tags: docContext.tags ?? [],
1088
+ extracted: docContext.extracted ?? {},
1089
+ recent_trace: compactTrace,
1090
+ };
1091
+ const systemPrompt = `You are a Folio Policy Engine expert. Refine an existing policy so it better matches a target document while minimizing regressions.
1092
+
1093
+ Return ONLY valid JSON with this exact shape:
1094
+ {
1095
+ "policy": <full FolioPolicy object>,
1096
+ "rationale": ["short reason 1", "short reason 2"]
1097
+ }
1098
+
1099
+ Rules:
1100
+ - Keep metadata.id EXACTLY unchanged.
1101
+ - Preserve existing action targets unless the evidence clearly requires a correction.
1102
+ - Prefer tightening/adding deterministic match conditions (keyword/file_type) over broadening.
1103
+ - Keep extraction fields useful; do not remove important existing fields without a reason.
1104
+ - Ensure output remains a valid Folio policy schema.`;
1105
+ const userPrompt = `Current policy:\n${JSON.stringify(currentPolicy, null, 2)}\n\n` +
1106
+ `Target document evidence:\n${JSON.stringify(evidence, null, 2)}\n\n` +
1107
+ `Produce a refined policy draft and rationale.`;
1108
+ try {
1109
+ if (opts.userId) {
1110
+ Actuator.logEvent(docContext.ingestionId, opts.userId, "analysis", "Policy Synthesis", {
1111
+ action: "LLM request (policy refinement)",
1112
+ provider,
1113
+ model,
1114
+ policy_id: currentPolicy.metadata.id,
1115
+ }, opts.supabase);
1116
+ }
1117
+ const result = await sdk.llm.chat([
1118
+ { role: "system", content: systemPrompt },
1119
+ { role: "user", content: userPrompt },
1120
+ ], { provider, model });
1121
+ const raw = extractLlmResponse(result);
1122
+ if (opts.userId) {
1123
+ Actuator.logEvent(docContext.ingestionId, opts.userId, "analysis", "Policy Synthesis", {
1124
+ action: "LLM response (policy refinement)",
1125
+ provider,
1126
+ model,
1127
+ raw_length: raw.length,
1128
+ raw_preview: previewLlmText(raw),
1129
+ }, opts.supabase);
1130
+ }
1131
+ if (!raw.trim()) {
1132
+ return { policy: null, rationale: [], error: "LLM returned empty refinement response" };
1133
+ }
1134
+ const parsed = parseLlmJson(raw);
1135
+ if (!parsed) {
1136
+ return { policy: null, rationale: [], error: "LLM refinement response was not valid JSON" };
1137
+ }
1138
+ const parsedPolicy = parsed.policy ?? parsed;
1139
+ if (!parsedPolicy || typeof parsedPolicy !== "object") {
1140
+ return { policy: null, rationale: [], error: "Refinement response did not include a policy draft" };
1141
+ }
1142
+ const repairedPolicy = {
1143
+ ...currentPolicy,
1144
+ ...parsedPolicy,
1145
+ apiVersion: "folio/v1",
1146
+ kind: "Policy",
1147
+ metadata: {
1148
+ ...currentPolicy.metadata,
1149
+ ...(parsedPolicy.metadata ?? {}),
1150
+ id: currentPolicy.metadata.id,
1151
+ version: parsedPolicy.metadata?.version ?? currentPolicy.metadata.version ?? "1.0.0",
1152
+ enabled: parsedPolicy.metadata?.enabled ?? currentPolicy.metadata.enabled ?? true,
1153
+ priority: parsedPolicy.metadata?.priority ?? currentPolicy.metadata.priority,
1154
+ name: parsedPolicy.metadata?.name ?? currentPolicy.metadata.name,
1155
+ description: parsedPolicy.metadata?.description ?? currentPolicy.metadata.description,
1156
+ },
1157
+ spec: {
1158
+ ...currentPolicy.spec,
1159
+ ...(parsedPolicy.spec ?? {}),
1160
+ match: parsedPolicy.spec?.match ?? currentPolicy.spec.match,
1161
+ extract: Array.isArray(parsedPolicy.spec?.extract)
1162
+ ? parsedPolicy.spec.extract
1163
+ : currentPolicy.spec.extract,
1164
+ actions: Array.isArray(parsedPolicy.spec?.actions) && parsedPolicy.spec.actions.length > 0
1165
+ ? parsedPolicy.spec.actions
1166
+ : currentPolicy.spec.actions,
1167
+ },
1168
+ };
1169
+ if (!PolicyLoader.validate(repairedPolicy)) {
1170
+ return { policy: null, rationale: [], error: "Refined policy draft did not pass schema validation" };
1171
+ }
1172
+ const rationale = Array.isArray(parsed.rationale)
1173
+ ? parsed.rationale.map((item) => String(item).trim()).filter(Boolean).slice(0, 6)
1174
+ : [];
1175
+ return {
1176
+ policy: repairedPolicy,
1177
+ rationale,
1178
+ };
1179
+ }
1180
+ catch (err) {
1181
+ const msg = err instanceof Error ? err.message : String(err);
1182
+ logger.error("Policy refinement suggestion failed", { err });
1183
+ if (opts.userId) {
1184
+ Actuator.logEvent(docContext.ingestionId, opts.userId, "error", "Policy Synthesis", {
1185
+ action: "LLM policy refinement failed",
1186
+ provider,
1187
+ model,
1188
+ error: msg,
1189
+ policy_id: currentPolicy.metadata.id,
1190
+ }, opts.supabase);
1191
+ }
1192
+ return { policy: null, rationale: [], error: msg };
1193
+ }
1194
+ }
1195
+ /**
1196
+ * Synthesize a FolioPolicy from a natural language description using the LLM.
1197
+ */
1198
+ static async synthesizeFromNL(description, opts = {}) {
1199
+ const sdk = SDKService.getSDK();
1200
+ if (!sdk) {
1201
+ const msg = "SDK not available for policy synthesis";
1202
+ logger.warn(msg);
1203
+ return { policy: null, error: msg };
1204
+ }
1205
+ // Use explicitly provided provider/model, else fall back to SDK defaults
1206
+ const defaults = await SDKService.getDefaultChatProvider();
1207
+ const provider = opts.provider || defaults.provider;
1208
+ const model = opts.model || defaults.model;
1209
+ logger.info(`Synthesizing policy via ${provider}/${model}`);
1210
+ const targetHints = extractSynthesisTargetHints(description);
1211
+ const synthesisWarnings = [];
1212
+ let sheetTemplateContext;
1213
+ if (hasText(targetHints.sheetReference) && hasText(opts.userId)) {
1214
+ try {
1215
+ const templateResolution = await resolveSheetTemplateContext(targetHints, {
1216
+ userId: opts.userId,
1217
+ supabase: opts.supabase,
1218
+ });
1219
+ sheetTemplateContext = templateResolution.context;
1220
+ if (templateResolution.warning) {
1221
+ synthesisWarnings.push(`Google Sheet template note: ${templateResolution.warning}`);
1222
+ }
1223
+ if (sheetTemplateContext && opts.userId) {
1224
+ Actuator.logEvent(null, opts.userId, "analysis", "Policy Synthesis", {
1225
+ action: "Resolved Google Sheet template for synthesis",
1226
+ spreadsheet_id: sheetTemplateContext.spreadsheetId,
1227
+ range: sheetTemplateContext.range,
1228
+ headers_count: sheetTemplateContext.headers.length,
1229
+ headers: sheetTemplateContext.headers,
1230
+ }, opts.supabase);
1231
+ }
1232
+ }
1233
+ catch (templateErr) {
1234
+ const templateMsg = templateErr instanceof Error ? templateErr.message : String(templateErr);
1235
+ synthesisWarnings.push(`Google Sheet template note: ${templateMsg}`);
1236
+ logger.warn("Failed to resolve Google Sheet template for synthesis", { error: templateErr });
1237
+ }
1238
+ }
1239
+ const systemPrompt = `You are a Folio Policy Engine expert. Convert natural language descriptions into a valid FolioPolicy JSON object.
1240
+
1241
+ Return ONLY a valid JSON object with this exact shape (no markdown, no backticks):
1242
+ {
1243
+ "apiVersion": "folio/v1",
1244
+ "kind": "Policy",
1245
+ "metadata": { "id": "kebab-case-id", "name": "Human Name", "version": "1.0.0", "description": "Brief description", "priority": 100, "tags": ["tag1"], "enabled": true },
1246
+ "spec": {
1247
+ "match": { "strategy": "ALL", "conditions": [{ "type": "keyword", "value": ["keyword1", "keyword2"], "case_sensitive": false }] },
1248
+ "extract": [{ "key": "field_name", "type": "string", "description": "what to extract", "required": true }],
1249
+ "actions": [{ "type": "copy", "destination": "/path/to/folder" }]
1250
+ }
1251
+ }
1252
+
1253
+ Supported action types include:
1254
+ - copy, rename, auto_rename, copy_to_gdrive, append_to_google_sheet, log_csv, notify, webhook
1255
+ - For append_to_google_sheet use:
1256
+ { "type": "append_to_google_sheet", "spreadsheet_id": "<sheet-id-or-url>", "columns": ["{date}","{issuer}"] }
1257
+ - columns is optional; if omitted, runtime auto-maps extracted fields to sheet headers dynamically.
1258
+ - range is optional; only include it when user explicitly requires a specific tab/range.
1259
+ - If spreadsheet_id is a Google Sheets URL with gid, omit range unless user explicitly provided one.
1260
+ - Never omit user-provided external targets (folder IDs, spreadsheet URLs/IDs); preserve explicit ranges unless the provided range is only a generic Sheet1 fallback alongside a gid URL.`;
1261
+ const preserveRangeHint = shouldPreserveRangeHint(targetHints);
1262
+ const sheetTemplateFieldsPreview = sheetTemplateContext
1263
+ ? sheetTemplateContext.fields
1264
+ .slice(0, 40)
1265
+ .map((field) => ` - "${field.header}" -> key "${field.key}" (${field.type})`)
1266
+ .join("\n")
1267
+ : "";
1268
+ const hasMoreTemplateFields = !!sheetTemplateContext && sheetTemplateContext.fields.length > 40;
1269
+ const sheetTemplateGuidanceBlock = sheetTemplateContext
1270
+ ? `\n\nGoogle Sheet template context (authoritative for this policy):
1271
+ - spreadsheet_id: ${sheetTemplateContext.spreadsheetReference}
1272
+ - resolved range: ${sheetTemplateContext.range}
1273
+ - headers (${sheetTemplateContext.fields.length}):
1274
+ ${sheetTemplateFieldsPreview}${hasMoreTemplateFields ? "\n - ... (truncated)" : ""}
1275
+
1276
+ When template context is present:
1277
+ - Ensure spec.extract includes keys for these headers (same key names as listed).
1278
+ - Keep append_to_google_sheet action targeting this spreadsheet_id.
1279
+ - Use range "${sheetTemplateContext.range}" unless the user explicitly asks for a different valid range.`
1280
+ : "";
1281
+ const targetHintsBlock = [
1282
+ hasText(targetHints.driveFolderId) ? `- Google Drive Folder ID: ${targetHints.driveFolderId}` : null,
1283
+ hasText(targetHints.sheetReference) ? `- Google Sheet: ${targetHints.sheetReference}` : null,
1284
+ preserveRangeHint ? `- Preferred range: ${targetHints.range}` : null,
1285
+ ].filter(Boolean).join("\n");
1286
+ const synthesisRequest = targetHintsBlock
1287
+ ? `Create a policy for: ${description}\n\nRequired target values to preserve exactly:\n${targetHintsBlock}${sheetTemplateGuidanceBlock}`
1288
+ : `Create a policy for: ${description}`;
1289
+ try {
1290
+ if (opts.userId) {
1291
+ Actuator.logEvent(null, opts.userId, "analysis", "Policy Synthesis", {
1292
+ action: "LLM request (policy synthesis)",
1293
+ provider,
1294
+ model,
1295
+ description_preview: description.slice(0, 180),
1296
+ }, opts.supabase);
1297
+ }
1298
+ const result = await sdk.llm.chat([
1299
+ { role: "system", content: systemPrompt },
1300
+ { role: "user", content: synthesisRequest }
1301
+ ], { provider, model });
1302
+ // Log the entire result to discover the SDK response schema
1303
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1304
+ logger.info(`Full SDK result keys: ${Object.keys(result).join(", ")}`);
1305
+ logger.info(`Full SDK result: ${JSON.stringify(result).slice(0, 1000)}`);
1306
+ // SDK response shape: { success: true, response: { content: "..." } }
1307
+ const raw = extractLlmResponse(result);
1308
+ if (opts.userId) {
1309
+ Actuator.logEvent(null, opts.userId, "analysis", "Policy Synthesis", {
1310
+ action: "LLM response (policy synthesis)",
1311
+ provider,
1312
+ model,
1313
+ raw_length: raw.length,
1314
+ raw_preview: previewLlmText(raw),
1315
+ }, opts.supabase);
1316
+ }
1317
+ logger.info(`Synthesis raw response (first 500 chars): ${raw.slice(0, 500)}`);
1318
+ if (!raw) {
1319
+ return { policy: null, error: "LLM returned empty response", raw };
1320
+ }
1321
+ const parsed = parseLlmJson(raw);
1322
+ if (!parsed) {
1323
+ logger.error("JSON parse failed", { raw: raw.slice(0, 300) });
1324
+ return { policy: null, error: "LLM response was not valid JSON", raw };
1325
+ }
1326
+ let repaired = applySynthesisTargetHints(parsed, targetHints);
1327
+ repaired = applySheetTemplateContext(repaired, sheetTemplateContext);
1328
+ if (PolicyLoader.validate(repaired)) {
1329
+ return {
1330
+ policy: repaired,
1331
+ ...(synthesisWarnings.length > 0 ? { error: synthesisWarnings.join(" ") } : {}),
1332
+ };
1333
+ }
1334
+ // Return as draft even if validation fails — let the UI show a preview
1335
+ logger.warn("Synthesized policy failed strict validation, returning as draft");
1336
+ synthesisWarnings.push("Policy schema may be incomplete — please review before saving");
1337
+ return { policy: repaired, error: synthesisWarnings.join(" ") };
1338
+ }
1339
+ catch (err) {
1340
+ const msg = err instanceof Error ? err.message : String(err);
1341
+ logger.error("Policy synthesis failed", { err });
1342
+ if (opts.userId) {
1343
+ Actuator.logEvent(null, opts.userId, "error", "Policy Synthesis", {
1344
+ action: "LLM policy synthesis failed",
1345
+ provider,
1346
+ model,
1347
+ error: msg,
1348
+ }, opts.supabase);
1349
+ }
1350
+ return { policy: null, error: msg };
1351
+ }
1352
+ }
1353
+ }