plasalid 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. package/README.md +3 -4
  2. package/dist/ai/agent.d.ts +6 -7
  3. package/dist/ai/agent.js +27 -11
  4. package/dist/ai/personas.js +48 -46
  5. package/dist/ai/system-prompt.js +1 -1
  6. package/dist/ai/tools/account-mutex.d.ts +1 -0
  7. package/dist/ai/tools/account-mutex.js +16 -0
  8. package/dist/ai/tools/index.js +4 -12
  9. package/dist/ai/tools/ingest.d.ts +1 -1
  10. package/dist/ai/tools/ingest.js +282 -242
  11. package/dist/ai/tools/merchants.js +1 -28
  12. package/dist/ai/tools/read.js +8 -8
  13. package/dist/ai/tools/record.js +3 -36
  14. package/dist/ai/tools/resolve.js +25 -22
  15. package/dist/ai/tools/scan.js +0 -1
  16. package/dist/ai/tools/types.d.ts +14 -21
  17. package/dist/cli/commands/record.js +1 -82
  18. package/dist/cli/commands/resolve.d.ts +5 -2
  19. package/dist/cli/commands/resolve.js +36 -5
  20. package/dist/cli/commands/revert.js +4 -2
  21. package/dist/cli/commands/rules.js +2 -2
  22. package/dist/cli/commands/scan.js +199 -128
  23. package/dist/cli/commands/status.js +5 -5
  24. package/dist/cli/index.js +8 -29
  25. package/dist/cli/ink/ScanDashboard.d.ts +49 -0
  26. package/dist/cli/ink/ScanDashboard.js +214 -0
  27. package/dist/cli/ink/scan_dashboard.d.ts +40 -25
  28. package/dist/cli/ink/scan_dashboard.js +139 -44
  29. package/dist/db/queries/account-balance.d.ts +1 -1
  30. package/dist/db/queries/questions.d.ts +62 -0
  31. package/dist/db/queries/questions.js +110 -0
  32. package/dist/db/queries/transactions.d.ts +1 -1
  33. package/dist/db/queries/unknowns.d.ts +17 -15
  34. package/dist/db/queries/unknowns.js +35 -39
  35. package/dist/db/schema.js +6 -28
  36. package/dist/scanner/audit/auditor.d.ts +31 -0
  37. package/dist/scanner/audit/auditor.js +72 -0
  38. package/dist/scanner/audit/engine.d.ts +10 -0
  39. package/dist/scanner/audit/engine.js +98 -0
  40. package/dist/scanner/audit/eventBus.d.ts +60 -0
  41. package/dist/scanner/audit/eventBus.js +35 -0
  42. package/dist/scanner/audit/passes/index.d.ts +11 -0
  43. package/dist/scanner/audit/passes/index.js +9 -0
  44. package/dist/scanner/audit/passes/types.d.ts +23 -0
  45. package/dist/scanner/audit/passes/types.js +1 -0
  46. package/dist/scanner/audit/types.d.ts +27 -0
  47. package/dist/scanner/audit/types.js +1 -0
  48. package/dist/scanner/auditor.d.ts +51 -0
  49. package/dist/scanner/auditor.js +80 -0
  50. package/dist/scanner/buffer/engine.d.ts +9 -0
  51. package/dist/scanner/buffer/engine.js +110 -0
  52. package/dist/scanner/buffer/sharedBuffer.d.ts +78 -0
  53. package/dist/scanner/buffer/sharedBuffer.js +130 -0
  54. package/dist/scanner/buffer/types.d.ts +67 -0
  55. package/dist/scanner/buffer/types.js +1 -0
  56. package/dist/scanner/buffer.d.ts +45 -38
  57. package/dist/scanner/buffer.js +93 -61
  58. package/dist/scanner/bus/engine.d.ts +11 -0
  59. package/dist/scanner/bus/engine.js +42 -0
  60. package/dist/scanner/bus/types.d.ts +53 -0
  61. package/dist/scanner/bus/types.js +1 -0
  62. package/dist/scanner/bus.d.ts +38 -0
  63. package/dist/scanner/bus.js +37 -0
  64. package/dist/scanner/chunk-worker.d.ts +19 -0
  65. package/dist/scanner/chunk-worker.js +67 -0
  66. package/dist/scanner/chunkWorker.d.ts +20 -0
  67. package/dist/scanner/chunkWorker.js +59 -0
  68. package/dist/scanner/chunker/chunker.d.ts +7 -0
  69. package/dist/scanner/chunker/chunker.js +60 -0
  70. package/dist/scanner/chunker.d.ts +7 -0
  71. package/dist/scanner/chunker.js +60 -0
  72. package/dist/scanner/converge.d.ts +29 -0
  73. package/dist/scanner/converge.js +15 -0
  74. package/dist/scanner/decrypt.d.ts +10 -0
  75. package/dist/scanner/decrypt.js +80 -0
  76. package/dist/scanner/engine/scanEngine.d.ts +24 -0
  77. package/dist/scanner/engine/scanEngine.js +87 -0
  78. package/dist/scanner/engine/types.d.ts +90 -0
  79. package/dist/scanner/engine/types.js +1 -0
  80. package/dist/scanner/engine.d.ts +90 -0
  81. package/dist/scanner/engine.js +84 -0
  82. package/dist/scanner/file-worker.d.ts +33 -0
  83. package/dist/scanner/file-worker.js +28 -0
  84. package/dist/scanner/fileWorker.d.ts +33 -0
  85. package/dist/scanner/fileWorker.js +22 -0
  86. package/dist/scanner/hooks/types.d.ts +25 -0
  87. package/dist/scanner/hooks/types.js +1 -0
  88. package/dist/scanner/hooks.d.ts +23 -0
  89. package/dist/scanner/hooks.js +1 -0
  90. package/dist/scanner/parse.d.ts +10 -0
  91. package/dist/scanner/parse.js +47 -0
  92. package/dist/scanner/passes/index.d.ts +8 -0
  93. package/dist/scanner/passes/index.js +6 -0
  94. package/dist/scanner/passes/types.d.ts +22 -0
  95. package/dist/scanner/passes/types.js +1 -0
  96. package/dist/scanner/pdf/chunker.d.ts +7 -0
  97. package/dist/scanner/pdf/chunker.js +60 -0
  98. package/dist/scanner/pdf/password-store.d.ts +34 -0
  99. package/dist/scanner/pdf/password-store.js +83 -0
  100. package/dist/scanner/pdf/pdf-unlock.d.ts +17 -0
  101. package/dist/scanner/pdf/pdf-unlock.js +50 -0
  102. package/dist/scanner/pdf/pdf.d.ts +17 -0
  103. package/dist/scanner/pdf/pdf.js +36 -0
  104. package/dist/scanner/pdf/state-machine.d.ts +60 -0
  105. package/dist/scanner/pdf/state-machine.js +64 -0
  106. package/dist/scanner/pdf/unlock.d.ts +22 -0
  107. package/dist/scanner/pdf/unlock.js +121 -0
  108. package/dist/scanner/phase-decrypt.d.ts +10 -0
  109. package/dist/scanner/phase-decrypt.js +80 -0
  110. package/dist/scanner/phase-parse.d.ts +10 -0
  111. package/dist/scanner/phase-parse.js +46 -0
  112. package/dist/scanner/phases/chunk.d.ts +8 -0
  113. package/dist/scanner/phases/chunk.js +13 -0
  114. package/dist/scanner/phases/commit.d.ts +12 -0
  115. package/dist/scanner/phases/commit.js +140 -0
  116. package/dist/scanner/phases/decrypt.d.ts +10 -0
  117. package/dist/scanner/phases/decrypt.js +80 -0
  118. package/dist/scanner/phases/parse.d.ts +10 -0
  119. package/dist/scanner/phases/parse.js +46 -0
  120. package/dist/scanner/phases/resolve.d.ts +10 -0
  121. package/dist/scanner/phases/resolve.js +17 -0
  122. package/dist/scanner/phases/review.d.ts +10 -0
  123. package/dist/scanner/phases/review.js +12 -0
  124. package/dist/scanner/progress.d.ts +14 -0
  125. package/dist/scanner/progress.js +21 -0
  126. package/dist/scanner/resolver-memory.d.ts +8 -0
  127. package/dist/scanner/resolver-memory.js +24 -0
  128. package/dist/scanner/resolver.d.ts +39 -0
  129. package/dist/scanner/resolver.js +196 -0
  130. package/dist/scanner/result.d.ts +17 -0
  131. package/dist/scanner/result.js +19 -0
  132. package/dist/scanner/run-passes.d.ts +30 -0
  133. package/dist/scanner/run-passes.js +15 -0
  134. package/dist/scanner/unlock.js +1 -1
  135. package/dist/scanner/worker.d.ts +19 -0
  136. package/dist/scanner/worker.js +67 -0
  137. package/dist/scanner/workers/chunkWorker.d.ts +20 -0
  138. package/dist/scanner/workers/chunkWorker.js +65 -0
  139. package/dist/scanner/workers/fileWorker.d.ts +32 -0
  140. package/dist/scanner/workers/fileWorker.js +22 -0
  141. package/package.json +1 -1
@@ -1,20 +1,70 @@
1
- import { createAccount, updateAccountMetadata, findAccountById, } from "../../db/queries/account-balance.js";
1
+ import { createAccount, updateAccountMetadata, } from "../../db/queries/account-balance.js";
2
2
  import { validateTransaction, insertTransactionRows, recordTransaction, } from "../../db/queries/transactions.js";
3
- import { appendAction } from "../../db/queries/action-log.js";
4
- import { getUnknownTarget, recordUnknown, resolveUnknown, } from "../../db/queries/unknowns.js";
5
- import { runExclusive as runAccountExclusive } from "../../scanner/account-mutex.js";
6
- import { sanitizeForPrompt } from "../sanitize.js";
3
+ import { recordQuestion } from "../../db/queries/questions.js";
4
+ import { runExclusive as runAccountExclusive } from "./account-mutex.js";
7
5
  import { ACCOUNT_TYPE_DESCRIPTIONS } from "../../accounts/taxonomy.js";
8
6
  const ACCOUNT_TYPES = Object.keys(ACCOUNT_TYPE_DESCRIPTIONS);
9
- /**
10
- * Account + transaction write primitives
11
- *
12
- * Shared by scan, resolve, and record. Each tool branches once on
13
- * `ctx.correlationId`: when set (record path), the data write and the
14
- * action_log insert run inside a single transaction so the audit row is
15
- * atomic with the change. Without it (scan / resolve), the write goes through
16
- * the existing path unchanged.
17
- */
7
+ const BATCH_MAX = 50;
8
+ const TRANSACTION_ITEM_SCHEMA = {
9
+ type: "object",
10
+ properties: {
11
+ date: {
12
+ type: "string",
13
+ description: "ISO Gregorian date (YYYY-MM-DD).",
14
+ },
15
+ description: {
16
+ type: "string",
17
+ description: "Short human-readable description.",
18
+ },
19
+ source_page: {
20
+ type: "number",
21
+ description: "Page number in the source PDF, if known.",
22
+ },
23
+ raw_descriptor: {
24
+ type: "string",
25
+ description: "The exact statement line (the raw merchant descriptor) when posting from a PDF — preserved for alias matching and later review.",
26
+ },
27
+ merchant: {
28
+ type: "object",
29
+ description: "Counter-party block. Omit for transfers between own accounts and pure metadata movements.",
30
+ properties: {
31
+ canonical_name: {
32
+ type: "string",
33
+ description: "Normalized merchant name, Title Case.",
34
+ },
35
+ alias: {
36
+ type: "string",
37
+ description: "The raw descriptor exactly as it appears on the statement.",
38
+ },
39
+ default_account_id: {
40
+ type: "string",
41
+ description: "Optional learned cache; do not set on first sight.",
42
+ },
43
+ },
44
+ required: ["canonical_name"],
45
+ },
46
+ merchant_id: {
47
+ type: "string",
48
+ description: "Pre-resolved merchant id (from the scanner's alias pre-pass).",
49
+ },
50
+ postings: {
51
+ type: "array",
52
+ description: "Two or more postings that balance.",
53
+ items: {
54
+ type: "object",
55
+ properties: {
56
+ account_id: { type: "string" },
57
+ debit: { type: "number" },
58
+ credit: { type: "number" },
59
+ currency: { type: "string", default: "THB" },
60
+ memo: { type: "string" },
61
+ },
62
+ required: ["account_id"],
63
+ },
64
+ },
65
+ },
66
+ required: ["date", "description", "postings"],
67
+ };
18
68
  const ACCOUNT_DEFS = [
19
69
  {
20
70
  name: "create_account",
@@ -92,9 +142,26 @@ const ACCOUNT_DEFS = [
92
142
  required: ["account_id"],
93
143
  },
94
144
  },
145
+ {
146
+ name: "record_transactions",
147
+ description: `Post many balanced double-entry transactions in a single tool call. **Strongly preferred over record_transaction whenever you have more than one row to post** — the scan tool-step budget is finite (100 per file) and the singular form burns one step per row. Each item has the same shape as record_transaction. Validation runs per item: valid items are written directly to the DB and their ids returned; invalid items are reported back so you can fix and retry just those indices. Limit each call to ≤${BATCH_MAX} transactions; chunk larger statements across multiple calls.`,
148
+ input_schema: {
149
+ type: "object",
150
+ properties: {
151
+ transactions: {
152
+ type: "array",
153
+ description: `Up to ${BATCH_MAX} transactions; each has the same shape as record_transaction.`,
154
+ items: TRANSACTION_ITEM_SCHEMA,
155
+ minItems: 1,
156
+ maxItems: BATCH_MAX,
157
+ },
158
+ },
159
+ required: ["transactions"],
160
+ },
161
+ },
95
162
  {
96
163
  name: "record_transaction",
97
- description: "Post one balanced double-entry transaction the right tool for any real-world event (purchase, payment, transfer, refund, salary, withdrawal). Use adjust_account_balance instead when the user is stating a current balance rather than describing a transaction. The sum of debits MUST equal the sum of credits (within one currency). Convert Buddhist-Era dates by subtracting 543. Each posting carries an ISO 4217 currency code (THB, USD, EUR, …); default to THB. Use the account's currency where set; only deviate when the source row is explicitly in another currency. When the transaction has an external counter-party, attach a `merchant` block — Plasalid dedups merchants and learns a default expense account per merchant so future statements skip re-categorization.",
164
+ description: "Post ONE balanced double-entry transaction. Prefer record_transactions (plural) when posting more than one row at a time it burns one tool step instead of N. Use this singular form for one-off corrections (e.g. retrying a single failed item from a batch). The sum of debits MUST equal the sum of credits (within one currency). Convert Buddhist-Era dates by subtracting 543. Each posting carries an ISO 4217 currency code (THB, USD, EUR, …); default to THB. Use the account's currency where set; only deviate when the source row is explicitly in another currency. When the transaction has an external counter-party, attach a `merchant` block — Plasalid dedups merchants and learns a default expense account per merchant so future statements skip re-categorization.",
98
165
  input_schema: {
99
166
  type: "object",
100
167
  properties: {
@@ -177,32 +244,60 @@ const ACCOUNT_LABELS = {
177
244
  create_account: "Creating account",
178
245
  update_account_metadata: "Updating account metadata",
179
246
  record_transaction: "Posting transaction",
247
+ record_transactions: "Posting transactions",
180
248
  };
181
- /**
182
- * Run a write inside an audit-wrapping transaction. When the caller has a
183
- * correlation id, the write + action_log insert land atomically; otherwise
184
- * it's just the write. The write closure can return an AuditRecord (logged)
185
- * or null (no audit row this call — used when an update was a no-op).
186
- */
187
- function writeWithAudit(db, ctx, write) {
188
- if (!ctx?.correlationId) {
189
- write();
190
- return;
191
- }
192
- const op = db.transaction(() => {
193
- const audit = write();
194
- if (!audit)
195
- return;
196
- appendAction(db, {
197
- correlation_id: ctx.correlationId,
198
- command: ctx.command ?? "record",
199
- user_input: ctx.userInput ?? null,
200
- action_type: audit.actionType,
201
- target_id: audit.targetId,
202
- payload: audit.payload,
249
+ function buildTransactionInput(input, ctx) {
250
+ return {
251
+ date: input.date,
252
+ description: input.description,
253
+ source_file_id: ctx.fileId,
254
+ source_page: input.source_page ?? null,
255
+ raw_descriptor: input.raw_descriptor ?? null,
256
+ merchant: input.merchant ?? null,
257
+ merchant_id: input.merchant_id ?? null,
258
+ postings: (input.postings || []).map((p) => ({
259
+ account_id: p.account_id,
260
+ debit: p.debit ?? 0,
261
+ credit: p.credit ?? 0,
262
+ currency: p.currency || "THB",
263
+ memo: p.memo ?? null,
264
+ })),
265
+ };
266
+ }
267
+ async function persistOneTransaction(db, ctx, txInput) {
268
+ try {
269
+ const validated = validateTransaction(txInput);
270
+ const tx = db.transaction(() => {
271
+ insertTransactionRows(db, validated);
203
272
  });
204
- });
205
- op();
273
+ tx();
274
+ if (ctx.progress && ctx.chunkId) {
275
+ ctx.progress.emit({ chunkId: ctx.chunkId, kind: "tx" });
276
+ }
277
+ return { ok: true, id: validated.id };
278
+ }
279
+ catch (err) {
280
+ const message = err?.message ?? String(err);
281
+ if (ctx.scanId) {
282
+ try {
283
+ recordQuestion(db, {
284
+ file_id: ctx.fileId ?? null,
285
+ scan_id: ctx.scanId,
286
+ transaction_id: null,
287
+ account_id: null,
288
+ kind: "scan_commit_failure",
289
+ prompt: `Could not record "${txInput.description}" on ${txInput.date}: ${message}. Review the source statement and re-enter via the record flow.`,
290
+ });
291
+ if (ctx.progress && ctx.chunkId) {
292
+ ctx.progress.emit({ chunkId: ctx.chunkId, kind: "question" });
293
+ }
294
+ }
295
+ catch {
296
+ // failure to record a failure shouldn't crash the scan
297
+ }
298
+ }
299
+ return { ok: false, error: message };
300
+ }
206
301
  }
207
302
  async function accountExecute(db, name, input, ctx) {
208
303
  switch (name) {
@@ -212,25 +307,18 @@ async function accountExecute(db, name, input, ctx) {
212
307
  }
213
308
  return await runAccountExclusive(() => {
214
309
  try {
215
- writeWithAudit(db, ctx, () => {
216
- createAccount(db, {
217
- id: input.id,
218
- name: input.name,
219
- type: input.type,
220
- parent_id: input.parent_id ?? null,
221
- subtype: input.subtype ?? null,
222
- bank_name: input.bank_name ?? null,
223
- account_number_masked: input.account_number_masked ?? null,
224
- currency: input.currency,
225
- due_day: input.due_day ?? null,
226
- statement_day: input.statement_day ?? null,
227
- metadata: input.metadata ?? null,
228
- });
229
- return {
230
- actionType: "create_account",
231
- targetId: input.id,
232
- payload: { row: findAccountById(db, input.id) },
233
- };
310
+ createAccount(db, {
311
+ id: input.id,
312
+ name: input.name,
313
+ type: input.type,
314
+ parent_id: input.parent_id ?? null,
315
+ subtype: input.subtype ?? null,
316
+ bank_name: input.bank_name ?? null,
317
+ account_number_masked: input.account_number_masked ?? null,
318
+ currency: input.currency,
319
+ due_day: input.due_day ?? null,
320
+ statement_day: input.statement_day ?? null,
321
+ metadata: input.metadata ?? null,
234
322
  });
235
323
  return `Account created: ${input.id} (${input.name}, ${input.type}).`;
236
324
  }
@@ -245,26 +333,15 @@ async function accountExecute(db, name, input, ctx) {
245
333
  case "update_account_metadata": {
246
334
  return await runAccountExclusive(() => {
247
335
  try {
248
- let changed = false;
249
- writeWithAudit(db, ctx, () => {
250
- const result = updateAccountMetadata(db, input.account_id, {
251
- due_day: input.due_day,
252
- statement_day: input.statement_day,
253
- points_balance: input.points_balance,
254
- account_number_masked: input.account_number_masked,
255
- bank_name: input.bank_name,
256
- metadata: input.metadata,
257
- });
258
- changed = result.changed;
259
- if (!result.changed)
260
- return null;
261
- return {
262
- actionType: "update_account_metadata",
263
- targetId: input.account_id,
264
- payload: { before: result.before, after: result.after },
265
- };
336
+ const result = updateAccountMetadata(db, input.account_id, {
337
+ due_day: input.due_day,
338
+ statement_day: input.statement_day,
339
+ points_balance: input.points_balance,
340
+ account_number_masked: input.account_number_masked,
341
+ bank_name: input.bank_name,
342
+ metadata: input.metadata,
266
343
  });
267
- return changed ? `Updated ${input.account_id}.` : "Nothing to update.";
344
+ return result.changed ? `Updated ${input.account_id}.` : "Nothing to update.";
268
345
  }
269
346
  catch (err) {
270
347
  if (String(err.message).includes("not found")) {
@@ -274,55 +351,52 @@ async function accountExecute(db, name, input, ctx) {
274
351
  }
275
352
  });
276
353
  }
354
+ case "record_transactions": {
355
+ if (!ctx)
356
+ return "record_transactions is only available inside an agent session.";
357
+ const items = Array.isArray(input?.transactions) ? input.transactions : [];
358
+ if (items.length === 0)
359
+ return "record_transactions requires at least one transaction.";
360
+ if (items.length > BATCH_MAX) {
361
+ return `record_transactions accepts at most ${BATCH_MAX} transactions per call; got ${items.length}. Split into smaller batches.`;
362
+ }
363
+ const posted = [];
364
+ const failed = [];
365
+ for (let i = 0; i < items.length; i++) {
366
+ const item = items[i];
367
+ const txInput = buildTransactionInput(item, ctx);
368
+ const outcome = await persistOneTransaction(db, ctx, txInput);
369
+ if (outcome.ok) {
370
+ posted.push({ index: i, transactionId: outcome.id, date: item.date });
371
+ }
372
+ else {
373
+ failed.push({ index: i, error: outcome.error });
374
+ }
375
+ }
376
+ const lines = [`Posted ${posted.length} of ${items.length}.`];
377
+ if (posted.length > 0) {
378
+ lines.push(...posted.map(p => `- index ${p.index}: ${p.transactionId} (${p.date})`));
379
+ }
380
+ if (failed.length > 0) {
381
+ lines.push("Failed:");
382
+ lines.push(...failed.map(f => `- index ${f.index}: ${f.error}`));
383
+ lines.push("Retry the failed indices with corrections.");
384
+ }
385
+ return lines.join("\n");
386
+ }
277
387
  case "record_transaction": {
278
388
  if (!ctx)
279
389
  return "record_transaction is only available inside an agent session.";
280
- const txInput = {
281
- date: input.date,
282
- description: input.description,
283
- source_file_id: ctx.fileId,
284
- source_page: input.source_page ?? null,
285
- raw_descriptor: input.raw_descriptor ?? null,
286
- merchant: input.merchant ?? null,
287
- merchant_id: input.merchant_id ?? null,
288
- postings: (input.postings || []).map((p) => ({
289
- account_id: p.account_id,
290
- debit: p.debit ?? 0,
291
- credit: p.credit ?? 0,
292
- currency: p.currency || "THB",
293
- memo: p.memo ?? null,
294
- })),
295
- };
390
+ const txInput = buildTransactionInput(input, ctx);
391
+ if (ctx.scanId) {
392
+ const outcome = await persistOneTransaction(db, ctx, txInput);
393
+ return outcome.ok
394
+ ? `Posted transaction ${outcome.id} (${input.date}).`
395
+ : `Could not post transaction: ${outcome.error}`;
396
+ }
296
397
  try {
297
- if (ctx.buffer) {
298
- const transactionId = ctx.buffer.appendTransaction(txInput);
299
- return `Posted transaction ${transactionId} (${input.date}).`;
300
- }
301
- // No-audit path uses recordTransaction (validates + inserts in one go).
302
- // Audit path validates ahead so the validated id can be returned without
303
- // re-reading from disk after the transaction commits.
304
- if (!ctx.correlationId) {
305
- const transactionId = recordTransaction(db, txInput);
306
- return `Posted transaction ${transactionId} (${input.date}).`;
307
- }
308
- const validated = validateTransaction(txInput);
309
- writeWithAudit(db, ctx, () => {
310
- insertTransactionRows(db, validated);
311
- return {
312
- actionType: "record_transaction",
313
- targetId: validated.id,
314
- payload: {
315
- transaction: {
316
- date: validated.date,
317
- description: validated.description,
318
- source_page: validated.source_page ?? null,
319
- raw_descriptor: validated.raw_descriptor ?? null,
320
- },
321
- postings: validated.postings,
322
- },
323
- };
324
- });
325
- return `Posted transaction ${validated.id} (${input.date}).`;
398
+ const transactionId = recordTransaction(db, txInput);
399
+ return `Posted transaction ${transactionId} (${input.date}).`;
326
400
  }
327
401
  catch (err) {
328
402
  return `Could not post transaction: ${err.message}`;
@@ -337,27 +411,20 @@ export const accountIngestTools = {
337
411
  LABELS: ACCOUNT_LABELS,
338
412
  execute: accountExecute,
339
413
  };
340
- /**
341
- * Scan-only unknowns
342
- *
343
- * `note_unknown` records a clarification mid-scan without ever prompting the
344
- * user — only scan needs this. Record uses `clarify` (transient prompt, no
345
- * unknowns-table residue); resolve uses `ask_user` (prompts and resolves).
346
- */
347
- const UNKNOWN_DEFS = [
414
+ const QUESTION_DEFS = [
348
415
  {
349
- name: "note_unknown",
350
- description: "Record a clarification request without pausing the run. Use during scan when a row is ambiguous (post your best-guess transaction first, then call this with the transaction's id), when a row is unparseable (skip the transaction, call this with no transaction_id), or when you have a unknown about an account itself (pass account_id). Use kind='uncategorized_expense' when posting an expense to expense:uncategorized so resolve can group these. The resolver picks these up later with the full picture.",
416
+ name: "note_question",
417
+ description: "Record a clarification question without pausing the run. Use SPARINGLY during scan best-guess expense categorization is preferred (small misses are cheap to fix; a flood of questions is not). Call note_question only when (a) the row is unparseable (skip the row, no transaction_id), (b) you have a doubt about an account itself (pass account_id), or (c) the amount/sign/date/counter-party is genuinely unclear (post your best-guess transaction first, then call this with the transaction_id). Use kind='uncategorized_expense' only for genuinely opaque expense descriptors that landed in expense:uncategorized. The resolver picks these up later with the full picture.",
351
418
  input_schema: {
352
419
  type: "object",
353
420
  properties: {
354
421
  prompt: {
355
422
  type: "string",
356
- description: "The question or unknown in a complete sentence, with date, ฿-formatted amount, and human account names. Never reference internal ids.",
423
+ description: "The question in a complete sentence, with date, ฿-formatted amount, and human account names. Never reference internal ids.",
357
424
  },
358
425
  kind: {
359
426
  type: "string",
360
- description: "Optional category for the unknown. Use 'uncategorized_expense' when the posting landed in expense:uncategorized; the resolver batches these into one cleanup pass.",
427
+ description: "Optional category for the question. Use 'uncategorized_expense' when the posting landed in expense:uncategorized; the resolver batches these into one cleanup pass.",
361
428
  },
362
429
  options: {
363
430
  type: "array",
@@ -366,64 +433,48 @@ const UNKNOWN_DEFS = [
366
433
  },
367
434
  transaction_id: {
368
435
  type: "string",
369
- description: "Id of the transaction this unknown relates to (returned by record_transaction). Omit for file-level unknowns about an unparseable row.",
436
+ description: "Id of the transaction this question relates to (returned by record_transaction). Omit for file-level questions about an unparseable row.",
370
437
  },
371
438
  account_id: {
372
439
  type: "string",
373
- description: "Id of the account this unknown relates to. Set when the statement's bank name, currency, statement_day, due_day, or other metadata disagrees with the stored account, or when you suspect a new account you're about to create duplicates an existing one. Can be combined with transaction_id.",
440
+ description: "Id of the account this question relates to. Set when the statement's bank name, currency, statement_day, due_day, or other metadata disagrees with the stored account, or when you suspect a new account you're about to create duplicates an existing one. Can be combined with transaction_id.",
374
441
  },
375
442
  },
376
443
  required: ["prompt"],
377
444
  },
378
445
  },
379
446
  ];
380
- const UNKNOWN_LABELS = {
381
- note_unknown: "Noting unknown",
447
+ const QUESTION_LABELS = {
448
+ note_question: "Noting question",
382
449
  };
383
- async function unknownExecute(db, name, input, ctx) {
384
- if (name !== "note_unknown")
450
+ async function questionExecute(db, name, input, ctx) {
451
+ if (name !== "note_question")
385
452
  return undefined;
386
453
  if (!ctx)
387
- return "note_unknown is only available inside an agent session.";
388
- const target = {
454
+ return "note_question is only available inside an agent session.";
455
+ const id = recordQuestion(db, {
456
+ file_id: ctx.fileId ?? null,
457
+ scan_id: ctx.scanId ?? null,
389
458
  transaction_id: input.transaction_id ?? null,
390
459
  account_id: input.account_id ?? null,
391
- };
392
- if (ctx.buffer) {
393
- ctx.buffer.appendUnknown({
394
- ...target,
395
- kind: input.kind ?? null,
396
- prompt: input.prompt,
397
- options: input.options,
398
- });
399
- return `Unknown noted (buffered). Continue with the next row.`;
400
- }
401
- const id = recordUnknown(db, {
402
- file_id: ctx.fileId ?? null,
403
- transaction_id: target.transaction_id,
404
- account_id: target.account_id,
405
460
  kind: input.kind ?? null,
406
461
  prompt: input.prompt,
407
462
  options: input.options,
408
463
  });
409
- return `Unknown noted (${id}). Continue with the next row.`;
464
+ if (ctx.progress && ctx.chunkId) {
465
+ ctx.progress.emit({ chunkId: ctx.chunkId, kind: "question" });
466
+ }
467
+ return `Question noted (${id}). Continue with the next row.`;
410
468
  }
411
- export const scanUnknownTools = {
412
- DEFS: UNKNOWN_DEFS,
413
- LABELS: UNKNOWN_LABELS,
414
- execute: unknownExecute,
469
+ export const scanQuestionTools = {
470
+ DEFS: QUESTION_DEFS,
471
+ LABELS: QUESTION_LABELS,
472
+ execute: questionExecute,
415
473
  };
416
- /**
417
- * Resolve-only tool definitions
418
- *
419
- * `ask_user` is the only interactive primitive. Scan never reaches it (the
420
- * scan profile doesn't include this module), so we don't need a "scan, please
421
- * don't use this" guard.
422
- */
423
474
  const RESOLVE_DEFS = [
424
475
  {
425
476
  name: "ask_user",
426
- description: "Ask the user a clarifying question when you cannot confidently proceed. The pipeline pauses and prompts the user interactively. Available during `plasalid resolve`. Not exposed during `plasalid scan` — use `note_unknown` instead. Pass `transaction_id` / `account_id` to attach the question to the same target as a scan-noted unknown. Pass `unknown_id` to resolve an existing open unknown in place (recommended when re-posing a scan-noted unknown to the user). Pass `related_unknown_ids` to apply the user's single answer to a whole group of sibling unknowns at once.",
477
+ description: "Ask the user a clarifying question when you cannot confidently proceed. The pipeline pauses and prompts the user interactively. Available during `plasalid resolve`. Not exposed during `plasalid scan` — use `note_question` instead. Pass `question_id` to close an existing question in place. Pass `related_question_ids` to apply the user's single answer to a whole group of sibling questions at once.",
427
478
  input_schema: {
428
479
  type: "object",
429
480
  properties: {
@@ -436,122 +487,111 @@ const RESOLVE_DEFS = [
436
487
  description: "Optional list of candidate answers.",
437
488
  items: { type: "string" },
438
489
  },
439
- transaction_id: {
440
- type: "string",
441
- description: "Optional: transaction this question is about. Used to clear the transaction's has_unknown flag once all its unknowns close.",
442
- },
443
- account_id: {
444
- type: "string",
445
- description: "Optional: account this question is about. Used to clear the account's has_unknown flag once all its unknowns close.",
446
- },
447
- unknown_id: {
490
+ question_id: {
448
491
  type: "string",
449
- description: "Optional: id of an existing open unknown. If supplied, the user's answer resolves that row in place instead of creating a new one.",
492
+ description: "Id of the primary question this resolves. The user's answer closes (deletes) that row.",
450
493
  },
451
- related_unknown_ids: {
494
+ related_question_ids: {
452
495
  type: "array",
453
496
  items: { type: "string" },
454
- description: "Optional: ids of additional open unknowns that share the same answer as `unknown_id`. The user is prompted once; every listed unknown (plus the primary) is marked resolved with the same answer. Use this for grouping duplicate questions — e.g., 12 Lazada rows that all categorize the same way — so the user isn't asked the same thing twelve times.",
497
+ description: "Optional: ids of additional questions that share the same answer as `question_id`. The user is prompted once; every listed question (plus the primary) is closed with the same answer.",
455
498
  },
456
499
  facts: {
457
500
  type: "object",
458
- description: "Optional structured highlights rendered as a single colored header line above the question. Provide whichever fields apply; the prompter colorizes each by category (amount=yellow, date=cyan, merchant=green, accounts=magenta). Keep the `prompt` text short — the facts header carries the context.",
501
+ description: "Optional structured highlights rendered as a single colored header line above the question.",
459
502
  properties: {
460
- amount: {
461
- type: "string",
462
- description: "฿-formatted amount, e.g. '฿1,200.00'.",
463
- },
464
- date: {
465
- type: "string",
466
- description: "ISO date or short range, e.g. '2026-04-15' or '2026-02-15 to 2026-05-15'.",
467
- },
468
- merchant: {
469
- type: "string",
470
- description: "Counterparty / merchant name, e.g. 'LAZADA TH', 'Spotify'.",
471
- },
503
+ amount: { type: "string" },
504
+ date: { type: "string" },
505
+ merchant: { type: "string" },
472
506
  accounts: {
473
507
  type: "array",
474
508
  items: { type: "string" },
475
- description: "Human account names involved. For merges, list the survivor first.",
476
509
  },
477
510
  },
478
511
  },
479
512
  },
480
- required: ["prompt"],
513
+ required: ["prompt", "question_id"],
481
514
  },
482
515
  },
483
516
  {
484
- name: "close_unknown",
485
- description: "Close an open unknown by writing its answer to the row WITHOUT prompting the user. Use after applying a mutation that a memory rule, heuristic, or small-amount auto-skip already implied. Pass `related_unknown_ids` to close a sibling group in one call. The actual mutation (update_posting / record_recurrence / merge_accounts / etc.) must be done BEFORE this call — close_unknown only records the answer for audit.",
517
+ name: "close_question",
518
+ description: "Close an question by writing its answer and deleting the row WITHOUT prompting the user. Use after applying a mutation that a memory rule or heuristic already implied. Pass `related_question_ids` to close a sibling group in one call.",
486
519
  input_schema: {
487
520
  type: "object",
488
521
  properties: {
489
- unknown_id: { type: "string" },
522
+ question_id: { type: "string" },
490
523
  answer: {
491
524
  type: "string",
492
525
  description: "The implied answer to record.",
493
526
  },
494
- related_unknown_ids: { type: "array", items: { type: "string" } },
527
+ related_question_ids: { type: "array", items: { type: "string" } },
495
528
  },
496
- required: ["unknown_id", "answer"],
529
+ required: ["question_id", "answer"],
497
530
  },
498
531
  },
499
532
  ];
500
533
  const RESOLVE_LABELS = {
501
534
  ask_user: "Asking for clarification",
502
- close_unknown: "Closing unknown",
535
+ close_question: "Closing question",
503
536
  };
504
- async function resolveExecute(db, name, input, ctx) {
505
- if (name === "close_unknown")
506
- return closeUnknown(db, input);
537
+ async function resolveIngestExecute(db, name, input, ctx) {
538
+ if (name === "close_question")
539
+ return closeQuestionTool(db, input, ctx);
507
540
  if (name !== "ask_user")
508
541
  return undefined;
509
- if (!ctx?.promptUser) {
510
- return "ask_user requires an interactive resolve session.";
542
+ if (!ctx)
543
+ return "ask_user is only available inside an agent session.";
544
+ const primary = String(input.question_id ?? "");
545
+ if (!primary)
546
+ return "ask_user requires question_id.";
547
+ if (ctx.interactive && ctx.promptUser) {
548
+ const answer = await ctx.promptUser(input.prompt, input.options, input.facts);
549
+ const { closeQuestion } = await import("../../db/queries/questions.js");
550
+ const captured = closeQuestion(db, primary, answer);
551
+ if (!captured)
552
+ return `Question ${primary} not found.`;
553
+ ctx.onQuestionClosed?.(captured);
554
+ let propagated = 0;
555
+ const siblings = Array.isArray(input.related_question_ids) ? input.related_question_ids : [];
556
+ for (const sibId of siblings) {
557
+ if (sibId === primary)
558
+ continue;
559
+ const sibClosed = closeQuestion(db, String(sibId), answer);
560
+ if (sibClosed) {
561
+ ctx.onQuestionClosed?.(sibClosed);
562
+ propagated++;
563
+ }
564
+ }
565
+ const total = 1 + propagated;
566
+ return `User answered: ${answer}${total > 1 ? ` (applied to ${total} questions)` : ""}`;
511
567
  }
512
- const id = input.unknown_id
513
- ? String(input.unknown_id)
514
- : recordUnknown(db, {
515
- file_id: ctx.fileId ?? null,
516
- transaction_id: input.transaction_id ?? null,
517
- account_id: input.account_id ?? null,
518
- prompt: input.prompt,
519
- options: input.options,
520
- });
521
- if (!getUnknownTarget(db, id))
522
- return `Unknown ${id} not found.`;
523
- const answer = await ctx.promptUser(input.prompt, input.options, input.facts);
524
- return applyAnswerToGroup(db, id, answer, input.related_unknown_ids);
568
+ return `Awaiting user input — cannot proceed in non-interactive mode.`;
525
569
  }
526
- function closeUnknown(db, input) {
527
- const primary = String(input.unknown_id ?? "");
570
+ async function closeQuestionTool(db, input, ctx) {
571
+ const { closeQuestion } = await import("../../db/queries/questions.js");
572
+ const primary = String(input.question_id ?? "");
528
573
  const answer = String(input.answer ?? "");
529
574
  if (!primary || !answer)
530
- return "close_unknown requires unknown_id and answer.";
531
- if (!getUnknownTarget(db, primary))
532
- return `Unknown ${primary} not found.`;
533
- return applyAnswerToGroup(db, primary, answer, input.related_unknown_ids);
534
- }
535
- function applyAnswerToGroup(db, primaryId, answer, rawSiblings) {
536
- resolveUnknown(db, primaryId, answer);
537
- const siblings = Array.isArray(rawSiblings) ? rawSiblings.map(String) : [];
538
- const resolved = [primaryId];
539
- const notFound = [];
575
+ return "close_question requires question_id and answer.";
576
+ const captured = closeQuestion(db, primary, answer);
577
+ if (!captured)
578
+ return `Question ${primary} not found.`;
579
+ ctx?.onQuestionClosed?.(captured);
580
+ let count = 1;
581
+ const siblings = Array.isArray(input.related_question_ids) ? input.related_question_ids : [];
540
582
  for (const sibId of siblings) {
541
- if (sibId === primaryId)
583
+ if (sibId === primary)
542
584
  continue;
543
- if (resolveUnknown(db, sibId, answer))
544
- resolved.push(sibId);
545
- else
546
- notFound.push(sibId);
585
+ const sibClosed = closeQuestion(db, String(sibId), answer);
586
+ if (sibClosed) {
587
+ ctx?.onQuestionClosed?.(sibClosed);
588
+ count++;
589
+ }
547
590
  }
548
- const preface = `Resolved ${resolved.length} unknown${resolved.length === 1 ? "" : "s"} with: ${sanitizeForPrompt(answer)}`;
549
- if (notFound.length === 0)
550
- return preface;
551
- return `${preface}. NOT FOUND: ${notFound.join(", ")} — these ids did not exist; do not re-close them.`;
591
+ return `Closed ${count} question${count === 1 ? "" : "s"}.`;
552
592
  }
553
593
  export const resolveIngestTools = {
554
594
  DEFS: RESOLVE_DEFS,
555
595
  LABELS: RESOLVE_LABELS,
556
- execute: resolveExecute,
596
+ execute: resolveIngestExecute,
557
597
  };