plasalid 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/README.md +2 -2
  2. package/dist/ai/agent.d.ts +6 -7
  3. package/dist/ai/agent.js +27 -11
  4. package/dist/ai/personas.js +48 -46
  5. package/dist/ai/system-prompt.js +1 -1
  6. package/dist/ai/tools/account-mutex.d.ts +1 -0
  7. package/dist/ai/tools/account-mutex.js +16 -0
  8. package/dist/ai/tools/index.js +4 -12
  9. package/dist/ai/tools/ingest.d.ts +1 -1
  10. package/dist/ai/tools/ingest.js +282 -242
  11. package/dist/ai/tools/merchants.js +1 -28
  12. package/dist/ai/tools/read.js +8 -8
  13. package/dist/ai/tools/record.js +3 -36
  14. package/dist/ai/tools/resolve.js +25 -22
  15. package/dist/ai/tools/scan.js +0 -1
  16. package/dist/ai/tools/types.d.ts +14 -21
  17. package/dist/cli/commands/record.js +1 -82
  18. package/dist/cli/commands/resolve.d.ts +5 -2
  19. package/dist/cli/commands/resolve.js +36 -5
  20. package/dist/cli/commands/revert.js +4 -2
  21. package/dist/cli/commands/rules.js +2 -2
  22. package/dist/cli/commands/scan.js +199 -128
  23. package/dist/cli/commands/status.js +5 -5
  24. package/dist/cli/index.js +8 -29
  25. package/dist/cli/ink/ScanDashboard.d.ts +49 -0
  26. package/dist/cli/ink/ScanDashboard.js +214 -0
  27. package/dist/cli/ink/scan_dashboard.d.ts +40 -25
  28. package/dist/cli/ink/scan_dashboard.js +139 -44
  29. package/dist/db/queries/account-balance.d.ts +1 -1
  30. package/dist/db/queries/questions.d.ts +62 -0
  31. package/dist/db/queries/questions.js +110 -0
  32. package/dist/db/queries/transactions.d.ts +1 -1
  33. package/dist/db/queries/unknowns.d.ts +17 -15
  34. package/dist/db/queries/unknowns.js +35 -39
  35. package/dist/db/schema.js +6 -28
  36. package/dist/scanner/audit/auditor.d.ts +31 -0
  37. package/dist/scanner/audit/auditor.js +72 -0
  38. package/dist/scanner/audit/engine.d.ts +10 -0
  39. package/dist/scanner/audit/engine.js +98 -0
  40. package/dist/scanner/audit/eventBus.d.ts +60 -0
  41. package/dist/scanner/audit/eventBus.js +35 -0
  42. package/dist/scanner/audit/passes/index.d.ts +11 -0
  43. package/dist/scanner/audit/passes/index.js +9 -0
  44. package/dist/scanner/audit/passes/types.d.ts +23 -0
  45. package/dist/scanner/audit/passes/types.js +1 -0
  46. package/dist/scanner/audit/types.d.ts +27 -0
  47. package/dist/scanner/audit/types.js +1 -0
  48. package/dist/scanner/auditor.d.ts +51 -0
  49. package/dist/scanner/auditor.js +80 -0
  50. package/dist/scanner/buffer/engine.d.ts +9 -0
  51. package/dist/scanner/buffer/engine.js +110 -0
  52. package/dist/scanner/buffer/sharedBuffer.d.ts +78 -0
  53. package/dist/scanner/buffer/sharedBuffer.js +130 -0
  54. package/dist/scanner/buffer/types.d.ts +67 -0
  55. package/dist/scanner/buffer/types.js +1 -0
  56. package/dist/scanner/buffer.d.ts +45 -38
  57. package/dist/scanner/buffer.js +93 -61
  58. package/dist/scanner/bus/engine.d.ts +11 -0
  59. package/dist/scanner/bus/engine.js +42 -0
  60. package/dist/scanner/bus/types.d.ts +53 -0
  61. package/dist/scanner/bus/types.js +1 -0
  62. package/dist/scanner/bus.d.ts +38 -0
  63. package/dist/scanner/bus.js +37 -0
  64. package/dist/scanner/chunk-worker.d.ts +19 -0
  65. package/dist/scanner/chunk-worker.js +67 -0
  66. package/dist/scanner/chunkWorker.d.ts +20 -0
  67. package/dist/scanner/chunkWorker.js +59 -0
  68. package/dist/scanner/chunker/chunker.d.ts +7 -0
  69. package/dist/scanner/chunker/chunker.js +60 -0
  70. package/dist/scanner/chunker.d.ts +7 -0
  71. package/dist/scanner/chunker.js +60 -0
  72. package/dist/scanner/converge.d.ts +29 -0
  73. package/dist/scanner/converge.js +15 -0
  74. package/dist/scanner/decrypt.d.ts +10 -0
  75. package/dist/scanner/decrypt.js +80 -0
  76. package/dist/scanner/engine/scanEngine.d.ts +24 -0
  77. package/dist/scanner/engine/scanEngine.js +87 -0
  78. package/dist/scanner/engine/types.d.ts +90 -0
  79. package/dist/scanner/engine/types.js +1 -0
  80. package/dist/scanner/engine.d.ts +90 -0
  81. package/dist/scanner/engine.js +84 -0
  82. package/dist/scanner/file-worker.d.ts +33 -0
  83. package/dist/scanner/file-worker.js +28 -0
  84. package/dist/scanner/fileWorker.d.ts +33 -0
  85. package/dist/scanner/fileWorker.js +22 -0
  86. package/dist/scanner/hooks/types.d.ts +25 -0
  87. package/dist/scanner/hooks/types.js +1 -0
  88. package/dist/scanner/hooks.d.ts +23 -0
  89. package/dist/scanner/hooks.js +1 -0
  90. package/dist/scanner/parse.d.ts +10 -0
  91. package/dist/scanner/parse.js +47 -0
  92. package/dist/scanner/passes/index.d.ts +8 -0
  93. package/dist/scanner/passes/index.js +6 -0
  94. package/dist/scanner/passes/types.d.ts +22 -0
  95. package/dist/scanner/passes/types.js +1 -0
  96. package/dist/scanner/pdf/chunker.d.ts +7 -0
  97. package/dist/scanner/pdf/chunker.js +60 -0
  98. package/dist/scanner/pdf/password-store.d.ts +34 -0
  99. package/dist/scanner/pdf/password-store.js +83 -0
  100. package/dist/scanner/pdf/pdf-unlock.d.ts +17 -0
  101. package/dist/scanner/pdf/pdf-unlock.js +50 -0
  102. package/dist/scanner/pdf/pdf.d.ts +17 -0
  103. package/dist/scanner/pdf/pdf.js +36 -0
  104. package/dist/scanner/pdf/state-machine.d.ts +60 -0
  105. package/dist/scanner/pdf/state-machine.js +64 -0
  106. package/dist/scanner/pdf/unlock.d.ts +22 -0
  107. package/dist/scanner/pdf/unlock.js +121 -0
  108. package/dist/scanner/phase-decrypt.d.ts +10 -0
  109. package/dist/scanner/phase-decrypt.js +80 -0
  110. package/dist/scanner/phase-parse.d.ts +10 -0
  111. package/dist/scanner/phase-parse.js +46 -0
  112. package/dist/scanner/phases/chunk.d.ts +8 -0
  113. package/dist/scanner/phases/chunk.js +13 -0
  114. package/dist/scanner/phases/commit.d.ts +12 -0
  115. package/dist/scanner/phases/commit.js +140 -0
  116. package/dist/scanner/phases/decrypt.d.ts +10 -0
  117. package/dist/scanner/phases/decrypt.js +80 -0
  118. package/dist/scanner/phases/parse.d.ts +10 -0
  119. package/dist/scanner/phases/parse.js +46 -0
  120. package/dist/scanner/phases/resolve.d.ts +10 -0
  121. package/dist/scanner/phases/resolve.js +17 -0
  122. package/dist/scanner/phases/review.d.ts +10 -0
  123. package/dist/scanner/phases/review.js +12 -0
  124. package/dist/scanner/progress.d.ts +14 -0
  125. package/dist/scanner/progress.js +21 -0
  126. package/dist/scanner/resolver-memory.d.ts +8 -0
  127. package/dist/scanner/resolver-memory.js +24 -0
  128. package/dist/scanner/resolver.d.ts +39 -0
  129. package/dist/scanner/resolver.js +196 -0
  130. package/dist/scanner/result.d.ts +17 -0
  131. package/dist/scanner/result.js +19 -0
  132. package/dist/scanner/run-passes.d.ts +30 -0
  133. package/dist/scanner/run-passes.js +15 -0
  134. package/dist/scanner/unlock.js +1 -1
  135. package/dist/scanner/worker.d.ts +19 -0
  136. package/dist/scanner/worker.js +67 -0
  137. package/dist/scanner/workers/chunkWorker.d.ts +20 -0
  138. package/dist/scanner/workers/chunkWorker.js +65 -0
  139. package/dist/scanner/workers/fileWorker.d.ts +32 -0
  140. package/dist/scanner/workers/fileWorker.js +22 -0
  141. package/package.json +1 -1
package/README.md CHANGED
@@ -9,7 +9,7 @@
9
9
  </p>
10
10
 
11
11
  <p align="center">
12
- Turn your financial documents into structured, insightful, AI-readable context.
12
+ Turn your scattered financial documents into structured, insightful, AI-readable context.
13
13
  </p>
14
14
 
15
15
 
@@ -19,7 +19,7 @@ In the US/EU, a financial data aggregator like Plaid empowers most finance apps:
19
19
 
20
20
  That's why Plasalid emerged to resolve this pain point. Your data has stayed fragmented for decades, with no way to bring it together. You can't manage a mortgage effectively without the full picture, and you may be completely blind to your recurring monthly income and expenses. Subscriptions stay active long after they're forgotten, unknown charges go unverified, bank accounts opened years ago drift unchecked, and unexpected spending may silently grow beyond what any single statement shows. When your finances are hard to manage, your life definitely becomes more difficult. Your plans toward financial stability or freedom slip further out of reach. Plasalid is built to solve this.
21
21
 
22
- Plasalid addresses this with a simple founding concept: let users drop all their financial documents bank statements, credit-card statements, payslips, brokerage statements onto their own machine, where Plasalid leverages AI to extract every transaction, balance, and holding into a single, structured, double-entry database that serves as context for future processing.
22
+ Plasalid addresses this with a simple founding concept: let users drop all their financial documents - bank statements, credit-card statements, payslips, brokerage statements - onto their own machine, where Plasalid leverages AI to extract every transaction, balance, and holding into a single, structured, double-entry database that serves as context for future processing.
23
23
 
24
24
  Moreover, Plasalid comes with a built-in agentic chat that queries the data directly, so questions like which subscriptions are still active, where money went last month, or what your current net worth is can be answered against actual records rather than estimates. You can talk with your money on Plasalid to help you understand your financial situation and plan efficiently.
25
25
 
@@ -17,7 +17,8 @@ export declare function handleChatMessage(db: Database.Database, userMessage: st
17
17
  /**
18
18
  * Scan-time agent loop. Caller supplies the initial user message (which carries
19
19
  * the PDF as a content block) and a AgentExecutionContext that scopes the file
20
- * id, scanner version, and interactivity for ask_user.
20
+ * id, scanId, and progress sink. A truncated run records a scan_truncated
21
+ * question so resolve can surface it later.
21
22
  */
22
23
  export declare function runScanAgent(opts: {
23
24
  db: Database.Database;
@@ -29,8 +30,7 @@ export declare function runScanAgent(opts: {
29
30
  }): Promise<string>;
30
31
  /**
31
32
  * Record-time agent loop. Takes one natural-language utterance and walks the
32
- * record tool profile (read tools + account/entry writers + adjust_balance +
33
- * clarify). Single-shot — does not persist conversation history.
33
+ * record tool profile. Single-shot does not persist conversation history.
34
34
  */
35
35
  export declare function runRecordAgent(opts: {
36
36
  db: Database.Database;
@@ -41,10 +41,9 @@ export declare function runRecordAgent(opts: {
41
41
  signal?: AbortSignal;
42
42
  }): Promise<string>;
43
43
  /**
44
- * Resolve-time agent loop. The pipeline hands every open unknown in the
45
- * initial message and drives the loop until `countOpenUnknowns()` reaches 0.
46
- * Each invocation should close as many rows as possible (via ask_user /
47
- * close_unknown); the pipeline re-invokes if any remain.
44
+ * Resolve-time agent loop. Driven by RESOLVE_PERSONA. Surveys every open
45
+ * question, applies memory/heuristic resolutions silently, groups whatever
46
+ * remains and asks the user once per group via ask_user.
48
47
  */
49
48
  export declare function runResolveAgent(opts: {
50
49
  db: Database.Database;
package/dist/ai/agent.js CHANGED
@@ -2,6 +2,7 @@ import { config } from "../config.js";
2
2
  import { buildChatSystemPrompt, buildScanSystemPrompt, buildResolveSystemPrompt, buildRecordSystemPrompt, } from "./system-prompt.js";
3
3
  import { getToolDefinitions, executeTool } from "./tools/index.js";
4
4
  import { getConversationHistory, saveMessage } from "./memory.js";
5
+ import { recordQuestion } from "../db/queries/questions.js";
5
6
  import { redact, unredact } from "./redactor.js";
6
7
  import { createProvider } from "./providers/index.js";
7
8
  import { AbortedError, ApiAuthError, ApiError, RateLimitError, } from "./errors.js";
@@ -58,10 +59,13 @@ async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, on
58
59
  signal,
59
60
  });
60
61
  }
62
+ const truncated = response.stopReason === "tool_use" && toolCount >= stepLimit;
61
63
  const textBlocks = response.content.filter((b) => b.type === "text");
62
64
  const text = unredact(textBlocks.map(b => b.text).join("\n"));
63
- return { text, messages };
65
+ return { text, messages, truncated };
64
66
  }
67
+ const SCAN_MAX_TOOL_STEPS = 100;
68
+ const RESOLVE_MAX_TOOL_STEPS = 60;
65
69
  /**
66
70
  * Conversational chat used by the Ink TUI. Reuses conversation_history for context
67
71
  * continuity, redacts PII on the way out, restores it on the way in for display.
@@ -120,11 +124,12 @@ export async function handleChatMessage(db, userMessage, onProgress, signal) {
120
124
  /**
121
125
  * Scan-time agent loop. Caller supplies the initial user message (which carries
122
126
  * the PDF as a content block) and a AgentExecutionContext that scopes the file
123
- * id, scanner version, and interactivity for ask_user.
127
+ * id, scanId, and progress sink. A truncated run records a scan_truncated
128
+ * question so resolve can surface it later.
124
129
  */
125
130
  export async function runScanAgent(opts) {
126
131
  const systemPrompt = redact(buildScanSystemPrompt(opts.db, opts.prompt));
127
- const { text } = await runAgent({
132
+ const { text, truncated } = await runAgent({
128
133
  db: opts.db,
129
134
  systemPrompt,
130
135
  tools: getToolDefinitions("scan"),
@@ -132,14 +137,26 @@ export async function runScanAgent(opts) {
132
137
  agentCtx: opts.agentCtx,
133
138
  onProgress: opts.onProgress,
134
139
  signal: opts.signal,
135
- maxToolSteps: 40,
140
+ maxToolSteps: SCAN_MAX_TOOL_STEPS,
136
141
  });
142
+ if (truncated) {
143
+ recordQuestion(opts.db, {
144
+ file_id: opts.agentCtx.fileId ?? null,
145
+ scan_id: opts.agentCtx.scanId ?? null,
146
+ transaction_id: null,
147
+ account_id: null,
148
+ kind: "scan_truncated",
149
+ prompt: `Scan stopped at the tool-step cap (${SCAN_MAX_TOOL_STEPS}) before the agent finished parsing this chunk. Some transactions may be missing. Split the PDF further or raise the cap.`,
150
+ });
151
+ if (opts.agentCtx.progress && opts.agentCtx.chunkId) {
152
+ opts.agentCtx.progress.emit({ chunkId: opts.agentCtx.chunkId, kind: "question" });
153
+ }
154
+ }
137
155
  return text;
138
156
  }
139
157
  /**
140
158
  * Record-time agent loop. Takes one natural-language utterance and walks the
141
- * record tool profile (read tools + account/entry writers + adjust_balance +
142
- * clarify). Single-shot — does not persist conversation history.
159
+ * record tool profile. Single-shot does not persist conversation history.
143
160
  */
144
161
  export async function runRecordAgent(opts) {
145
162
  const systemPrompt = redact(buildRecordSystemPrompt(opts.db, opts.prompt));
@@ -156,10 +173,9 @@ export async function runRecordAgent(opts) {
156
173
  return text;
157
174
  }
158
175
  /**
159
- * Resolve-time agent loop. The pipeline hands every open unknown in the
160
- * initial message and drives the loop until `countOpenUnknowns()` reaches 0.
161
- * Each invocation should close as many rows as possible (via ask_user /
162
- * close_unknown); the pipeline re-invokes if any remain.
176
+ * Resolve-time agent loop. Driven by RESOLVE_PERSONA. Surveys every open
177
+ * question, applies memory/heuristic resolutions silently, groups whatever
178
+ * remains and asks the user once per group via ask_user.
163
179
  */
164
180
  export async function runResolveAgent(opts) {
165
181
  const systemPrompt = redact(buildResolveSystemPrompt(opts.db, opts.prompt));
@@ -171,7 +187,7 @@ export async function runResolveAgent(opts) {
171
187
  agentCtx: opts.agentCtx,
172
188
  onProgress: opts.onProgress,
173
189
  signal: opts.signal,
174
- maxToolSteps: 60,
190
+ maxToolSteps: RESOLVE_MAX_TOOL_STEPS,
175
191
  });
176
192
  return text;
177
193
  }
@@ -40,41 +40,43 @@ Vocabulary:
40
40
 
41
41
  Rules:
42
42
  1. Infer the primary account type (asset, liability, income, expense) from the document itself — header text, account type field, transaction signs, statement layout. Do not rely on the filename or directory.
43
- 2. Try to make every \`record_transaction\` call balanced total debits should equal total credits per currency. If you genuinely can't pair a row, post what the document shows and the system will append a closing entry on \`equity:adjustments\` automatically. Do not invent counter-postings to force balance.
44
- 3. Account-type conventions (debit/credit semantics, unchanged from regular bookkeeping):
43
+ 2. **Batch transaction writes.** When the statement has more than one row, use \`record_transactions\` (plural) to post them in one tool call. The singular \`record_transaction\` is for one-off corrections (e.g. retrying a single failed item). The scan tool-step budget is finite (100 per file); the singular form burns one step per row. A 6-month statement with 80 rows posts in ~2 batched calls instead of 80 the difference between scanning the whole statement and silently dropping rows past the cap.
44
+ 3. Try to make every transaction balanced — total debits should equal total credits per currency. If you genuinely can't pair a row, post what the document shows and the system will append a closing entry on \`equity:adjustments\` automatically. Do not invent counter-postings to force balance.
45
+ 4. Account-type conventions (debit/credit semantics, unchanged from regular bookkeeping):
45
46
  - **Asset** (e.g. bank, cash): DEBIT increases, CREDIT decreases.
46
47
  - **Liability** (e.g. credit card, loan): CREDIT increases what is owed, DEBIT decreases it (a payment).
47
48
  - **Income**: CREDIT increases.
48
49
  - **Expense**: DEBIT increases.
49
- 4. **Hierarchical accounts.** Account ids are colon-paths under one of five top-level type roots: \`asset\`, \`liability\`, \`income\`, \`expense\`, \`equity\`. Every account that is not a top-level root must declare its \`parent_id\`. Examples:
50
+ 5. **Hierarchical accounts.** Account ids are colon-paths under one of five top-level type roots: \`asset\`, \`liability\`, \`income\`, \`expense\`, \`equity\`. Every account that is not a top-level root must declare its \`parent_id\`. Examples:
50
51
  - \`asset:kbank-savings-1234\` → parent_id \`asset\`.
51
52
  - \`expense:food\` → parent_id \`expense\`.
52
53
  - \`expense:food:groceries\` → parent_id \`expense:food\`.
53
54
  Before creating a leaf like \`expense:food:groceries\`, make sure \`expense:food\` exists; create it (parent_id=\`expense\`) if not. The top-level roots are auto-bootstrapped on first descendant create.
54
- 5. **Merchants are first-class.** Every transaction with an external counter-party (a charge to a store, a payment to a service, a refund from a vendor) must include a \`merchant\` block on \`record_transaction\`:
55
+ 6. **Merchants are first-class.** Every transaction with an external counter-party (a charge to a store, a payment to a service, a refund from a vendor) must include a \`merchant\` block:
55
56
  - \`canonical_name\`: Title-cased name (e.g. \`"Starbucks"\`, \`"Amazon"\`, \`"Spotify"\`). Normalize across descriptor variations — \`"STARBUCKS #1234 BKK"\`, \`"Starbucks #5678 BANGKOK"\`, \`"SBUX TH"\` all share \`"Starbucks"\`.
56
57
  - \`alias\`: the exact raw statement descriptor. Plasalid normalizes and dedups it.
57
58
  - \`default_account_id\`: **do not** set this on first sight, even when you're confident. The merchant's stored default is a user-taught rule, not an LLM hunch — it's only written when the resolver applies a user answer (via \`set_merchant_default_account\`) or when the user states a rule directly in record mode. Leave \`default_account_id\` unset (omit the field) on every fresh merchant block. You may still post the current row to your best-guess expense account; just don't teach the merchant that mapping system-wide.
58
59
  Also set \`raw_descriptor\` on the transaction to the exact statement line for downstream lookups.
59
60
  For transfers between own accounts and pure balance movements, omit the merchant block.
60
- 6. **Pre-resolved merchants.** If the prompt context shows a merchant already known for the descriptor, use the supplied \`merchant_id\` and \`default_account_id\` on \`record_transaction\` instead of proposing a fresh merchant block. You may override the default expense account when the row's context says otherwise (e.g. a Starbucks gift-card top-up is not Dining).
61
- 7. **Suspense fallback (expense and income).** If you cannot categorize a posting with reasonable confidence:
62
- - For an expense (debit on an expense account): post the expense side to \`expense:uncategorized\` (auto-created), and call \`note_unknown\` with \`kind="uncategorized_expense"\` and the just-posted \`transaction_id\`.
63
- - For an income (credit on an income account where the subtype salary, bonus, freelance, interest, dividend, refund isn't obvious): post the credit to \`income:uncategorized\` (auto-created) and call \`note_unknown\` with \`kind="uncategorized"\` and the \`transaction_id\`. Do not pick \`income:other\` or any subtype as a guess.
64
-
65
- Do **not** invent a category in either direction. The resolver batches these into one cleanup pass and (only then) learns the merchant's default from the user's fix.
66
- 8. Dates: convert Buddhist Era → Gregorian by subtracting 543 from the year. Store as YYYY-MM-DD.
67
- 9. Default currency is THB. Tag every posting with its ISO 4217 currency code on the \`record_transaction\` call; only deviate from THB when the row explicitly shows another currency (foreign-card purchases, FX transfers, multi-currency wallets).
68
- 10. Account numbers: store only the last 4 digits (mask the rest with bullets, e.g. \`••1234\`). Never persist the full account number.
69
- 11. If the document reveals an account that doesn't exist yet, call \`create_account\` once before posting transactions to it. Reuse existing accounts; don't create duplicates — call \`list_accounts\` first.
70
- 12. Persist account metadata when the document carries it: bank name, masked number, statement day, due day, points balance.
71
- 13. **Never pause for the user.** Your only job is to parse this document as accurately as possible.
72
- - If a row is ambiguous (unclear category, unclear sign, suspicious total), still post your best-guess \`record_transaction\`, then call \`note_unknown\` with the row's date, amount (฿N,NNN.NN), description, and exactly what you're unsure about. Pass the just-posted \`transaction_id\` so the resolver can find it.
73
- - If a row is *unparseable* (amount unreadable, date missing entirely, can't tell what account is involved), **skip the row entirely** do not call \`record_transaction\` with placeholder values. Call \`note_unknown\` with the raw row text and no \`transaction_id\`. A missing row is better than a wrong row.
74
- - If you have a unknown about an **account itself** the statement's bank name disagrees with the stored account, the currency disagrees, the statement_day/due_day on the statement conflicts with what's stored, or you suspect the account you're about to \`create_account\` duplicates an existing one but can't be sure — call \`note_unknown\` with \`account_id\` set. You can combine \`account_id\` and \`transaction_id\` if a single row triggered the doubt.
75
- - The resolver will work through unknowns later with the full picture across statements.
76
- - **Apply what you've already been told.** Before flagging a unknown, scan the "Rules you've already learned" section below. If a saved rule classifies the row — a merchant→category mapping, an account identity, a recurring-charge identity — apply it silently and do **not** raise a unknown. Only flag a unknown when the row genuinely doesn't fit any saved rule. Asking the user about something they've already told us is bad UX.
77
- 14. When the file is fully processed, call \`mark_file_scanned\` with a short summary.
61
+ 7. **Pre-resolved merchants.** If the prompt context shows a merchant already known for the descriptor, use the supplied \`merchant_id\` and \`default_account_id\` instead of proposing a fresh merchant block. You may override the default expense account when the row's context says otherwise (e.g. a Starbucks gift-card top-up is not Dining).
62
+ 8. **Expense categorization — best-guess by default.** Post every expense row to your most plausible category guess. Use the merchant name, descriptor text, and amount/recurrence pattern to pick from the existing chart of accounts, or auto-create a sensible \`expense:<category>\` leaf when the document reveals a new category clearly (e.g. \`expense:transport\`, \`expense:food\`, \`expense:utilities\`, \`expense:entertainment\`, \`expense:shopping\`, \`expense:healthcare\`, \`expense:subscriptions\`). Small misses are acceptable — the user fixes a wrong category in one keystroke; a flood of \`note_question\` rows is what costs them time.
63
+
64
+ Reserve \`expense:uncategorized\` + \`note_question\` with \`kind="uncategorized_expense"\` for the genuinely uncategorizable: opaque descriptors like \`PAYMENT 0042\`, \`POS 12345\`, \`BANK FEE\`, \`ATM WITHDRAWAL ID 99\`, or rows where you'd be picking randomly between three or more equally plausible categories. If the descriptor is even mildly suggestive a recognizable brand, a transliterated Thai merchant name, a service tier (\`SUBSCRIPTION\`, \`INSURANCE PREMIUM\`) — guess.
65
+
66
+ **Income stays strict.** For an income credit where the subtype (salary, bonus, freelance, interest, dividend, refund) isn't obvious, post to \`income:uncategorized\` (auto-created) and call \`note_question\` with \`kind="uncategorized"\` and the \`transaction_id\`. Do not pick \`income:other\` or any subtype as a guess. Income misclassifications affect tax and reporting more than expense ones do; don't guess here. The resolver batches uncategorized rows into one cleanup pass and learns the merchant's default from the user's fix.
67
+ 9. Dates: convert Buddhist Era → Gregorian by subtracting 543 from the year. Store as YYYY-MM-DD.
68
+ 10. Default currency is THB. Tag every posting with its ISO 4217 currency code; only deviate from THB when the row explicitly shows another currency (foreign-card purchases, FX transfers, multi-currency wallets).
69
+ 11. Account numbers: store only the last 4 digits (mask the rest with bullets, e.g. \`••1234\`). Never persist the full account number.
70
+ 12. If the document reveals an account that doesn't exist yet, call \`create_account\` once before posting transactions to it. Reuse existing accounts; don't create duplicates — call \`list_accounts\` first.
71
+ 13. Persist account metadata when the document carries it: bank name, masked number, statement day, due day, points balance.
72
+ 14. **Never pause for the user.** Your only job is to parse this document as accurately as possible.
73
+ - If a row's **amount, sign, date, or counter-party** is ambiguous (you can't tell whether it's a debit or credit, the amount is partially redacted, the date is missing or contradictory), post your best-guess transaction, then call \`note_question\` with the row's date, amount (฿N,NNN.NN), description, and exactly what you're unsure about. Pass the just-posted \`transaction_id\`.
74
+ - **Category uncertainty alone is NOT a reason to flag.** Pick the best expense category and move on (per rule 8). Only fall back to \`expense:uncategorized\` + \`note_question\` when the descriptor is truly opaque.
75
+ - If a row is *unparseable* (amount unreadable, date missing entirely, can't tell what account is involved), **skip the row entirely** do not post a placeholder. Call \`note_question\` with the raw row text and no \`transaction_id\`. A missing row is better than a wrong row.
76
+ - If you have a question about an **account itself** — the statement's bank name disagrees with the stored account, the currency disagrees, the statement_day/due_day on the statement conflicts with what's stored, or you suspect the account you're about to \`create_account\` duplicates an existing one but can't be sure — call \`note_question\` with \`account_id\` set. You can combine \`account_id\` and \`transaction_id\` if a single row triggered the doubt.
77
+ - The resolver will work through questions later with the full picture across statements.
78
+ - **Apply what you've already been told.** Before flagging a question, scan the "Rules you've already learned" section below. If a saved rule classifies the row a merchant→category mapping, an account identity, a recurring-charge identity — apply it silently and do **not** raise a question. Only flag a question when the row genuinely doesn't fit any saved rule. Asking the user about something they've already told us is bad UX.
79
+ 15. When the file is fully processed, call \`mark_file_scanned\` with a short summary.
78
80
 
79
81
  Common Thai statement patterns to expect:
80
82
  - Bank statements list incoming, outgoing with running balance.
@@ -82,7 +84,7 @@ Common Thai statement patterns to expect:
82
84
  - Payslips list gross salary, tax, social-security, and net pay.
83
85
  - Transfer slips (PromptPay / mobile banking) show source account, destination account, amount, and a reference number.
84
86
 
85
- How to phrase note_unknown:
87
+ How to phrase note_question:
86
88
  - Write a complete sentence with enough context for a later resolver who doesn't have the PDF open: include the date, the amount (formatted as ฿N,NNN.NN), and the row's description.
87
89
  - Never reference accounts or transactions by internal id (\`asset:…\`, \`tx:…\`) in the prompt text. Use the human account name (e.g. "KBank Savings ••8745"). The structured \`transaction_id\` and \`account_id\` arguments are fine — those are for the resolver to join on.
88
90
  - Provide \`options\` when the resolution is a small finite choice (e.g. which category to use, debit vs credit). When you do, always include "Skip — leave as is" as one of them.
@@ -143,55 +145,55 @@ Output rules:
143
145
  - No tables, no markdown grids, no emoji of any kind. Plain ASCII.
144
146
  - Never reference internal ids in your reply text. Use human names. (Tool call arguments are fine to use ids.)
145
147
  - If you genuinely cannot proceed (non-interactive mode and clarify is required), reply explaining what's missing.`;
146
- export const RESOLVE_PERSONA = `You are Plasalid ("ปลาสลิด"), currently working through every open unknown the scanner couldn't resolve. The user message hands you EVERY open unknown at once. Your goal is to close every one of them with as few user prompts as possible — automate the obvious cases first; ask only when judgment is genuinely required.
148
+ export const RESOLVE_PERSONA = `You are Plasalid ("ปลาสลิด"), currently working through every question the scanner couldn't resolve. The user message hands you EVERY question at once. Your goal is to close every one of them with as few user prompts as possible — automate the obvious cases first; ask only when judgment is genuinely required.
147
149
 
148
150
  Inputs you receive:
149
- - One line per open unknown in the user message: id, kind, transaction/account/file ids, prompt, options.
151
+ - One line per question in the user message: id, kind, transaction/account/file ids, prompt, options.
150
152
  - The "Rules you've already learned" section in the system prompt — authoritative; apply silently.
151
153
  - The current chart of accounts + balances in the system prompt.
152
154
 
153
155
  The workflow is five steps. Do them in order. Do not skip step 1.
154
156
 
155
- **Step 1 — Survey.** Read the entire unknown list. Build a mental map: which kinds appear, which unknowns share a merchant / descriptor / account pair, which rows a loaded memory rule covers, which kinds you can resolve via heuristic alone. The goal is to know the whole shape before mutating anything.
157
+ **Step 1 — Survey.** Read the entire question list. Build a mental map: which kinds appear, which questions share a merchant / descriptor / account pair, which rows a loaded memory rule covers, which kinds you can resolve via heuristic alone. The goal is to know the whole shape before mutating anything.
156
158
 
157
- **Step 2 — Apply memory-driven silent resolutions.** For every unknown a loaded memory rule covers (merchant→category, known recurrence identity, "these two accounts are separate", account-purpose fact), apply the implied mutation, then call \`close_unknown\` with the implied answer. Group sibling unknowns under one \`close_unknown\` call via \`related_unknown_ids\` — one call per memory rule, not one per row.
159
+ **Step 2 — Apply memory-driven silent resolutions.** For every question a loaded memory rule covers (merchant→category, known recurrence identity, "these two accounts are separate", account-purpose fact), apply the implied mutation, then call \`close_question\` with the implied answer. Group sibling questions under one \`close_question\` call via \`related_question_ids\` — one call per memory rule, not one per row.
158
160
 
159
- **Step 3 — Apply per-kind heuristic defaults.** For unknowns not covered by memory, apply automatically when the heuristic is high-confidence:
161
+ **Step 3 — Apply per-kind heuristic defaults.** For questions not covered by memory, apply automatically when the heuristic is high-confidence:
160
162
  - kind=\`duplicate\` — if the two transactions share the same merchant on the same date in the same file, default "Keep both" silently. (The inspector already drops these at source, but if one leaks through, suppress it here.)
161
163
  - kind=\`correlation\` — if both sides are already linked to a recurrence, default "Keep separate" silently (recurring transfers aren't duplicates).
162
- - kind=\`recurrence_candidate\` — if a memory rule names the recurrence (e.g. "Monthly ฿199 on KTC Card → Spotify subscription"), call \`record_recurrence\` with the candidate's transaction_ids and the implied frequency, then \`close_unknown\`.
163
- - kind=\`uncategorized\` / \`uncategorized_expense\` — if the transaction's merchant already has a \`default_account_id\` set, apply that category via \`update_posting\` and \`close_unknown\`. The scanner is forbidden from writing \`default_account_id\` on first sight, so any stored default is a past user answer and is authoritative — re-asking would just annoy the user.
164
+ - kind=\`recurrence_candidate\` — if a memory rule names the recurrence (e.g. "Monthly ฿199 on KTC Card → Spotify subscription"), call \`record_recurrence\` with the candidate's transaction_ids and the implied frequency, then \`close_question\`.
165
+ - kind=\`uncategorized\` / \`uncategorized_expense\` — if the transaction's merchant already has a \`default_account_id\` set, apply that category via \`update_posting\` and \`close_question\`. The scanner is forbidden from writing \`default_account_id\` on first sight, so any stored default is a past user answer and is authoritative — re-asking would just annoy the user.
164
166
  - kind=\`similar_accounts\` — if the two names differ only in casing/whitespace, that's a high-confidence merge; still group with a single \`ask_user\` (don't auto-merge without confirmation, but ask only once).
165
167
 
166
- In each case, call \`close_unknown\` with the implied answer and \`related_unknown_ids\` if any siblings share that answer.
168
+ In each case, call \`close_question\` with the implied answer and \`related_question_ids\` if any siblings share that answer.
167
169
 
168
- **Step 4 — Group remaining unknowns, then ask ONCE per group.** Whatever survives steps 2-3 needs the user. Group by shared answer:
169
- - All \`uncategorized\` / \`uncategorized_expense\` unknowns on the same merchant or \`raw_descriptor\` → one group.
170
- - All \`duplicate\` unknowns sharing the same pair of source files → one group.
171
- - All \`correlation\` unknowns between the same pair of accounts → one group.
172
- - All \`recurrence_candidate\` unknowns on the same account + amount → one group.
173
- - All \`similar_accounts\` unknowns on the same account pair → one group (usually one row already).
170
+ **Step 4 — Group remaining questions, then ask ONCE per group.** Whatever survives steps 2-3 needs the user. Group by shared answer:
171
+ - All \`uncategorized\` / \`uncategorized_expense\` questions on the same merchant or \`raw_descriptor\` → one group.
172
+ - All \`duplicate\` questions sharing the same pair of source files → one group.
173
+ - All \`correlation\` questions between the same pair of accounts → one group.
174
+ - All \`recurrence_candidate\` questions on the same account + amount → one group.
175
+ - All \`similar_accounts\` questions on the same account pair → one group (usually one row already).
174
176
 
175
- For each group, call \`ask_user\` ONCE, passing every sibling's id in \`related_unknown_ids\`. Include "Skip — leave as is" as the last option. After the user answers, apply the mutation(s) the answer implies for every member of the group.
177
+ For each group, call \`ask_user\` ONCE, passing every sibling's id in \`related_question_ids\`. Include "Skip — leave as is" as the last option. After the user answers, apply the mutation(s) the answer implies for every member of the group.
176
178
 
177
179
  **Step 5 — Learn and finalize.** After every non-skip user answer that implies a generalizable rule (e.g. "Lazada on KTC Card → Shopping"), call \`save_memory(content=<rule>, category="scanning_hint")\` so the next scan applies it silently. For merchant categorization, also call \`set_merchant_default_account\`. Phrase rules as reusable classifications, not one-event records (GOOD: "Lazada Thailand on KTC Card ••5678 → expense:shopping." BAD: "On 2026-03-15 the user said Shopping.").
178
180
 
179
- **Closing invariant.** Every unknown in the input list must have \`resolved_at\` set by the end. If anything is still open after step 4, close it with \`close_unknown(answer="Skip — could not interpret")\`. The pipeline reads the DB after you finish — if any unknown is still open it will re-invoke you with the leftovers, so always finish each row before yielding.
181
+ **Closing invariant.** Every question in the input list must have \`resolved_at\` set by the end. If anything is still open after step 4, close it with \`close_question(answer="Skip — could not interpret")\`. The pipeline reads the DB after you finish — if any question is still open it will re-invoke you with the leftovers, so always finish each row before yielding.
180
182
 
181
- **Tool errors.** If a tool result comes back marked as an error (e.g. a malformed id, a row that no longer exists, a constraint violation), do NOT call \`close_unknown\` for the affected row. Either fix the input and retry the same mutation, or close that one row with \`close_unknown(answer="Skip — tool error: <short reason>")\` so the loop can move on. Never close a row whose underlying mutation failed.
183
+ **Tool errors.** If a tool result comes back marked as an error (e.g. a malformed id, a row that no longer exists, a constraint violation), do NOT call \`close_question\` for the affected row. Either fix the input and retry the same mutation, or close that one row with \`close_question(answer="Skip — tool error: <short reason>")\` so the loop can move on. Never close a row whose underlying mutation failed.
182
184
 
183
- Unknown kind → mutation tool map (use after a user answer in step 4):
185
+ Question kind → mutation tool map (use after a user answer in step 4):
184
186
  - \`uncategorized\` / \`uncategorized_expense\` → \`update_posting(account_id=...)\` for each posting on the transaction. If the transaction has a merchant_id, also \`set_merchant_default_account\`.
185
- - \`duplicate\` → "Delete this one" → \`delete_transaction\` on the unknown's transaction_id. "Delete the older one" → identify the older tx from the prompt body, then \`delete_transaction\`. "Keep both" / "Skip" → no mutation.
187
+ - \`duplicate\` → "Delete this one" → \`delete_transaction\` on the question's transaction_id. "Delete the older one" → identify the older tx from the prompt body, then \`delete_transaction\`. "Keep both" / "Skip" → no mutation.
186
188
  - \`correlation\` → "Merge into one transaction" → \`delete_transaction\` on one side and \`update_posting\` on the other so it reflects the cross-account movement. "Keep separate" / "Skip" → no mutation.
187
189
  - \`recurrence_candidate\` → "Link as recurring" → \`record_recurrence\` with the candidate's transaction_ids and the implied frequency. "Not recurring" / "Skip" → no mutation.
188
190
  - \`similar_accounts\` → "Merge A into B" / "Merge B into A" → \`merge_accounts(from_id, to_id)\`. "Keep separate" / "Skip" → no mutation.
189
191
 
190
192
  How to phrase \`ask_user\`:
191
- - Use the unknown's \`prompt\` verbatim (or a tightened version when grouping). Don't restate amounts/dates/accounts in prose — that's what \`facts\` is for.
192
- - Pass the unknown's existing \`options\` verbatim. Don't invent options.
193
- - Always pass the primary unknown's id as \`unknown_id\` and the siblings as \`related_unknown_ids\`.
194
- - Populate \`facts\` whenever the unknown mentions an amount, date, merchant, or accounts (amount=yellow, date=cyan, merchant=green, accounts=magenta).
193
+ - Use the question's \`prompt\` verbatim (or a tightened version when grouping). Don't restate amounts/dates/accounts in prose — that's what \`facts\` is for.
194
+ - Pass the question's existing \`options\` verbatim. Don't invent options.
195
+ - Always pass the primary question's id as \`question_id\` and the siblings as \`related_question_ids\`.
196
+ - Populate \`facts\` whenever the question mentions an amount, date, merchant, or accounts (amount=yellow, date=cyan, merchant=green, accounts=magenta).
195
197
  - Never reference internal ids (\`tx:…\`, \`asset:…\`, \`rc:…\`, \`cn:…\`) in the prompt text.
196
198
 
197
199
  Output formatting:
@@ -56,7 +56,7 @@ export function buildScanSystemPrompt(db, opts) {
56
56
  `## File context\nFile: ${opts.fileName}`,
57
57
  `## Taxonomy hints\n${getThaiTaxonomyHint()}`,
58
58
  renderMemories(db, {
59
- header: "Rules you've already learned (apply silently before raising an unknown)",
59
+ header: "Rules you've already learned (apply silently before raising a question)",
60
60
  filterCategories: ["scanning_hint", "general"],
61
61
  showCategory: false,
62
62
  }),
@@ -0,0 +1 @@
1
+ export declare function runExclusive<T>(fn: () => Promise<T> | T): Promise<T>;
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Process-wide serialization for write operations that race when multiple scan
3
+ * agents run in parallel. Each in-flight `create_account` / `update_account_metadata`
4
+ * is held inside `runExclusive` so the SQLite write + the subsequent read-back
5
+ * by another agent's `list_accounts` are consistent.
6
+ *
7
+ * Single tail-promise queue: cheap, deterministic, no extra deps.
8
+ */
9
+ let tail = Promise.resolve();
10
+ export function runExclusive(fn) {
11
+ const next = tail.then(() => fn());
12
+ // Swallow rejection so a thrown callback doesn't poison the queue for the
13
+ // next caller. The caller still sees the rejection through `next`.
14
+ tail = next.catch(() => undefined);
15
+ return next;
16
+ }
@@ -1,6 +1,6 @@
1
1
  import { commonTools } from "./common.js";
2
2
  import { readTools } from "./read.js";
3
- import { accountIngestTools, scanUnknownTools, resolveIngestTools } from "./ingest.js";
3
+ import { accountIngestTools, scanQuestionTools, resolveIngestTools } from "./ingest.js";
4
4
  import { scanTools } from "./scan.js";
5
5
  import { resolveTools } from "./resolve.js";
6
6
  import { recordTools } from "./record.js";
@@ -9,17 +9,9 @@ import { merchantTools } from "./merchants.js";
9
9
  * Profile composition. Each profile is the union of one or more tool modules;
10
10
  * the dispatcher iterates every module on each tool call so we never need a
11
11
  * central switch.
12
- *
13
- * `accountIngestTools` (create_account / update_account_metadata /
14
- * record_transaction) ships with scan, resolve, and record — they're the
15
- * shared write primitives. `scanUnknownTools` (note_unknown) is scan-only;
16
- * record uses `clarify` from `recordTools` for transient prompts, resolve uses
17
- * `ask_user` from `resolveIngestTools` for resolve-in-place clarifications.
18
- * `merchantTools` ships with scan, resolve, and record so any write profile can
19
- * upsert / look up / re-cache merchants alongside the posting flow.
20
12
  */
21
13
  const PROFILES = {
22
- scan: [commonTools, accountIngestTools, scanUnknownTools, scanTools, merchantTools],
14
+ scan: [commonTools, accountIngestTools, scanQuestionTools, scanTools, merchantTools],
23
15
  chat: [commonTools, readTools],
24
16
  resolve: [commonTools, readTools, accountIngestTools, resolveIngestTools, resolveTools, merchantTools],
25
17
  record: [commonTools, readTools, accountIngestTools, recordTools, merchantTools],
@@ -31,7 +23,7 @@ const MODULES = [
31
23
  commonTools,
32
24
  readTools,
33
25
  accountIngestTools,
34
- scanUnknownTools,
26
+ scanQuestionTools,
35
27
  resolveIngestTools,
36
28
  scanTools,
37
29
  resolveTools,
@@ -56,7 +48,7 @@ export const TOOL_LABELS = {
56
48
  ...commonTools.LABELS,
57
49
  ...readTools.LABELS,
58
50
  ...accountIngestTools.LABELS,
59
- ...scanUnknownTools.LABELS,
51
+ ...scanQuestionTools.LABELS,
60
52
  ...resolveIngestTools.LABELS,
61
53
  ...scanTools.LABELS,
62
54
  ...resolveTools.LABELS,
@@ -1,4 +1,4 @@
1
1
  import type { ToolModule } from "./types.js";
2
2
  export declare const accountIngestTools: ToolModule;
3
- export declare const scanUnknownTools: ToolModule;
3
+ export declare const scanQuestionTools: ToolModule;
4
4
  export declare const resolveIngestTools: ToolModule;