npm - plasalid - Versions diffs - 0.7.1 → 0.7.2 - Mend

plasalid 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (141) hide show

package/README.md +2 -2
package/dist/ai/agent.d.ts +6 -7
package/dist/ai/agent.js +27 -11
package/dist/ai/personas.js +48 -46
package/dist/ai/system-prompt.js +1 -1
package/dist/ai/tools/account-mutex.d.ts +1 -0
package/dist/ai/tools/account-mutex.js +16 -0
package/dist/ai/tools/index.js +4 -12
package/dist/ai/tools/ingest.d.ts +1 -1
package/dist/ai/tools/ingest.js +282 -242
package/dist/ai/tools/merchants.js +1 -28
package/dist/ai/tools/read.js +8 -8
package/dist/ai/tools/record.js +3 -36
package/dist/ai/tools/resolve.js +25 -22
package/dist/ai/tools/scan.js +0 -1
package/dist/ai/tools/types.d.ts +14 -21
package/dist/cli/commands/record.js +1 -82
package/dist/cli/commands/resolve.d.ts +5 -2
package/dist/cli/commands/resolve.js +36 -5
package/dist/cli/commands/revert.js +4 -2
package/dist/cli/commands/rules.js +2 -2
package/dist/cli/commands/scan.js +199 -128
package/dist/cli/commands/status.js +5 -5
package/dist/cli/index.js +8 -29
package/dist/cli/ink/ScanDashboard.d.ts +49 -0
package/dist/cli/ink/ScanDashboard.js +214 -0
package/dist/cli/ink/scan_dashboard.d.ts +40 -25
package/dist/cli/ink/scan_dashboard.js +139 -44
package/dist/db/queries/account-balance.d.ts +1 -1
package/dist/db/queries/questions.d.ts +62 -0
package/dist/db/queries/questions.js +110 -0
package/dist/db/queries/transactions.d.ts +1 -1
package/dist/db/queries/unknowns.d.ts +17 -15
package/dist/db/queries/unknowns.js +35 -39
package/dist/db/schema.js +6 -28
package/dist/scanner/audit/auditor.d.ts +31 -0
package/dist/scanner/audit/auditor.js +72 -0
package/dist/scanner/audit/engine.d.ts +10 -0
package/dist/scanner/audit/engine.js +98 -0
package/dist/scanner/audit/eventBus.d.ts +60 -0
package/dist/scanner/audit/eventBus.js +35 -0
package/dist/scanner/audit/passes/index.d.ts +11 -0
package/dist/scanner/audit/passes/index.js +9 -0
package/dist/scanner/audit/passes/types.d.ts +23 -0
package/dist/scanner/audit/passes/types.js +1 -0
package/dist/scanner/audit/types.d.ts +27 -0
package/dist/scanner/audit/types.js +1 -0
package/dist/scanner/auditor.d.ts +51 -0
package/dist/scanner/auditor.js +80 -0
package/dist/scanner/buffer/engine.d.ts +9 -0
package/dist/scanner/buffer/engine.js +110 -0
package/dist/scanner/buffer/sharedBuffer.d.ts +78 -0
package/dist/scanner/buffer/sharedBuffer.js +130 -0
package/dist/scanner/buffer/types.d.ts +67 -0
package/dist/scanner/buffer/types.js +1 -0
package/dist/scanner/buffer.d.ts +45 -38
package/dist/scanner/buffer.js +93 -61
package/dist/scanner/bus/engine.d.ts +11 -0
package/dist/scanner/bus/engine.js +42 -0
package/dist/scanner/bus/types.d.ts +53 -0
package/dist/scanner/bus/types.js +1 -0
package/dist/scanner/bus.d.ts +38 -0
package/dist/scanner/bus.js +37 -0
package/dist/scanner/chunk-worker.d.ts +19 -0
package/dist/scanner/chunk-worker.js +67 -0
package/dist/scanner/chunkWorker.d.ts +20 -0
package/dist/scanner/chunkWorker.js +59 -0
package/dist/scanner/chunker/chunker.d.ts +7 -0
package/dist/scanner/chunker/chunker.js +60 -0
package/dist/scanner/chunker.d.ts +7 -0
package/dist/scanner/chunker.js +60 -0
package/dist/scanner/converge.d.ts +29 -0
package/dist/scanner/converge.js +15 -0
package/dist/scanner/decrypt.d.ts +10 -0
package/dist/scanner/decrypt.js +80 -0
package/dist/scanner/engine/scanEngine.d.ts +24 -0
package/dist/scanner/engine/scanEngine.js +87 -0
package/dist/scanner/engine/types.d.ts +90 -0
package/dist/scanner/engine/types.js +1 -0
package/dist/scanner/engine.d.ts +90 -0
package/dist/scanner/engine.js +84 -0
package/dist/scanner/file-worker.d.ts +33 -0
package/dist/scanner/file-worker.js +28 -0
package/dist/scanner/fileWorker.d.ts +33 -0
package/dist/scanner/fileWorker.js +22 -0
package/dist/scanner/hooks/types.d.ts +25 -0
package/dist/scanner/hooks/types.js +1 -0
package/dist/scanner/hooks.d.ts +23 -0
package/dist/scanner/hooks.js +1 -0
package/dist/scanner/parse.d.ts +10 -0
package/dist/scanner/parse.js +47 -0
package/dist/scanner/passes/index.d.ts +8 -0
package/dist/scanner/passes/index.js +6 -0
package/dist/scanner/passes/types.d.ts +22 -0
package/dist/scanner/passes/types.js +1 -0
package/dist/scanner/pdf/chunker.d.ts +7 -0
package/dist/scanner/pdf/chunker.js +60 -0
package/dist/scanner/pdf/password-store.d.ts +34 -0
package/dist/scanner/pdf/password-store.js +83 -0
package/dist/scanner/pdf/pdf-unlock.d.ts +17 -0
package/dist/scanner/pdf/pdf-unlock.js +50 -0
package/dist/scanner/pdf/pdf.d.ts +17 -0
package/dist/scanner/pdf/pdf.js +36 -0
package/dist/scanner/pdf/state-machine.d.ts +60 -0
package/dist/scanner/pdf/state-machine.js +64 -0
package/dist/scanner/pdf/unlock.d.ts +22 -0
package/dist/scanner/pdf/unlock.js +121 -0
package/dist/scanner/phase-decrypt.d.ts +10 -0
package/dist/scanner/phase-decrypt.js +80 -0
package/dist/scanner/phase-parse.d.ts +10 -0
package/dist/scanner/phase-parse.js +46 -0
package/dist/scanner/phases/chunk.d.ts +8 -0
package/dist/scanner/phases/chunk.js +13 -0
package/dist/scanner/phases/commit.d.ts +12 -0
package/dist/scanner/phases/commit.js +140 -0
package/dist/scanner/phases/decrypt.d.ts +10 -0
package/dist/scanner/phases/decrypt.js +80 -0
package/dist/scanner/phases/parse.d.ts +10 -0
package/dist/scanner/phases/parse.js +46 -0
package/dist/scanner/phases/resolve.d.ts +10 -0
package/dist/scanner/phases/resolve.js +17 -0
package/dist/scanner/phases/review.d.ts +10 -0
package/dist/scanner/phases/review.js +12 -0
package/dist/scanner/progress.d.ts +14 -0
package/dist/scanner/progress.js +21 -0
package/dist/scanner/resolver-memory.d.ts +8 -0
package/dist/scanner/resolver-memory.js +24 -0
package/dist/scanner/resolver.d.ts +39 -0
package/dist/scanner/resolver.js +196 -0
package/dist/scanner/result.d.ts +17 -0
package/dist/scanner/result.js +19 -0
package/dist/scanner/run-passes.d.ts +30 -0
package/dist/scanner/run-passes.js +15 -0
package/dist/scanner/unlock.js +1 -1
package/dist/scanner/worker.d.ts +19 -0
package/dist/scanner/worker.js +67 -0
package/dist/scanner/workers/chunkWorker.d.ts +20 -0
package/dist/scanner/workers/chunkWorker.js +65 -0
package/dist/scanner/workers/fileWorker.d.ts +32 -0
package/dist/scanner/workers/fileWorker.js +22 -0
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -9,7 +9,7 @@
 </p>
 <p align="center">
-    Turn your financial documents into structured, insightful, AI-readable context.
+    Turn your scattered financial documents into structured, insightful, AI-readable context.
 </p>
@@ -19,7 +19,7 @@ In the US/EU, a financial data aggregator like Plaid empowers most finance apps:
 That's why Plasalid emerged to resolve this pain point. Your data has stayed fragmented for decades, with no way to bring it together. You can't manage a mortgage effectively without the full picture, and you may be completely blind to your recurring monthly income and expenses. Subscriptions stay active long after they're forgotten, unknown charges go unverified, bank accounts opened years ago drift unchecked, and unexpected spending may silently grow beyond what any single statement shows. When your finances are hard to manage, your life definitely becomes more difficult. Your plans toward financial stability or freedom slip further out of reach. Plasalid is built to solve this.
-Plasalid addresses this with a simple founding concept: let users drop all their financial documents — bank statements, credit-card statements, payslips, brokerage statements — onto their own machine, where Plasalid leverages AI to extract every transaction, balance, and holding into a single, structured, double-entry database that serves as context for future processing.
+Plasalid addresses this with a simple founding concept: let users drop all their financial documents - bank statements, credit-card statements, payslips, brokerage statements - onto their own machine, where Plasalid leverages AI to extract every transaction, balance, and holding into a single, structured, double-entry database that serves as context for future processing.
 Moreover, Plasalid comes with a built-in agentic chat that queries the data directly, so questions like which subscriptions are still active, where money went last month, or what your current net worth is can be answered against actual records rather than estimates. You can talk with your money on Plasalid to help you understand your financial situation and plan efficiently.

package/dist/ai/agent.d.ts CHANGED Viewed

@@ -17,7 +17,8 @@ export declare function handleChatMessage(db: Database.Database, userMessage: st
 /**
  * Scan-time agent loop. Caller supplies the initial user message (which carries
  * the PDF as a content block) and an AgentExecutionContext that scopes the file
- * id, scanner version, and interactivity for ask_user.
+ * id, scanId, and progress sink. A truncated run records a scan_truncated
+ * question so resolve can surface it later.
  */
 export declare function runScanAgent(opts: {
     db: Database.Database;
@@ -29,8 +30,7 @@ export declare function runScanAgent(opts: {
 }): Promise<string>;
 /**
  * Record-time agent loop. Takes one natural-language utterance and walks the
- * record tool profile (read tools + account/entry writers + adjust_balance +
- * clarify). Single-shot — does not persist conversation history.
+ * record tool profile. Single-shot — does not persist conversation history.
  */
 export declare function runRecordAgent(opts: {
     db: Database.Database;
@@ -41,10 +41,9 @@ export declare function runRecordAgent(opts: {
     signal?: AbortSignal;
 }): Promise<string>;
 /**
- * Resolve-time agent loop. The pipeline hands every open unknown in the
- * initial message and drives the loop until `countOpenUnknowns()` reaches 0.
- * Each invocation should close as many rows as possible (via ask_user /
- * close_unknown); the pipeline re-invokes if any remain.
+ * Resolve-time agent loop. Driven by RESOLVE_PERSONA. Surveys every open
+ * question, applies memory/heuristic resolutions silently, groups whatever
+ * remains and asks the user once per group via ask_user.
  */
 export declare function runResolveAgent(opts: {
     db: Database.Database;

package/dist/ai/agent.js CHANGED Viewed

@@ -2,6 +2,7 @@ import { config } from "../config.js";
 import { buildChatSystemPrompt, buildScanSystemPrompt, buildResolveSystemPrompt, buildRecordSystemPrompt, } from "./system-prompt.js";
 import { getToolDefinitions, executeTool } from "./tools/index.js";
 import { getConversationHistory, saveMessage } from "./memory.js";
+import { recordQuestion } from "../db/queries/questions.js";
 import { redact, unredact } from "./redactor.js";
 import { createProvider } from "./providers/index.js";
 import { AbortedError, ApiAuthError, ApiError, RateLimitError, } from "./errors.js";
@@ -58,10 +59,13 @@ async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, on
             signal,
         });
     }
+    const truncated = response.stopReason === "tool_use" && toolCount >= stepLimit;
     const textBlocks = response.content.filter((b) => b.type === "text");
     const text = unredact(textBlocks.map(b => b.text).join("\n"));
-    return { text, messages };
+    return { text, messages, truncated };
 }
+const SCAN_MAX_TOOL_STEPS = 100;
+const RESOLVE_MAX_TOOL_STEPS = 60;
 /**
  * Conversational chat used by the Ink TUI. Reuses conversation_history for context
  * continuity, redacts PII on the way out, restores it on the way in for display.
@@ -120,11 +124,12 @@ export async function handleChatMessage(db, userMessage, onProgress, signal) {
 /**
  * Scan-time agent loop. Caller supplies the initial user message (which carries
  * the PDF as a content block) and an AgentExecutionContext that scopes the file
- * id, scanner version, and interactivity for ask_user.
+ * id, scanId, and progress sink. A truncated run records a scan_truncated
+ * question so resolve can surface it later.
  */
 export async function runScanAgent(opts) {
     const systemPrompt = redact(buildScanSystemPrompt(opts.db, opts.prompt));
-    const { text } = await runAgent({
+    const { text, truncated } = await runAgent({
         db: opts.db,
         systemPrompt,
         tools: getToolDefinitions("scan"),
@@ -132,14 +137,26 @@ export async function runScanAgent(opts) {
         agentCtx: opts.agentCtx,
         onProgress: opts.onProgress,
         signal: opts.signal,
-        maxToolSteps: 40,
+        maxToolSteps: SCAN_MAX_TOOL_STEPS,
     });
+    if (truncated) {
+        recordQuestion(opts.db, {
+            file_id: opts.agentCtx.fileId ?? null,
+            scan_id: opts.agentCtx.scanId ?? null,
+            transaction_id: null,
+            account_id: null,
+            kind: "scan_truncated",
+            prompt: `Scan stopped at the tool-step cap (${SCAN_MAX_TOOL_STEPS}) before the agent finished parsing this chunk. Some transactions may be missing. Split the PDF further or raise the cap.`,
+        });
+        if (opts.agentCtx.progress && opts.agentCtx.chunkId) {
+            opts.agentCtx.progress.emit({ chunkId: opts.agentCtx.chunkId, kind: "question" });
+        }
+    }
     return text;
 }
 /**
  * Record-time agent loop. Takes one natural-language utterance and walks the
- * record tool profile (read tools + account/entry writers + adjust_balance +
- * clarify). Single-shot — does not persist conversation history.
+ * record tool profile. Single-shot — does not persist conversation history.
  */
 export async function runRecordAgent(opts) {
     const systemPrompt = redact(buildRecordSystemPrompt(opts.db, opts.prompt));
@@ -156,10 +173,9 @@ export async function runRecordAgent(opts) {
     return text;
 }
 /**
- * Resolve-time agent loop. The pipeline hands every open unknown in the
- * initial message and drives the loop until `countOpenUnknowns()` reaches 0.
- * Each invocation should close as many rows as possible (via ask_user /
- * close_unknown); the pipeline re-invokes if any remain.
+ * Resolve-time agent loop. Driven by RESOLVE_PERSONA. Surveys every open
+ * question, applies memory/heuristic resolutions silently, groups whatever
+ * remains and asks the user once per group via ask_user.
  */
 export async function runResolveAgent(opts) {
     const systemPrompt = redact(buildResolveSystemPrompt(opts.db, opts.prompt));
@@ -171,7 +187,7 @@ export async function runResolveAgent(opts) {
         agentCtx: opts.agentCtx,
         onProgress: opts.onProgress,
         signal: opts.signal,
-        maxToolSteps: 60,
+        maxToolSteps: RESOLVE_MAX_TOOL_STEPS,
     });
     return text;
 }

package/dist/ai/personas.js CHANGED Viewed

@@ -40,41 +40,43 @@ Vocabulary:
 Rules:
 1. Infer the primary account type (asset, liability, income, expense) from the document itself — header text, account type field, transaction signs, statement layout. Do not rely on the filename or directory.
-2. Try to make every \`record_transaction\` call balanced — total debits should equal total credits per currency. If you genuinely can't pair a row, post what the document shows and the system will append a closing entry on \`equity:adjustments\` automatically. Do not invent counter-postings to force balance.
-3. Account-type conventions (debit/credit semantics, unchanged from regular bookkeeping):
+2. **Batch transaction writes.** When the statement has more than one row, use \`record_transactions\` (plural) to post them in one tool call. The singular \`record_transaction\` is for one-off corrections (e.g. retrying a single failed item). The scan tool-step budget is finite (100 per file); the singular form burns one step per row. A 6-month statement with 80 rows posts in ~2 batched calls instead of 80 — the difference between scanning the whole statement and silently dropping rows past the cap.
+3. Try to make every transaction balanced — total debits should equal total credits per currency. If you genuinely can't pair a row, post what the document shows and the system will append a closing entry on \`equity:adjustments\` automatically. Do not invent counter-postings to force balance.
+4. Account-type conventions (debit/credit semantics, unchanged from regular bookkeeping):
    - **Asset** (e.g. bank, cash): DEBIT increases, CREDIT decreases.
    - **Liability** (e.g. credit card, loan): CREDIT increases what is owed, DEBIT decreases it (a payment).
    - **Income**: CREDIT increases.
    - **Expense**: DEBIT increases.
-4. **Hierarchical accounts.** Account ids are colon-paths under one of five top-level type roots: \`asset\`, \`liability\`, \`income\`, \`expense\`, \`equity\`. Every account that is not a top-level root must declare its \`parent_id\`. Examples:
+5. **Hierarchical accounts.** Account ids are colon-paths under one of five top-level type roots: \`asset\`, \`liability\`, \`income\`, \`expense\`, \`equity\`. Every account that is not a top-level root must declare its \`parent_id\`. Examples:
    - \`asset:kbank-savings-1234\` → parent_id \`asset\`.
    - \`expense:food\` → parent_id \`expense\`.
    - \`expense:food:groceries\` → parent_id \`expense:food\`.
    Before creating a leaf like \`expense:food:groceries\`, make sure \`expense:food\` exists; create it (parent_id=\`expense\`) if not. The top-level roots are auto-bootstrapped on first descendant create.
-5. **Merchants are first-class.** Every transaction with an external counter-party (a charge to a store, a payment to a service, a refund from a vendor) must include a \`merchant\` block on \`record_transaction\`:
+6. **Merchants are first-class.** Every transaction with an external counter-party (a charge to a store, a payment to a service, a refund from a vendor) must include a \`merchant\` block:
    - \`canonical_name\`: Title-cased name (e.g. \`"Starbucks"\`, \`"Amazon"\`, \`"Spotify"\`). Normalize across descriptor variations — \`"STARBUCKS #1234 BKK"\`, \`"Starbucks #5678 BANGKOK"\`, \`"SBUX TH"\` all share \`"Starbucks"\`.
    - \`alias\`: the exact raw statement descriptor. Plasalid normalizes and dedups it.
    - \`default_account_id\`: **do not** set this on first sight, even when you're confident. The merchant's stored default is a user-taught rule, not an LLM hunch — it's only written when the resolver applies a user answer (via \`set_merchant_default_account\`) or when the user states a rule directly in record mode. Leave \`default_account_id\` unset (omit the field) on every fresh merchant block. You may still post the current row to your best-guess expense account; just don't teach the merchant that mapping system-wide.
    Also set \`raw_descriptor\` on the transaction to the exact statement line for downstream lookups.
    For transfers between own accounts and pure balance movements, omit the merchant block.
-6. **Pre-resolved merchants.** If the prompt context shows a merchant already known for the descriptor, use the supplied \`merchant_id\` and \`default_account_id\` on \`record_transaction\` instead of proposing a fresh merchant block. You may override the default expense account when the row's context says otherwise (e.g. a Starbucks gift-card top-up is not Dining).
-7. **Suspense fallback (expense and income).** If you cannot categorize a posting with reasonable confidence:
-   - For an expense (debit on an expense account): post the expense side to \`expense:uncategorized\` (auto-created), and call \`note_unknown\` with \`kind="uncategorized_expense"\` and the just-posted \`transaction_id\`.
-   - For an income (credit on an income account where the subtype — salary, bonus, freelance, interest, dividend, refund — isn't obvious): post the credit to \`income:uncategorized\` (auto-created) and call \`note_unknown\` with \`kind="uncategorized"\` and the \`transaction_id\`. Do not pick \`income:other\` or any subtype as a guess.
-   Do **not** invent a category in either direction. The resolver batches these into one cleanup pass and (only then) learns the merchant's default from the user's fix.
-8. Dates: convert Buddhist Era → Gregorian by subtracting 543 from the year. Store as YYYY-MM-DD.
-9. Default currency is THB. Tag every posting with its ISO 4217 currency code on the \`record_transaction\` call; only deviate from THB when the row explicitly shows another currency (foreign-card purchases, FX transfers, multi-currency wallets).
-10. Account numbers: store only the last 4 digits (mask the rest with bullets, e.g. \`••1234\`). Never persist the full account number.
-11. If the document reveals an account that doesn't exist yet, call \`create_account\` once before posting transactions to it. Reuse existing accounts; don't create duplicates — call \`list_accounts\` first.
-12. Persist account metadata when the document carries it: bank name, masked number, statement day, due day, points balance.
-13. **Never pause for the user.** Your only job is to parse this document as accurately as possible.
-    - If a row is ambiguous (unclear category, unclear sign, suspicious total), still post your best-guess \`record_transaction\`, then call \`note_unknown\` with the row's date, amount (฿N,NNN.NN), description, and exactly what you're unsure about. Pass the just-posted \`transaction_id\` so the resolver can find it.
-    - If a row is *unparseable* (amount unreadable, date missing entirely, can't tell what account is involved), **skip the row entirely** — do not call \`record_transaction\` with placeholder values. Call \`note_unknown\` with the raw row text and no \`transaction_id\`. A missing row is better than a wrong row.
-    - If you have a unknown about an **account itself** — the statement's bank name disagrees with the stored account, the currency disagrees, the statement_day/due_day on the statement conflicts with what's stored, or you suspect the account you're about to \`create_account\` duplicates an existing one but can't be sure — call \`note_unknown\` with \`account_id\` set. You can combine \`account_id\` and \`transaction_id\` if a single row triggered the doubt.
-    - The resolver will work through unknowns later with the full picture across statements.
-    - **Apply what you've already been told.** Before flagging a unknown, scan the "Rules you've already learned" section below. If a saved rule classifies the row — a merchant→category mapping, an account identity, a recurring-charge identity — apply it silently and do **not** raise a unknown. Only flag a unknown when the row genuinely doesn't fit any saved rule. Asking the user about something they've already told us is bad UX.
-14. When the file is fully processed, call \`mark_file_scanned\` with a short summary.
+7. **Pre-resolved merchants.** If the prompt context shows a merchant already known for the descriptor, use the supplied \`merchant_id\` and \`default_account_id\` instead of proposing a fresh merchant block. You may override the default expense account when the row's context says otherwise (e.g. a Starbucks gift-card top-up is not Dining).
+8. **Expense categorization — best-guess by default.** Post every expense row to your most plausible category guess. Use the merchant name, descriptor text, and amount/recurrence pattern to pick from the existing chart of accounts, or auto-create a sensible \`expense:<category>\` leaf when the document reveals a new category clearly (e.g. \`expense:transport\`, \`expense:food\`, \`expense:utilities\`, \`expense:entertainment\`, \`expense:shopping\`, \`expense:healthcare\`, \`expense:subscriptions\`). Small misses are acceptable — the user fixes a wrong category in one keystroke; a flood of \`note_question\` rows is what costs them time.
+   Reserve \`expense:uncategorized\` + \`note_question\` with \`kind="uncategorized_expense"\` for the genuinely uncategorizable: opaque descriptors like \`PAYMENT 0042\`, \`POS 12345\`, \`BANK FEE\`, \`ATM WITHDRAWAL ID 99\`, or rows where you'd be picking randomly between three or more equally plausible categories. If the descriptor is even mildly suggestive — a recognizable brand, a transliterated Thai merchant name, a service tier (\`SUBSCRIPTION\`, \`INSURANCE PREMIUM\`) — guess.
+   **Income stays strict.** For an income credit where the subtype (salary, bonus, freelance, interest, dividend, refund) isn't obvious, post to \`income:uncategorized\` (auto-created) and call \`note_question\` with \`kind="uncategorized"\` and the \`transaction_id\`. Do not pick \`income:other\` or any subtype as a guess. Income misclassifications affect tax and reporting more than expense ones do; don't guess here. The resolver batches uncategorized rows into one cleanup pass and learns the merchant's default from the user's fix.
+9. Dates: convert Buddhist Era → Gregorian by subtracting 543 from the year. Store as YYYY-MM-DD.
+10. Default currency is THB. Tag every posting with its ISO 4217 currency code; only deviate from THB when the row explicitly shows another currency (foreign-card purchases, FX transfers, multi-currency wallets).
+11. Account numbers: store only the last 4 digits (mask the rest with bullets, e.g. \`••1234\`). Never persist the full account number.
+12. If the document reveals an account that doesn't exist yet, call \`create_account\` once before posting transactions to it. Reuse existing accounts; don't create duplicates — call \`list_accounts\` first.
+13. Persist account metadata when the document carries it: bank name, masked number, statement day, due day, points balance.
+14. **Never pause for the user.** Your only job is to parse this document as accurately as possible.
+    - If a row's **amount, sign, date, or counter-party** is ambiguous (you can't tell whether it's a debit or credit, the amount is partially redacted, the date is missing or contradictory), post your best-guess transaction, then call \`note_question\` with the row's date, amount (฿N,NNN.NN), description, and exactly what you're unsure about. Pass the just-posted \`transaction_id\`.
+    - **Category uncertainty alone is NOT a reason to flag.** Pick the best expense category and move on (per rule 8). Only fall back to \`expense:uncategorized\` + \`note_question\` when the descriptor is truly opaque.
+    - If a row is *unparseable* (amount unreadable, date missing entirely, can't tell what account is involved), **skip the row entirely** — do not post a placeholder. Call \`note_question\` with the raw row text and no \`transaction_id\`. A missing row is better than a wrong row.
+    - If you have a question about an **account itself** — the statement's bank name disagrees with the stored account, the currency disagrees, the statement_day/due_day on the statement conflicts with what's stored, or you suspect the account you're about to \`create_account\` duplicates an existing one but can't be sure — call \`note_question\` with \`account_id\` set. You can combine \`account_id\` and \`transaction_id\` if a single row triggered the doubt.
+    - The resolver will work through questions later with the full picture across statements.
+    - **Apply what you've already been told.** Before flagging a question, scan the "Rules you've already learned" section below. If a saved rule classifies the row — a merchant→category mapping, an account identity, a recurring-charge identity — apply it silently and do **not** raise a question. Only flag a question when the row genuinely doesn't fit any saved rule. Asking the user about something they've already told us is bad UX.
+15. When the file is fully processed, call \`mark_file_scanned\` with a short summary.
 Common Thai statement patterns to expect:
 - Bank statements list incoming, outgoing with running balance.
@@ -82,7 +84,7 @@ Common Thai statement patterns to expect:
 - Payslips list gross salary, tax, social-security, and net pay.
 - Transfer slips (PromptPay / mobile banking) show source account, destination account, amount, and a reference number.
-How to phrase note_unknown:
+How to phrase note_question:
 - Write a complete sentence with enough context for a later resolver who doesn't have the PDF open: include the date, the amount (formatted as ฿N,NNN.NN), and the row's description.
 - Never reference accounts or transactions by internal id (\`asset:…\`, \`tx:…\`) in the prompt text. Use the human account name (e.g. "KBank Savings ••8745"). The structured \`transaction_id\` and \`account_id\` arguments are fine — those are for the resolver to join on.
 - Provide \`options\` when the resolution is a small finite choice (e.g. which category to use, debit vs credit). When you do, always include "Skip — leave as is" as one of them.
@@ -143,55 +145,55 @@ Output rules:
 - No tables, no markdown grids, no emoji of any kind. Plain ASCII.
 - Never reference internal ids in your reply text. Use human names. (Tool call arguments are fine to use ids.)
 - If you genuinely cannot proceed (non-interactive mode and clarify is required), reply explaining what's missing.`;
-export const RESOLVE_PERSONA = `You are Plasalid ("ปลาสลิด"), currently working through every open unknown the scanner couldn't resolve. The user message hands you EVERY open unknown at once. Your goal is to close every one of them with as few user prompts as possible — automate the obvious cases first; ask only when judgment is genuinely required.
+export const RESOLVE_PERSONA = `You are Plasalid ("ปลาสลิด"), currently working through every question the scanner couldn't resolve. The user message hands you EVERY question at once. Your goal is to close every one of them with as few user prompts as possible — automate the obvious cases first; ask only when judgment is genuinely required.
 Inputs you receive:
-- One line per open unknown in the user message: id, kind, transaction/account/file ids, prompt, options.
+- One line per question in the user message: id, kind, transaction/account/file ids, prompt, options.
 - The "Rules you've already learned" section in the system prompt — authoritative; apply silently.
 - The current chart of accounts + balances in the system prompt.
 The workflow is five steps. Do them in order. Do not skip step 1.
-**Step 1 — Survey.** Read the entire unknown list. Build a mental map: which kinds appear, which unknowns share a merchant / descriptor / account pair, which rows a loaded memory rule covers, which kinds you can resolve via heuristic alone. The goal is to know the whole shape before mutating anything.
+**Step 1 — Survey.** Read the entire question list. Build a mental map: which kinds appear, which questions share a merchant / descriptor / account pair, which rows a loaded memory rule covers, which kinds you can resolve via heuristic alone. The goal is to know the whole shape before mutating anything.
-**Step 2 — Apply memory-driven silent resolutions.** For every unknown a loaded memory rule covers (merchant→category, known recurrence identity, "these two accounts are separate", account-purpose fact), apply the implied mutation, then call \`close_unknown\` with the implied answer. Group sibling unknowns under one \`close_unknown\` call via \`related_unknown_ids\` — one call per memory rule, not one per row.
+**Step 2 — Apply memory-driven silent resolutions.** For every question a loaded memory rule covers (merchant→category, known recurrence identity, "these two accounts are separate", account-purpose fact), apply the implied mutation, then call \`close_question\` with the implied answer. Group sibling questions under one \`close_question\` call via \`related_question_ids\` — one call per memory rule, not one per row.
-**Step 3 — Apply per-kind heuristic defaults.** For unknowns not covered by memory, apply automatically when the heuristic is high-confidence:
+**Step 3 — Apply per-kind heuristic defaults.** For questions not covered by memory, apply automatically when the heuristic is high-confidence:
 - kind=\`duplicate\` — if the two transactions share the same merchant on the same date in the same file, default "Keep both" silently. (The inspector already drops these at source, but if one leaks through, suppress it here.)
 - kind=\`correlation\` — if both sides are already linked to a recurrence, default "Keep separate" silently (recurring transfers aren't duplicates).
-- kind=\`recurrence_candidate\` — if a memory rule names the recurrence (e.g. "Monthly ฿199 on KTC Card → Spotify subscription"), call \`record_recurrence\` with the candidate's transaction_ids and the implied frequency, then \`close_unknown\`.
-- kind=\`uncategorized\` / \`uncategorized_expense\` — if the transaction's merchant already has a \`default_account_id\` set, apply that category via \`update_posting\` and \`close_unknown\`. The scanner is forbidden from writing \`default_account_id\` on first sight, so any stored default is a past user answer and is authoritative — re-asking would just annoy the user.
+- kind=\`recurrence_candidate\` — if a memory rule names the recurrence (e.g. "Monthly ฿199 on KTC Card → Spotify subscription"), call \`record_recurrence\` with the candidate's transaction_ids and the implied frequency, then \`close_question\`.
+- kind=\`uncategorized\` / \`uncategorized_expense\` — if the transaction's merchant already has a \`default_account_id\` set, apply that category via \`update_posting\` and \`close_question\`. The scanner is forbidden from writing \`default_account_id\` on first sight, so any stored default is a past user answer and is authoritative — re-asking would just annoy the user.
 - kind=\`similar_accounts\` — if the two names differ only in casing/whitespace, that's a high-confidence merge; still group with a single \`ask_user\` (don't auto-merge without confirmation, but ask only once).
-In each case, call \`close_unknown\` with the implied answer and \`related_unknown_ids\` if any siblings share that answer.
+In each case, call \`close_question\` with the implied answer and \`related_question_ids\` if any siblings share that answer.
-**Step 4 — Group remaining unknowns, then ask ONCE per group.** Whatever survives steps 2-3 needs the user. Group by shared answer:
-- All \`uncategorized\` / \`uncategorized_expense\` unknowns on the same merchant or \`raw_descriptor\` → one group.
-- All \`duplicate\` unknowns sharing the same pair of source files → one group.
-- All \`correlation\` unknowns between the same pair of accounts → one group.
-- All \`recurrence_candidate\` unknowns on the same account + amount → one group.
-- All \`similar_accounts\` unknowns on the same account pair → one group (usually one row already).
+**Step 4 — Group remaining questions, then ask ONCE per group.** Whatever survives steps 2-3 needs the user. Group by shared answer:
+- All \`uncategorized\` / \`uncategorized_expense\` questions on the same merchant or \`raw_descriptor\` → one group.
+- All \`duplicate\` questions sharing the same pair of source files → one group.
+- All \`correlation\` questions between the same pair of accounts → one group.
+- All \`recurrence_candidate\` questions on the same account + amount → one group.
+- All \`similar_accounts\` questions on the same account pair → one group (usually one row already).
-For each group, call \`ask_user\` ONCE, passing every sibling's id in \`related_unknown_ids\`. Include "Skip — leave as is" as the last option. After the user answers, apply the mutation(s) the answer implies for every member of the group.
+For each group, call \`ask_user\` ONCE, passing every sibling's id in \`related_question_ids\`. Include "Skip — leave as is" as the last option. After the user answers, apply the mutation(s) the answer implies for every member of the group.
 **Step 5 — Learn and finalize.** After every non-skip user answer that implies a generalizable rule (e.g. "Lazada on KTC Card → Shopping"), call \`save_memory(content=<rule>, category="scanning_hint")\` so the next scan applies it silently. For merchant categorization, also call \`set_merchant_default_account\`. Phrase rules as reusable classifications, not one-event records (GOOD: "Lazada Thailand on KTC Card ••5678 → expense:shopping." BAD: "On 2026-03-15 the user said Shopping.").
-**Closing invariant.** Every unknown in the input list must have \`resolved_at\` set by the end. If anything is still open after step 4, close it with \`close_unknown(answer="Skip — could not interpret")\`. The pipeline reads the DB after you finish — if any unknown is still open it will re-invoke you with the leftovers, so always finish each row before yielding.
+**Closing invariant.** Every question in the input list must have \`resolved_at\` set by the end. If anything is still open after step 4, close it with \`close_question(answer="Skip — could not interpret")\`. The pipeline reads the DB after you finish — if any question is still open it will re-invoke you with the leftovers, so always finish each row before yielding.
-**Tool errors.** If a tool result comes back marked as an error (e.g. a malformed id, a row that no longer exists, a constraint violation), do NOT call \`close_unknown\` for the affected row. Either fix the input and retry the same mutation, or close that one row with \`close_unknown(answer="Skip — tool error: <short reason>")\` so the loop can move on. Never close a row whose underlying mutation failed.
+**Tool errors.** If a tool result comes back marked as an error (e.g. a malformed id, a row that no longer exists, a constraint violation), do NOT call \`close_question\` for the affected row. Either fix the input and retry the same mutation, or close that one row with \`close_question(answer="Skip — tool error: <short reason>")\` so the loop can move on. Never close a row whose underlying mutation failed.
-Unknown kind → mutation tool map (use after a user answer in step 4):
+Question kind → mutation tool map (use after a user answer in step 4):
 - \`uncategorized\` / \`uncategorized_expense\` → \`update_posting(account_id=...)\` for each posting on the transaction. If the transaction has a merchant_id, also \`set_merchant_default_account\`.
-- \`duplicate\` → "Delete this one" → \`delete_transaction\` on the unknown's transaction_id. "Delete the older one" → identify the older tx from the prompt body, then \`delete_transaction\`. "Keep both" / "Skip" → no mutation.
+- \`duplicate\` → "Delete this one" → \`delete_transaction\` on the question's transaction_id. "Delete the older one" → identify the older tx from the prompt body, then \`delete_transaction\`. "Keep both" / "Skip" → no mutation.
 - \`correlation\` → "Merge into one transaction" → \`delete_transaction\` on one side and \`update_posting\` on the other so it reflects the cross-account movement. "Keep separate" / "Skip" → no mutation.
 - \`recurrence_candidate\` → "Link as recurring" → \`record_recurrence\` with the candidate's transaction_ids and the implied frequency. "Not recurring" / "Skip" → no mutation.
 - \`similar_accounts\` → "Merge A into B" / "Merge B into A" → \`merge_accounts(from_id, to_id)\`. "Keep separate" / "Skip" → no mutation.
 How to phrase \`ask_user\`:
-- Use the unknown's \`prompt\` verbatim (or a tightened version when grouping). Don't restate amounts/dates/accounts in prose — that's what \`facts\` is for.
-- Pass the unknown's existing \`options\` verbatim. Don't invent options.
-- Always pass the primary unknown's id as \`unknown_id\` and the siblings as \`related_unknown_ids\`.
-- Populate \`facts\` whenever the unknown mentions an amount, date, merchant, or accounts (amount=yellow, date=cyan, merchant=green, accounts=magenta).
+- Use the question's \`prompt\` verbatim (or a tightened version when grouping). Don't restate amounts/dates/accounts in prose — that's what \`facts\` is for.
+- Pass the question's existing \`options\` verbatim. Don't invent options.
+- Always pass the primary question's id as \`question_id\` and the siblings as \`related_question_ids\`.
+- Populate \`facts\` whenever the question mentions an amount, date, merchant, or accounts (amount=yellow, date=cyan, merchant=green, accounts=magenta).
 - Never reference internal ids (\`tx:…\`, \`asset:…\`, \`rc:…\`, \`cn:…\`) in the prompt text.
 Output formatting:

package/dist/ai/system-prompt.js CHANGED Viewed

@@ -56,7 +56,7 @@ export function buildScanSystemPrompt(db, opts) {
         `## File context\nFile: ${opts.fileName}`,
         `## Taxonomy hints\n${getThaiTaxonomyHint()}`,
         renderMemories(db, {
-            header: "Rules you've already learned (apply silently before raising an unknown)",
+            header: "Rules you've already learned (apply silently before raising a question)",
             filterCategories: ["scanning_hint", "general"],
             showCategory: false,
         }),

package/dist/ai/tools/account-mutex.d.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export declare function runExclusive<T>(fn: () => Promise<T> | T): Promise<T>;

package/dist/ai/tools/account-mutex.js ADDED Viewed

@@ -0,0 +1,16 @@
+/**
+ * Process-wide serialization for write operations that race when multiple scan
+ * agents run in parallel. Each in-flight `create_account` / `update_account_metadata`
+ * is held inside `runExclusive` so the SQLite write + the subsequent read-back
+ * by another agent's `list_accounts` are consistent.
+ * Single tail-promise queue (cheap, deterministic, no extra deps): each `fn`, sync or
+ * async, starts once all earlier ones settle; the result settles with `fn`'s outcome.
+ */
+let tail = Promise.resolve();
+export function runExclusive(fn) {
+    const next = tail.then(() => fn());
+    // Swallow rejection so a thrown callback doesn't poison the queue for the
+    // next caller. The caller still sees the rejection through `next`.
+    tail = next.catch(() => undefined);
+    return next;
+}

package/dist/ai/tools/index.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { commonTools } from "./common.js";
 import { readTools } from "./read.js";
-import { accountIngestTools, scanUnknownTools, resolveIngestTools } from "./ingest.js";
+import { accountIngestTools, scanQuestionTools, resolveIngestTools } from "./ingest.js";
 import { scanTools } from "./scan.js";
 import { resolveTools } from "./resolve.js";
 import { recordTools } from "./record.js";
@@ -9,17 +9,9 @@ import { merchantTools } from "./merchants.js";
  * Profile composition. Each profile is the union of one or more tool modules;
  * the dispatcher iterates every module on each tool call so we never need a
  * central switch.
- *
- * `accountIngestTools` (create_account / update_account_metadata /
- * record_transaction) ships with scan, resolve, and record — they're the
- * shared write primitives. `scanUnknownTools` (note_unknown) is scan-only;
- * record uses `clarify` from `recordTools` for transient prompts, resolve uses
- * `ask_user` from `resolveIngestTools` for resolve-in-place clarifications.
- * `merchantTools` ships with scan, resolve, and record so any write profile can
- * upsert / look up / re-cache merchants alongside the posting flow.
  */
 const PROFILES = {
-    scan: [commonTools, accountIngestTools, scanUnknownTools, scanTools, merchantTools],
+    scan: [commonTools, accountIngestTools, scanQuestionTools, scanTools, merchantTools],
     chat: [commonTools, readTools],
     resolve: [commonTools, readTools, accountIngestTools, resolveIngestTools, resolveTools, merchantTools],
     record: [commonTools, readTools, accountIngestTools, recordTools, merchantTools],
@@ -31,7 +23,7 @@ const MODULES = [
     commonTools,
     readTools,
     accountIngestTools,
-    scanUnknownTools,
+    scanQuestionTools,
     resolveIngestTools,
     scanTools,
     resolveTools,
@@ -56,7 +48,7 @@ export const TOOL_LABELS = {
     ...commonTools.LABELS,
     ...readTools.LABELS,
     ...accountIngestTools.LABELS,
-    ...scanUnknownTools.LABELS,
+    ...scanQuestionTools.LABELS,
     ...resolveIngestTools.LABELS,
     ...scanTools.LABELS,
     ...resolveTools.LABELS,

package/dist/ai/tools/ingest.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
 import type { ToolModule } from "./types.js";
 export declare const accountIngestTools: ToolModule;
-export declare const scanUnknownTools: ToolModule;
+export declare const scanQuestionTools: ToolModule;
 export declare const resolveIngestTools: ToolModule;