plasalid 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/README.md +4 -0
  2. package/dist/ai/personas.js +29 -6
  3. package/dist/ai/prompt-sections.d.ts +10 -0
  4. package/dist/ai/prompt-sections.js +29 -0
  5. package/dist/ai/system-prompt.js +10 -6
  6. package/dist/ai/tools/clarify.js +35 -0
  7. package/dist/ai/tools/common.js +3 -2
  8. package/dist/ai/tools/index.js +6 -3
  9. package/dist/ai/tools/ingest.js +47 -35
  10. package/dist/ai/tools/mutate.d.ts +2 -0
  11. package/dist/ai/tools/mutate.js +81 -0
  12. package/dist/cli/commands/accounts.d.ts +1 -4
  13. package/dist/cli/commands/accounts.js +12 -101
  14. package/dist/cli/commands/files.d.ts +7 -0
  15. package/dist/cli/commands/files.js +24 -0
  16. package/dist/cli/commands/rules.d.ts +4 -12
  17. package/dist/cli/commands/rules.js +33 -67
  18. package/dist/cli/commands/scan.js +14 -12
  19. package/dist/cli/commands/status.js +5 -3
  20. package/dist/cli/commands/transactions.d.ts +0 -2
  21. package/dist/cli/commands/transactions.js +10 -63
  22. package/dist/cli/format.js +22 -32
  23. package/dist/cli/helper.d.ts +9 -1
  24. package/dist/cli/helper.js +17 -2
  25. package/dist/cli/index.js +37 -32
  26. package/dist/cli/ink/FilesBrowser.d.ts +7 -0
  27. package/dist/cli/ink/FilesBrowser.js +103 -0
  28. package/dist/cli/ink/ListBrowser.d.ts +16 -1
  29. package/dist/cli/ink/ListBrowser.js +36 -49
  30. package/dist/cli/ink/PromptFrame.js +1 -1
  31. package/dist/cli/ink/RulesBrowser.d.ts +7 -0
  32. package/dist/cli/ink/RulesBrowser.js +67 -0
  33. package/dist/cli/ink/ScanDashboard.js +90 -68
  34. package/dist/cli/ink/hooks/useFooterText.js +14 -22
  35. package/dist/cli/ink/keys.d.ts +2 -0
  36. package/dist/cli/ink/keys.js +19 -0
  37. package/dist/db/queries/files.d.ts +29 -0
  38. package/dist/db/queries/files.js +34 -0
  39. package/dist/db/queries/questions.d.ts +17 -0
  40. package/dist/db/queries/questions.js +47 -9
  41. package/dist/db/queries/rules.d.ts +31 -0
  42. package/dist/db/queries/rules.js +55 -0
  43. package/dist/db/queries/transactions.d.ts +34 -0
  44. package/dist/db/queries/transactions.js +86 -0
  45. package/dist/db/schema.js +17 -0
  46. package/dist/scanner/clarifier-memory.d.ts +15 -3
  47. package/dist/scanner/clarifier-memory.js +38 -17
  48. package/dist/scanner/clarifier.d.ts +2 -1
  49. package/dist/scanner/clarifier.js +40 -26
  50. package/dist/scanner/commit-pipeline.d.ts +56 -0
  51. package/dist/scanner/commit-pipeline.js +204 -0
  52. package/dist/scanner/committer.d.ts +56 -0
  53. package/dist/scanner/committer.js +204 -0
  54. package/dist/scanner/parse.js +27 -7
  55. package/dist/scanner/recurrence-pipeline.d.ts +28 -0
  56. package/dist/scanner/recurrence-pipeline.js +126 -0
  57. package/dist/scanner/recurrence.d.ts +28 -0
  58. package/dist/scanner/recurrence.js +155 -0
  59. package/dist/scanner/rule-keys.d.ts +13 -0
  60. package/dist/scanner/rule-keys.js +28 -0
  61. package/dist/scanner/rules.d.ts +13 -0
  62. package/dist/scanner/rules.js +28 -0
  63. package/dist/scanner/worker.js +4 -2
  64. package/package.json +1 -1
@@ -129,6 +129,87 @@ export function updatePosting(db, postingId, fields) {
129
129
  export function deleteTransaction(db, transactionId) {
130
130
  return db.prepare(`DELETE FROM transactions WHERE id = ?`).run(transactionId).changes;
131
131
  }
132
/**
 * Backfill primitive. Updates every posting matching `filter` inside one
 * database transaction and returns the affected count plus a sample of up to
 * 10 posting ids so the caller (often an AI tool) can quote evidence back to
 * the user.
 *
 * Recognized filter fields: `account_id`, `currency`, `merchant_id`, `from`
 * and `to` (inclusive bounds on the transaction date) and
 * `description_contains` (lower-cased substring match on the transaction
 * description; `%`, `_` and `\` in the needle are matched literally).
 *
 * Refuses to run without at least one usable filter field (no "update
 * everything" escape hatch) and without at least one set field. Also refuses
 * a no-op recategorization where `set.account_id` equals `filter.account_id`.
 *
 * Safe-field policy mirrors `updatePosting`: account_id + memo only.
 * Amount/currency corrections must go through delete + re-record to keep
 * the transaction's debit=credit invariant intact.
 *
 * @param db      database handle exposing `prepare` and `transaction`
 * @param filter  match criteria; at least one recognized field must be non-empty
 * @param set     new values; at least one of `account_id` / `memo`
 * @returns `{ affected, sample_posting_ids }`
 * @throws Error when no usable filter field is given, when no settable field
 *   is given, or when `set.account_id` equals `filter.account_id`
 */
export function bulkUpdatePostings(db, filter, set) {
    // Build the WHERE clause first and validate against what was actually
    // built, so an unrecognized key or a null value can never slip past the
    // guard and produce a malformed `WHERE` with no conditions.
    const whereClauses = [];
    const whereParams = [];
    if (filter.account_id) {
        whereClauses.push("p.account_id = ?");
        whereParams.push(filter.account_id);
    }
    if (filter.currency) {
        whereClauses.push("p.currency = ?");
        whereParams.push(filter.currency);
    }
    if (filter.merchant_id) {
        whereClauses.push("t.merchant_id = ?");
        whereParams.push(filter.merchant_id);
    }
    if (filter.from) {
        whereClauses.push("t.date >= ?");
        whereParams.push(filter.from);
    }
    if (filter.to) {
        whereClauses.push("t.date <= ?");
        whereParams.push(filter.to);
    }
    if (filter.description_contains) {
        // Escape LIKE metacharacters so the needle is a literal substring.
        whereClauses.push("LOWER(t.description) LIKE ? ESCAPE '\\'");
        whereParams.push(`%${escapeLikePattern(filter.description_contains.toLowerCase())}%`);
    }
    if (whereClauses.length === 0) {
        throw new Error("bulkUpdatePostings: at least one filter field is required.");
    }
    // Same idea for SET: only the two safe fields count.
    const sets = [];
    const setParams = [];
    if (set.account_id !== undefined) {
        sets.push("account_id = ?");
        setParams.push(set.account_id);
    }
    if (set.memo !== undefined) {
        sets.push("memo = ?");
        setParams.push(set.memo);
    }
    if (sets.length === 0) {
        throw new Error("bulkUpdatePostings: at least one set field is required.");
    }
    if (set.account_id !== undefined && set.account_id === filter.account_id) {
        throw new Error("bulkUpdatePostings: set.account_id equals filter.account_id (no-op).");
    }
    const matchIdsSql = `SELECT p.id
       FROM postings p
       JOIN transactions t ON t.id = p.transaction_id
      WHERE ${whereClauses.join(" AND ")}`;
    // Keep each UPDATE well under SQLite's bound-parameter limit; a single
    // `IN (...)` with one placeholder per matched row can exceed it.
    const batchSize = 500;
    let affected = 0;
    let sample = [];
    const tx = db.transaction(() => {
        const ids = db.prepare(matchIdsSql).all(...whereParams).map((r) => r.id);
        if (ids.length === 0)
            return;
        sample = ids.slice(0, 10);
        for (let start = 0; start < ids.length; start += batchSize) {
            const batch = ids.slice(start, start + batchSize);
            const placeholders = batch.map(() => "?").join(",");
            affected += db
                .prepare(`UPDATE postings SET ${sets.join(", ")} WHERE id IN (${placeholders})`)
                .run(...setParams, ...batch).changes;
        }
    });
    tx();
    return { affected, sample_posting_ids: sample };
}
/** Backslash-escape `\`, `%` and `_` so they match literally under `LIKE ... ESCAPE '\'`. */
function escapeLikePattern(text) {
    return text.replace(/[\\%_]/g, "\\$&");
}
132
213
  /**
133
214
  * Heuristic duplicate finder: group transactions by (rounded total debit) and check
134
215
  * pairs whose date difference is ≤ toleranceDays. Returns groups with ≥2 members.
@@ -370,3 +451,8 @@ export function countTransactions(db) {
370
451
  .get();
371
452
  return row;
372
453
  }
454
/**
 * Count the rows in `transactions` whose `source_file_id` equals `fileId`.
 *
 * @param db      database handle exposing `prepare`
 * @param fileId  value bound to the `source_file_id = ?` placeholder
 * @returns the `n` column of the single COUNT(*) row
 */
export function countTransactionsBySourceFile(db, fileId) {
    const countQuery = db.prepare(`SELECT COUNT(*) AS n FROM transactions WHERE source_file_id = ?`);
    const row = countQuery.get(fileId);
    return row.n;
}
package/dist/db/schema.js CHANGED
@@ -45,6 +45,8 @@ export function migrate(db) {
45
45
  status TEXT NOT NULL CHECK(status IN ('pending','scanned','failed')),
46
46
  raw_text TEXT,
47
47
  scanned_at TEXT,
48
+ provider TEXT,
49
+ model TEXT,
48
50
  error TEXT,
49
51
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
50
52
  );
@@ -110,10 +112,12 @@ export function migrate(db) {
110
112
  context_json TEXT,
111
113
  answer TEXT,
112
114
  resolved_at TEXT,
115
+ deferred_until TEXT,
113
116
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
114
117
  );
115
118
 
116
119
  CREATE INDEX IF NOT EXISTS questions_scan_idx ON questions(scan_id);
120
+ CREATE INDEX IF NOT EXISTS questions_deferred_idx ON questions(deferred_until);
117
121
 
118
122
  CREATE TABLE IF NOT EXISTS conversation_history (
119
123
  id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -129,6 +133,19 @@ export function migrate(db) {
129
133
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
130
134
  );
131
135
 
136
+ CREATE TABLE IF NOT EXISTS rules (
137
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
138
+ kind TEXT NOT NULL,
139
+ key TEXT NOT NULL,
140
+ target TEXT NOT NULL,
141
+ evidence_count INTEGER NOT NULL DEFAULT 1,
142
+ last_seen_at TEXT NOT NULL DEFAULT (datetime('now')),
143
+ created_at TEXT NOT NULL DEFAULT (datetime('now')),
144
+ UNIQUE(kind, key)
145
+ );
146
+
147
+ CREATE INDEX IF NOT EXISTS rules_kind_idx ON rules(kind);
148
+
132
149
  CREATE TABLE IF NOT EXISTS settings (
133
150
  key TEXT PRIMARY KEY,
134
151
  value TEXT NOT NULL
@@ -1,8 +1,20 @@
1
1
  import type Database from "libsql";
2
2
  import type { ClosedQuestion } from "../db/queries/questions.js";
3
3
  /**
4
- * Compact every closed question into a memories row (category `scanning_hint`).
5
- * The next scan's deterministic memoryRulePass picks them up. Dedups on body —
6
- * an identical rule for the same kind + prompt won't be re-inserted.
4
+ * Compact every closed question worth learning from into a `rules` row. The
5
+ * deterministic clarifier pass looks rules up by `(kind, key)` via the
6
+ * UNIQUE index, so each evidence event UPSERTs incrementing
7
+ * `evidence_count` and refreshing `last_seen_at` on repeats rather than
8
+ * appending a near-duplicate.
9
+ *
10
+ * A closure is NOT learned (no rule synthesized) when any of:
11
+ * 1. `kind` is in `RULE_KIND_DENYLIST` — failure-class kinds carry no
12
+ * generalizable signal.
13
+ * 2. `answer` starts with `Skip` — skips are one-time recovery decisions,
14
+ * not patterns the next scan should auto-apply.
15
+ * 3. `rule_key` is null — without a structural key the rule could only
16
+ * match its own prose, which embeds dates/amounts and never re-fires.
17
+ *
18
+ * Returns the count of rules upserted (new or repeat-evidence).
7
19
  */
8
20
  export declare function synthesizeMemoryRules(db: Database.Database, closures: readonly ClosedQuestion[]): number;
@@ -1,24 +1,45 @@
1
+ import { upsertRule } from "../db/queries/rules.js";
1
2
  /**
2
- * Compact every closed question into a memories row (category `scanning_hint`).
3
- * The next scan's deterministic memoryRulePass picks them up. Dedups on body —
4
- * an identical rule for the same kind + prompt won't be re-inserted.
3
+ * Compact every closed question worth learning from into a `rules` row. The
4
+ * deterministic clarifier pass looks rules up by `(kind, key)` via the
5
+ * UNIQUE index, so each evidence event UPSERTs incrementing
6
+ * `evidence_count` and refreshing `last_seen_at` on repeats rather than
7
+ * appending a near-duplicate.
8
+ *
9
+ * A closure is NOT learned (no rule synthesized) when any of:
10
+ * 1. `kind` is in `RULE_KIND_DENYLIST` — failure-class kinds carry no
11
+ * generalizable signal.
12
+ * 2. `answer` starts with `Skip` — skips are one-time recovery decisions,
13
+ * not patterns the next scan should auto-apply.
14
+ * 3. `rule_key` is null — without a structural key the rule could only
15
+ * match its own prose, which embeds dates/amounts and never re-fires.
16
+ *
17
+ * Returns the count of rules upserted (new or repeat-evidence).
5
18
  */
6
19
  export function synthesizeMemoryRules(db, closures) {
7
- if (closures.length === 0)
8
- return 0;
9
- let inserted = 0;
10
- const exists = db.prepare(`SELECT 1 FROM memories WHERE category = ? AND content = ? LIMIT 1`);
11
- const insert = db.prepare(`INSERT INTO memories (content, category) VALUES (?, ?)`);
12
- for (const c of closures) {
13
- const body = formatRule(c);
14
- if (exists.get("scanning_hint", body))
20
+ let upserted = 0;
21
+ for (const closure of closures) {
22
+ if (!isRuleSource(closure))
15
23
  continue;
16
- insert.run(body, "scanning_hint");
17
- inserted++;
24
+ upsertRule(db, { kind: closure.kind, key: closure.rule_key, target: closure.answer.trim() });
25
+ upserted++;
18
26
  }
19
- return inserted;
27
+ return upserted;
20
28
  }
21
- function formatRule(c) {
22
- const kindLabel = c.kind ?? "general";
23
- return `[${kindLabel}] ${c.prompt.replace(/\s+/g, " ").trim()} -> ${c.answer.trim()}`;
29
// Question kinds that are never turned into rules: failure-class kinds carry
// no generalizable signal for later scans.
const RULE_KIND_DENYLIST = new Set(["dirty_input", "scan_truncated", "boundary_continuation"]);
34
/**
 * Decide whether a closed question may be learned as a rule. It must have
 * both a `kind` and a `rule_key`, its kind must not be deny-listed, and its
 * answer must not be a skip.
 */
function isRuleSource(c) {
    const hasIdentity = Boolean(c.kind) && Boolean(c.rule_key);
    // Short-circuit order matches the checks above: identity, deny-list, skip.
    return hasIdentity && !RULE_KIND_DENYLIST.has(c.kind) && !isSkipAnswer(c.answer);
}
43
/** True when the answer, ignoring surrounding whitespace and letter case, begins with "skip". */
function isSkipAnswer(answer) {
    const normalized = answer.trim().toLowerCase();
    return normalized.startsWith("skip");
}
@@ -36,6 +36,7 @@ export declare const CLARIFIER_PASSES: readonly ClarifierPass[];
36
36
  * Single entry point shared by the in-scan resolve phase and the standalone
37
37
  * `plasalid clarify` command. Runs deterministic passes first, then (when
38
38
  * interactive) hands the leftovers to the LLM clarifier agent. Closed
39
- * questions get compacted into scanning_hint memories.
39
+ * questions get upserted into the rules table (keyed on the question's
40
+ * structural signature, not its prose).
40
41
  */
41
42
  export declare function runClarify(opts: RunClarifyOpts): Promise<ClarifySummary>;
@@ -1,29 +1,38 @@
1
1
  import { closeQuestion, listQuestions, countQuestions, } from "../db/queries/questions.js";
2
2
  import { updatePosting } from "../db/queries/transactions.js";
3
+ import { findRule } from "../db/queries/rules.js";
3
4
  import { runClarifyAgent } from "../ai/agent.js";
4
5
  import { synthesizeMemoryRules } from "./clarifier-memory.js";
6
+ import { applyRecurrenceRules, generateRecurrenceCandidateQuestions, } from "./recurrence.js";
5
7
  import { converge } from "./converge.js";
6
8
  const MAX_AGENT_PASSES = 3;
7
9
  /**
8
- * Apply deterministic passes via memory_rules lookups. Closes any question
9
- * whose prompt has a stored scanning_hint that already encodes the answer.
10
+ * Apply deterministic resolution via a `(kind, key)` indexed lookup in the
11
+ * rules table. The rule's `key` was computed at question-creation time
12
+ * (see `src/scanner/committer.ts`) from a stable structural signature — merchant id,
13
+ * normalized descriptor, account pair — so the same pattern matches
14
+ * across scans regardless of date, amount, or prompt prose.
10
15
  */
11
16
  const memoryRulePass = {
12
17
  name: "memory_rule",
13
- kinds: ["uncategorized", "uncategorized_expense", "duplicate", "correlation", "recurrence_candidate", "similar_accounts", "boundary_continuation", "scan_truncated", "scan_commit_failure"],
18
+ kinds: [
19
+ "uncategorized",
20
+ "uncategorized_expense",
21
+ "duplicate",
22
+ "correlation",
23
+ "similar_accounts",
24
+ "boundary_continuation",
25
+ "scan_truncated",
26
+ "unknown_merchant",
27
+ ],
14
28
  async tryResolve(u, ctx) {
15
- const rules = ctx.db
16
- .prepare(`SELECT content FROM memories WHERE category = 'scanning_hint'`)
17
- .all();
18
- const key = canonicalKey(u);
19
- for (const r of rules) {
20
- const match = parseRule(r.content);
21
- if (!match)
22
- continue;
23
- if (match.key === key)
24
- return match.answer;
25
- }
26
- return null;
29
+ if (!u.kind)
30
+ return null;
31
+ const key = extractRuleKey(u.context_json);
32
+ if (!key)
33
+ return null;
34
+ const rule = findRule(ctx.db, u.kind, key);
35
+ return rule?.target ?? null;
27
36
  },
28
37
  };
29
38
  /**
@@ -69,12 +78,19 @@ export const CLARIFIER_PASSES = [
69
78
  * Single entry point shared by the in-scan resolve phase and the standalone
70
79
  * `plasalid clarify` command. Runs deterministic passes first, then (when
71
80
  * interactive) hands the leftovers to the LLM clarifier agent. Closed
72
- * questions get compacted into scanning_hint memories.
81
+ * questions get upserted into the rules table (keyed on the question's
82
+ * structural signature, not its prose).
73
83
  */
74
84
  export async function runClarify(opts) {
75
85
  const { db } = opts;
76
86
  const tally = {};
77
87
  const closures = [];
88
+ const autoLinked = applyRecurrenceRules(db).linked;
89
+ if (autoLinked > 0)
90
+ tally["recurrence_auto_link"] = autoLinked;
91
+ const generated = generateRecurrenceCandidateQuestions(db, opts.scanId ?? null);
92
+ if (generated > 0)
93
+ tally["recurrence_generation"] = generated;
78
94
  const initial = listQuestions(db, { scanId: opts.scanId, limit: 1000 });
79
95
  const total = initial.length;
80
96
  if (total === 0) {
@@ -182,16 +198,14 @@ function parseOptions(json) {
182
198
  return [];
183
199
  }
184
200
  }
185
- function canonicalKey(u) {
186
- return `[${u.kind ?? "general"}] ${u.prompt.replace(/\s+/g, " ").trim()}`;
187
- }
188
- function parseRule(body) {
189
- const idx = body.lastIndexOf(" -> ");
190
- if (idx < 0)
201
+ function extractRuleKey(contextJson) {
202
+ if (!contextJson)
191
203
  return null;
192
- const key = body.slice(0, idx).trim();
193
- const answer = body.slice(idx + 4).trim();
194
- if (!key || !answer)
204
+ try {
205
+ const parsed = JSON.parse(contextJson);
206
+ return typeof parsed?.rule_key === "string" ? parsed.rule_key : null;
207
+ }
208
+ catch {
195
209
  return null;
196
- return { key, answer };
210
+ }
197
211
  }
@@ -0,0 +1,56 @@
1
+ import type Database from "libsql";
2
+ import { type TransactionInput } from "../db/queries/transactions.js";
3
+ /**
4
+ * Staged best-effort transaction commit.
5
+ *
6
+ * Each stage returns a tagged union. Side effects (raising questions,
7
+ * progress emission, placeholder account creation) flow through the
8
+ * `CommitHooks` interface so the pipeline stays pure-ish and testable.
9
+ *
10
+ * The only legitimate drop path is a `dirty_input` validation failure
11
+ * (no date, malformed amount, etc.). Every other resolution problem —
12
+ * unknown merchant, unknown account — is rescued in-place: NULL the
13
+ * merchant, fuzzy-match-or-create the account, raise a typed question
14
+ * for the clarifier to review later.
15
+ */
16
/** Per-call context passed to the commit pipeline and its default hooks. */
export interface CommitContext {
    /** Owning scan; the default hooks write no question when this is null. */
    readonly scanId: string | null;
    /** Stored as `file_id` on questions raised by the default hooks. */
    readonly fileId: string | null;
    /** Attached to progress events; none are emitted when this is null. */
    readonly chunkId: string | null;
    /** Progress sink; none are emitted when this is null. */
    readonly progress: ProgressEmitter | null;
}
22
/** Sink for per-chunk progress ticks: one event per committed transaction ("tx") or raised question ("question"). */
export interface ProgressEmitter {
    emit(event: { chunkId: string; kind: "tx" | "question" }): void;
}
28
/**
 * Result of one pipeline run, discriminated on `ok`: a committed transaction
 * id on success, or a drop reason plus message on failure. Both variants
 * carry the number of question-raising hook calls made for the input.
 */
export type CommitOutcome =
    | {
        ok: true;
        transactionId: string;
        raisedQuestions: number;
    }
    | {
        ok: false;
        reason: DropReason;
        message: string;
        raisedQuestions: number;
    };
38
/** Why an input was dropped instead of committed; validation failure is the only drop path. */
export type DropReason = "dirty_input";
39
/** Side-effect callbacks invoked by the commit pipeline; tests can substitute their own. */
export interface CommitHooks {
    /** Called after the transaction rows have been inserted. */
    onCommitted(transactionId: string): void;
    /** Called when validation rejects the input; `reason` is the validation message. */
    onDirtyInput(input: TransactionInput, reason: string): void;
    /** Called when the input named a merchant id that does not exist (the transaction is committed without a merchant). */
    onUnknownMerchant(input: TransactionInput, transactionId: string, attemptedId: string): void;
    /** Called when a posting's account had to be created as a placeholder. */
    onPlaceholderAccount(accountId: string, transactionId: string): void;
    /** Called when a posting's unknown account id was replaced by a fuzzy-matched existing account. */
    onSimilarAccount(originalId: string, matchedId: string, transactionId: string): void;
}
46
/**
 * Build the default hooks: each event is recorded as a typed question in the
 * database and ticks the progress emitter. Tests can pass their own hooks to
 * observe events without touching the question table.
 *
 * Questions are only written when `ctx.scanId` is set. Outside a scan there
 * is no audit trail to attach to, so best-effort resolution still happens but
 * the question is suppressed.
 */
export declare function defaultCommitHooks(db: Database.Database, ctx: CommitContext): CommitHooks;
56
/**
 * Validate, resolve and commit one transaction input.
 *
 * @param hooks side-effect callbacks; when omitted, `defaultCommitHooks(db, ctx)` is used
 * @returns the committed transaction id, or a `dirty_input` drop outcome
 */
export declare function runCommitPipeline(db: Database.Database, ctx: CommitContext, input: TransactionInput, hooks?: CommitHooks): CommitOutcome;
@@ -0,0 +1,204 @@
1
+ import { validateTransaction, insertTransactionRows, } from "../db/queries/transactions.js";
2
+ import { createAccount, findAccountById, findAccountsByFuzzyName, ensureStructuralAccount, ensureTopLevelRoot, TOP_LEVEL_TYPES, } from "../db/queries/account-balance.js";
3
+ import { recordQuestion } from "../db/queries/questions.js";
4
+ import { accountIdKey, accountPairKey, descriptorKey, } from "./rule-keys.js";
5
/**
 * Build the default hooks: each event is recorded as a typed question via
 * `recordQuestion` and ticks the progress emitter. Tests can pass their own
 * hooks to observe events without touching the question table.
 *
 * - Progress ticks are emitted only when both `ctx.progress` and
 *   `ctx.chunkId` are set.
 * - Questions are written only when `ctx.scanId` is set. Outside a scan there
 *   is no audit trail to attach to, so best-effort resolution still happens
 *   but the question (and its tick) is suppressed.
 */
export function defaultCommitHooks(db, ctx) {
    function tick(kind) {
        if (!ctx.progress || !ctx.chunkId)
            return;
        ctx.progress.emit({ chunkId: ctx.chunkId, kind });
    }
    function raise(question) {
        if (!ctx.scanId)
            return;
        recordQuestion(db, { ...question, file_id: ctx.fileId, scan_id: ctx.scanId });
        tick("question");
    }
    return {
        onCommitted() {
            tick("tx");
        },
        onDirtyInput(input, reason) {
            const prompt = `The scanner returned a row that couldn't be validated: ${reason}. ` +
                `Raw description: "${input.description}" on ${input.date}.`;
            raise({
                transaction_id: null,
                account_id: null,
                kind: "dirty_input",
                prompt,
                context: { description: input.description, date: input.date, reason },
            });
        },
        onUnknownMerchant(input, transactionId, attemptedId) {
            // Prefer the raw descriptor; fall back to the description.
            const descriptor = input.raw_descriptor || input.description;
            const prompt = `The scanner referenced merchant id "${attemptedId}" but no such merchant exists. ` +
                `Link "${descriptor}" to an existing merchant or leave it unlinked.`;
            raise({
                transaction_id: transactionId,
                account_id: null,
                kind: "unknown_merchant",
                prompt,
                context: { rule_key: descriptorKey(descriptor), descriptor, attempted_id: attemptedId },
            });
        },
        onPlaceholderAccount(accountId, transactionId) {
            const prompt = `A placeholder account was created for posting "${accountId}". ` +
                `Confirm the category, merge into an existing account, or rename.`;
            raise({
                transaction_id: transactionId,
                account_id: accountId,
                kind: "uncategorized",
                prompt,
                context: { rule_key: accountIdKey(accountId), placeholder_id: accountId },
            });
        },
        onSimilarAccount(originalId, matchedId, transactionId) {
            const prompt = `The scanner referenced "${originalId}" — the closest existing account is "${matchedId}". ` +
                `Confirm they are the same, or split them apart.`;
            raise({
                transaction_id: transactionId,
                account_id: matchedId,
                kind: "similar_accounts",
                prompt,
                context: {
                    rule_key: accountPairKey(originalId, matchedId),
                    original_id: originalId,
                    matched_id: matchedId,
                },
            });
        },
    };
}
68
/**
 * Validate, resolve and commit one transaction input.
 *
 * Stages: validate the input (a failure is reported via `onDirtyInput` and
 * returned as a `dirty_input` drop), resolve the merchant and the posting
 * accounts, insert the rows inside a database transaction, call
 * `onCommitted`, then dispatch the merchant/account hints to the hooks.
 *
 * NOTE(review): account resolution runs before the transaction is opened, so
 * any placeholder accounts it creates are not rolled back if the insert
 * throws — confirm this is intended.
 *
 * @param hooks side-effect callbacks; defaults to `defaultCommitHooks(db, ctx)`
 * @returns `{ ok: true, transactionId, raisedQuestions }` or the drop outcome
 */
export function runCommitPipeline(db, ctx, input, hooks = defaultCommitHooks(db, ctx)) {
    const validation = stageValidate(input);
    if (!validation.ok) {
        const message = validation.reason;
        hooks.onDirtyInput(input, message);
        return { ok: false, reason: "dirty_input", message, raisedQuestions: 1 };
    }
    const { validated } = validation;
    const merchant = stageResolveMerchant(db, validated);
    const accounts = stageResolveAccounts(db, validated);
    // Resolved merchant/postings override whatever the validated input carried.
    const committed = Object.assign({}, validated, {
        merchant_id: merchant.merchantId,
        postings: accounts.postings,
    });
    const insertAll = db.transaction(() => insertTransactionRows(db, committed));
    insertAll();
    const transactionId = committed.id;
    hooks.onCommitted(transactionId);
    const raisedQuestions = applyHints({ hooks, transactionId, merchant, accounts, input });
    return { ok: true, transactionId, raisedQuestions };
}
87
/**
 * Run `validateTransaction` and convert a thrown error into a tagged result
 * instead of letting it propagate. The reason is the error's `message`, or
 * its string form when it has none.
 */
function stageValidate(input) {
    let validated;
    try {
        validated = validateTransaction(input);
    }
    catch (err) {
        const reason = err?.message ?? String(err);
        return { ok: false, reason };
    }
    return { ok: true, validated };
}
95
/**
 * Check the input's merchant id against the `merchants` table.
 *
 * @returns `merchantId` is the id when it exists, otherwise null;
 *   `attemptedUnknownId` carries an id that was supplied but not found.
 */
function stageResolveMerchant(db, input) {
    const requestedId = input.merchant_id;
    if (!requestedId)
        return { merchantId: null, attemptedUnknownId: null };
    const found = db.prepare(`SELECT 1 FROM merchants WHERE id = ?`).get(requestedId);
    return found
        ? { merchantId: requestedId, attemptedUnknownId: null }
        : { merchantId: null, attemptedUnknownId: requestedId };
}
103
/**
 * Resolve every posting's account in input order.
 *
 * @returns the resolved postings plus the hints produced along the way
 *   (postings that needed no rescue contribute no hint)
 */
function stageResolveAccounts(db, input) {
    const resolutions = Array.from(input.postings, (p) => resolveOnePosting(db, p));
    return {
        postings: resolutions.map((r) => r.posting),
        hints: resolutions.flatMap((r) => (r.hint ? [r.hint] : [])),
    };
}
114
/**
 * Make sure a posting points at an account that exists.
 *
 * Tries, in order: the id as given; the best fuzzy match (hint
 * `similar_matched`); a placeholder account (hint `placeholder_created`).
 * The original posting object is never mutated.
 */
function resolveOnePosting(db, posting) {
    const requestedId = posting.account_id;
    if (findAccountById(db, requestedId)) {
        return { posting, hint: null };
    }
    const matchedId = bestFuzzyMatch(db, requestedId);
    if (!matchedId) {
        const placeholderId = ensurePlaceholderAccount(db, requestedId);
        return {
            posting: { ...posting, account_id: placeholderId },
            hint: { type: "placeholder_created", accountId: placeholderId },
        };
    }
    return {
        posting: { ...posting, account_id: matchedId },
        hint: { type: "similar_matched", originalId: requestedId, matchedId },
    };
}
131
// Threshold passed to `findAccountsByFuzzyName` when matching an account name.
const FUZZY_THRESHOLD = 0.7;
/**
 * Look for an existing account whose name fuzzily matches the last segment of
 * `accountId` (with `-`/`_` runs turned into spaces).
 *
 * @returns the id of the first match returned by the lookup, or null when
 *   the leaf is empty or nothing matched
 */
function bestFuzzyMatch(db, accountId) {
    const searchName = leafSegment(accountId).replace(/[-_]+/g, " ");
    if (searchName.length === 0)
        return null;
    const candidates = findAccountsByFuzzyName(db, searchName, FUZZY_THRESHOLD);
    const top = candidates[0];
    return top?.account.id ?? null;
}
139
/** Return the text after the last ":" in `id`, or the whole id when it contains no colon. */
function leafSegment(id) {
    const lastColon = id.lastIndexOf(":");
    return id.slice(lastColon + 1);
}
143
/**
 * Create the agent-supplied account id (and any missing intermediate parents)
 * as placeholders so the transaction can land.
 *
 * Empty segments are dropped before anything is created, and the id returned
 * is the normalized one that was actually created. A raw id such as
 * `expense::food` or `expense:food:` therefore resolves to `expense:food`,
 * rather than to a string no account row exists for.
 *
 * Falls back to `expense:uncategorized` when the id has no segments, when its
 * top-level segment isn't a known account type, or when creating a segment
 * fails for any reason other than `ACCOUNT_EXISTS`.
 *
 * @returns the id of an account the posting can reference
 */
function ensurePlaceholderAccount(db, accountId) {
    const segments = accountId.split(":").filter(Boolean);
    if (segments.length === 0)
        return ensureUncategorizedFallback(db);
    const type = segments[0];
    if (!TOP_LEVEL_TYPES.includes(type))
        return ensureUncategorizedFallback(db);
    ensureTopLevelRoot(db, type);
    // The root is handled above, so start at depth 2; every account created
    // here has a parent made of the preceding segments.
    for (let depth = 2; depth <= segments.length; depth++) {
        const id = segments.slice(0, depth).join(":");
        if (findAccountById(db, id))
            continue;
        const parentId = segments.slice(0, depth - 1).join(":");
        const name = humanizeSegment(segments[depth - 1]);
        try {
            createAccount(db, { id, name, type, parent_id: parentId });
        }
        catch (err) {
            // Already-present accounts are fine; anything else abandons the path.
            if (err?.code === "ACCOUNT_EXISTS")
                continue;
            return ensureUncategorizedFallback(db);
        }
    }
    return segments.join(":");
}
173
/** Ensure the catch-all `expense:uncategorized` account via `ensureStructuralAccount` and return its id. */
function ensureUncategorizedFallback(db) {
    const fallbackId = "expense:uncategorized";
    ensureStructuralAccount(db, fallbackId);
    return fallbackId;
}
177
/**
 * Turn an id segment into a display name: runs of `-`/`_` become single
 * spaces, the result is trimmed, and the first character of every word is
 * upper-cased. A segment with nothing left yields "Placeholder".
 */
function humanizeSegment(segment) {
    const words = segment.replace(/[-_]+/g, " ").trim();
    return words === ""
        ? "Placeholder"
        : words.replace(/\b\w/g, (ch) => ch.toUpperCase());
}
183
/**
 * Forward the resolution hints gathered for a committed transaction to the
 * hooks: first the unknown-merchant hint (if any), then each account hint in
 * order.
 *
 * @returns how many hook dispatches were made
 */
function applyHints(args) {
    const { hooks, transactionId, merchant, accounts, input } = args;
    let raised = 0;
    const unknownMerchantId = merchant.attemptedUnknownId;
    if (unknownMerchantId) {
        hooks.onUnknownMerchant(input, transactionId, unknownMerchantId);
        raised += 1;
    }
    for (const accountHint of accounts.hints) {
        dispatchHint(accountHint, hooks, transactionId);
        raised += 1;
    }
    return raised;
}
195
/** Route one account hint to its hook by `hint.type`; unrecognized types are ignored. */
function dispatchHint(hint, hooks, transactionId) {
    if (hint.type === "placeholder_created") {
        hooks.onPlaceholderAccount(hint.accountId, transactionId);
    }
    else if (hint.type === "similar_matched") {
        hooks.onSimilarAccount(hint.originalId, hint.matchedId, transactionId);
    }
}
@@ -0,0 +1,56 @@
1
+ import type Database from "libsql";
2
+ import { type TransactionInput } from "../db/queries/transactions.js";
3
+ /**
4
+ * Staged best-effort transaction commit.
5
+ *
6
+ * Each stage returns a tagged union. Side effects (raising questions,
7
+ * progress emission, placeholder account creation) flow through the
8
+ * `CommitHooks` interface so the pipeline stays pure-ish and testable.
9
+ *
10
+ * The only legitimate drop path is a `dirty_input` validation failure
11
+ * (no date, malformed amount, etc.). Every other resolution problem —
12
+ * unknown merchant, unknown account — is rescued in-place: NULL the
13
+ * merchant, fuzzy-match-or-create the account, raise a typed question
14
+ * for the clarifier to review later.
15
+ */
16
/** Per-call context passed to the commit entry point and its default hooks. */
export interface CommitContext {
    /** Owning scan, or null; default-hook question writes are gated on this being set. */
    readonly scanId: string | null;
    /** Source file id, or null. */
    readonly fileId: string | null;
    /** Chunk id, or null. */
    readonly chunkId: string | null;
    /** Progress sink, or null. */
    readonly progress: ProgressEmitter | null;
}
22
/** Sink for per-chunk progress events of kind "tx" or "question". */
export interface ProgressEmitter {
    emit(event: { chunkId: string; kind: "tx" | "question" }): void;
}
28
/**
 * Result of one commit attempt, discriminated on `ok`: a transaction id on
 * success, or a drop reason plus message on failure. Both variants report a
 * `raisedQuestions` count.
 */
export type CommitOutcome =
    | {
        ok: true;
        transactionId: string;
        raisedQuestions: number;
    }
    | {
        ok: false;
        reason: DropReason;
        message: string;
        raisedQuestions: number;
    };
38
/** Why an input was dropped instead of committed; validation failure is the only drop path. */
export type DropReason = "dirty_input";
39
/** Side-effect callbacks for commit events; tests can substitute their own to inspect events. */
export interface CommitHooks {
    /** A transaction was committed. */
    onCommitted(transactionId: string): void;
    /** The input failed validation; `reason` describes why. */
    onDirtyInput(input: TransactionInput, reason: string): void;
    /** The input named a merchant id (`attemptedId`) that could not be resolved. */
    onUnknownMerchant(input: TransactionInput, transactionId: string, attemptedId: string): void;
    /** A placeholder account was created for a posting. */
    onPlaceholderAccount(accountId: string, transactionId: string): void;
    /** An unknown account id was matched to a similar existing account. */
    onSimilarAccount(originalId: string, matchedId: string, transactionId: string): void;
}
46
/**
 * Build the default hooks: each event is recorded as a typed question in the
 * database and ticks the progress emitter. Tests can pass their own hooks to
 * observe events without touching the question table.
 *
 * Questions are only written when `ctx.scanId` is set. Outside a scan there
 * is no audit trail to attach to, so best-effort resolution still happens but
 * the question is suppressed.
 */
export declare function defaultCommitHooks(db: Database.Database, ctx: CommitContext): CommitHooks;
56
/**
 * Staged best-effort commit of one transaction input.
 *
 * @param hooks optional side-effect callbacks
 * @returns the committed transaction id, or a `dirty_input` drop outcome
 */
export declare function commitTransaction(db: Database.Database, ctx: CommitContext, input: TransactionInput, hooks?: CommitHooks): CommitOutcome;