plasalid 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/README.md +2 -2
  2. package/dist/ai/agent.d.ts +6 -7
  3. package/dist/ai/agent.js +27 -11
  4. package/dist/ai/personas.js +48 -46
  5. package/dist/ai/system-prompt.js +1 -1
  6. package/dist/ai/tools/account-mutex.d.ts +1 -0
  7. package/dist/ai/tools/account-mutex.js +16 -0
  8. package/dist/ai/tools/index.js +4 -12
  9. package/dist/ai/tools/ingest.d.ts +1 -1
  10. package/dist/ai/tools/ingest.js +282 -242
  11. package/dist/ai/tools/merchants.js +1 -28
  12. package/dist/ai/tools/read.js +8 -8
  13. package/dist/ai/tools/record.js +3 -36
  14. package/dist/ai/tools/resolve.js +25 -22
  15. package/dist/ai/tools/scan.js +0 -1
  16. package/dist/ai/tools/types.d.ts +14 -21
  17. package/dist/cli/commands/record.js +1 -82
  18. package/dist/cli/commands/resolve.d.ts +5 -2
  19. package/dist/cli/commands/resolve.js +36 -5
  20. package/dist/cli/commands/revert.js +4 -2
  21. package/dist/cli/commands/rules.js +2 -2
  22. package/dist/cli/commands/scan.js +199 -128
  23. package/dist/cli/commands/status.js +5 -5
  24. package/dist/cli/index.js +8 -29
  25. package/dist/cli/ink/ScanDashboard.d.ts +49 -0
  26. package/dist/cli/ink/ScanDashboard.js +214 -0
  27. package/dist/cli/ink/scan_dashboard.d.ts +40 -25
  28. package/dist/cli/ink/scan_dashboard.js +139 -44
  29. package/dist/db/queries/account-balance.d.ts +1 -1
  30. package/dist/db/queries/questions.d.ts +62 -0
  31. package/dist/db/queries/questions.js +110 -0
  32. package/dist/db/queries/transactions.d.ts +1 -1
  33. package/dist/db/queries/unknowns.d.ts +17 -15
  34. package/dist/db/queries/unknowns.js +35 -39
  35. package/dist/db/schema.js +6 -28
  36. package/dist/scanner/audit/auditor.d.ts +31 -0
  37. package/dist/scanner/audit/auditor.js +72 -0
  38. package/dist/scanner/audit/engine.d.ts +10 -0
  39. package/dist/scanner/audit/engine.js +98 -0
  40. package/dist/scanner/audit/eventBus.d.ts +60 -0
  41. package/dist/scanner/audit/eventBus.js +35 -0
  42. package/dist/scanner/audit/passes/index.d.ts +11 -0
  43. package/dist/scanner/audit/passes/index.js +9 -0
  44. package/dist/scanner/audit/passes/types.d.ts +23 -0
  45. package/dist/scanner/audit/passes/types.js +1 -0
  46. package/dist/scanner/audit/types.d.ts +27 -0
  47. package/dist/scanner/audit/types.js +1 -0
  48. package/dist/scanner/auditor.d.ts +51 -0
  49. package/dist/scanner/auditor.js +80 -0
  50. package/dist/scanner/buffer/engine.d.ts +9 -0
  51. package/dist/scanner/buffer/engine.js +110 -0
  52. package/dist/scanner/buffer/sharedBuffer.d.ts +78 -0
  53. package/dist/scanner/buffer/sharedBuffer.js +130 -0
  54. package/dist/scanner/buffer/types.d.ts +67 -0
  55. package/dist/scanner/buffer/types.js +1 -0
  56. package/dist/scanner/buffer.d.ts +45 -38
  57. package/dist/scanner/buffer.js +93 -61
  58. package/dist/scanner/bus/engine.d.ts +11 -0
  59. package/dist/scanner/bus/engine.js +42 -0
  60. package/dist/scanner/bus/types.d.ts +53 -0
  61. package/dist/scanner/bus/types.js +1 -0
  62. package/dist/scanner/bus.d.ts +38 -0
  63. package/dist/scanner/bus.js +37 -0
  64. package/dist/scanner/chunk-worker.d.ts +19 -0
  65. package/dist/scanner/chunk-worker.js +67 -0
  66. package/dist/scanner/chunkWorker.d.ts +20 -0
  67. package/dist/scanner/chunkWorker.js +59 -0
  68. package/dist/scanner/chunker/chunker.d.ts +7 -0
  69. package/dist/scanner/chunker/chunker.js +60 -0
  70. package/dist/scanner/chunker.d.ts +7 -0
  71. package/dist/scanner/chunker.js +60 -0
  72. package/dist/scanner/converge.d.ts +29 -0
  73. package/dist/scanner/converge.js +15 -0
  74. package/dist/scanner/decrypt.d.ts +10 -0
  75. package/dist/scanner/decrypt.js +80 -0
  76. package/dist/scanner/engine/scanEngine.d.ts +24 -0
  77. package/dist/scanner/engine/scanEngine.js +87 -0
  78. package/dist/scanner/engine/types.d.ts +90 -0
  79. package/dist/scanner/engine/types.js +1 -0
  80. package/dist/scanner/engine.d.ts +90 -0
  81. package/dist/scanner/engine.js +84 -0
  82. package/dist/scanner/file-worker.d.ts +33 -0
  83. package/dist/scanner/file-worker.js +28 -0
  84. package/dist/scanner/fileWorker.d.ts +33 -0
  85. package/dist/scanner/fileWorker.js +22 -0
  86. package/dist/scanner/hooks/types.d.ts +25 -0
  87. package/dist/scanner/hooks/types.js +1 -0
  88. package/dist/scanner/hooks.d.ts +23 -0
  89. package/dist/scanner/hooks.js +1 -0
  90. package/dist/scanner/parse.d.ts +10 -0
  91. package/dist/scanner/parse.js +47 -0
  92. package/dist/scanner/passes/index.d.ts +8 -0
  93. package/dist/scanner/passes/index.js +6 -0
  94. package/dist/scanner/passes/types.d.ts +22 -0
  95. package/dist/scanner/passes/types.js +1 -0
  96. package/dist/scanner/pdf/chunker.d.ts +7 -0
  97. package/dist/scanner/pdf/chunker.js +60 -0
  98. package/dist/scanner/pdf/password-store.d.ts +34 -0
  99. package/dist/scanner/pdf/password-store.js +83 -0
  100. package/dist/scanner/pdf/pdf-unlock.d.ts +17 -0
  101. package/dist/scanner/pdf/pdf-unlock.js +50 -0
  102. package/dist/scanner/pdf/pdf.d.ts +17 -0
  103. package/dist/scanner/pdf/pdf.js +36 -0
  104. package/dist/scanner/pdf/state-machine.d.ts +60 -0
  105. package/dist/scanner/pdf/state-machine.js +64 -0
  106. package/dist/scanner/pdf/unlock.d.ts +22 -0
  107. package/dist/scanner/pdf/unlock.js +121 -0
  108. package/dist/scanner/phase-decrypt.d.ts +10 -0
  109. package/dist/scanner/phase-decrypt.js +80 -0
  110. package/dist/scanner/phase-parse.d.ts +10 -0
  111. package/dist/scanner/phase-parse.js +46 -0
  112. package/dist/scanner/phases/chunk.d.ts +8 -0
  113. package/dist/scanner/phases/chunk.js +13 -0
  114. package/dist/scanner/phases/commit.d.ts +12 -0
  115. package/dist/scanner/phases/commit.js +140 -0
  116. package/dist/scanner/phases/decrypt.d.ts +10 -0
  117. package/dist/scanner/phases/decrypt.js +80 -0
  118. package/dist/scanner/phases/parse.d.ts +10 -0
  119. package/dist/scanner/phases/parse.js +46 -0
  120. package/dist/scanner/phases/resolve.d.ts +10 -0
  121. package/dist/scanner/phases/resolve.js +17 -0
  122. package/dist/scanner/phases/review.d.ts +10 -0
  123. package/dist/scanner/phases/review.js +12 -0
  124. package/dist/scanner/progress.d.ts +14 -0
  125. package/dist/scanner/progress.js +21 -0
  126. package/dist/scanner/resolver-memory.d.ts +8 -0
  127. package/dist/scanner/resolver-memory.js +24 -0
  128. package/dist/scanner/resolver.d.ts +39 -0
  129. package/dist/scanner/resolver.js +196 -0
  130. package/dist/scanner/result.d.ts +17 -0
  131. package/dist/scanner/result.js +19 -0
  132. package/dist/scanner/run-passes.d.ts +30 -0
  133. package/dist/scanner/run-passes.js +15 -0
  134. package/dist/scanner/unlock.js +1 -1
  135. package/dist/scanner/worker.d.ts +19 -0
  136. package/dist/scanner/worker.js +67 -0
  137. package/dist/scanner/workers/chunkWorker.d.ts +20 -0
  138. package/dist/scanner/workers/chunkWorker.js +65 -0
  139. package/dist/scanner/workers/fileWorker.d.ts +32 -0
  140. package/dist/scanner/workers/fileWorker.js +22 -0
  141. package/package.json +1 -1
@@ -0,0 +1,46 @@
1
+ import { runWithConcurrency } from "../concurrency.js";
2
+ import { runFileWorker } from "../file-worker.js";
3
+ import { errorMessage } from "../result.js";
4
/** Number of files parsed concurrently when `options.maxFileWorkers` is unset. */
const DEFAULT_MAX_FILE_WORKERS = 5;
/** Number of chunks parsed concurrently per file when `options.maxChunkWorkersPerFile` is unset. */
const DEFAULT_MAX_CHUNK_WORKERS_PER_FILE = 5;
/** Upper bound applied to every worker-count option, whatever the caller asks for. */
const HARD_CAP = 8;
/**
 * Normalise a caller-supplied worker count into an integer in [1, HARD_CAP].
 *
 * @param n        Requested count; `null`/`undefined` selects `fallback`.
 * @param fallback Count to use when `n` is missing or not a number.
 * @returns A whole number between 1 and HARD_CAP inclusive.
 */
const clamp = (n, fallback) => {
    const requested = Number(n ?? fallback);
    // Math.min/Math.max propagate NaN, which would hand the concurrency
    // runner a NaN limit; treat an unparsable option like a missing one.
    const usable = Number.isNaN(requested) ? fallback : requested;
    // A fractional worker count is meaningless; round toward zero after
    // clamping so the result can never drop below 1.
    return Math.trunc(Math.min(HARD_CAP, Math.max(1, usable)));
};
8
/**
 * Phase 3 — fan out one file worker per decrypted file that has chunks,
 * bounded by `maxFileWorkers`; each file worker is told how many of its
 * chunks it may process at once via `maxChunkWorkers`. The scan id and the
 * progress sink are threaded through from `state`.
 *
 * @param db    Database handle passed to every file worker and used to flag
 *              files as scanned.
 * @param state Scan state; reads `options`, `decrypted`, `chunks`, `scanId`
 *              and `progress`, and appends to `errors`.
 * @param hooks Optional `beforeParse` / `afterParse` callbacks (awaited) plus
 *              whatever the file worker itself consumes.
 */
export async function parsePhase(db, state, hooks) {
    await hooks.beforeParse?.(state);
    const maxFile = clamp(state.options.maxFileWorkers, DEFAULT_MAX_FILE_WORKERS);
    const maxChunk = clamp(state.options.maxChunkWorkersPerFile, DEFAULT_MAX_CHUNK_WORKERS_PER_FILE);
    // Bucket chunks by owning file in a single pass instead of re-filtering
    // the whole chunk list once per file.
    const chunksByFile = new Map();
    for (const chunk of state.chunks) {
        const bucket = chunksByFile.get(chunk.fileId);
        if (bucket)
            bucket.push(chunk);
        else
            chunksByFile.set(chunk.fileId, [chunk]);
    }
    const fileGroups = state.decrypted
        .map(file => ({
        fileId: file.path,
        scannedFileId: file.scannedFileId,
        chunks: chunksByFile.get(file.path) ?? [],
    }))
        // Files that produced no chunks have nothing to parse.
        .filter(g => g.chunks.length > 0);
    const tasks = fileGroups.map(group => () => runFileWorker({
        db,
        scanId: state.scanId,
        scannedFileId: group.scannedFileId,
        progress: state.progress,
        fileId: group.fileId,
        chunks: group.chunks,
        maxChunkWorkers: maxChunk,
    }, hooks));
    const settled = await runWithConcurrency(tasks, maxFile);
    // Results are positional: settled[i] belongs to fileGroups[i].
    for (let i = 0; i < settled.length; i++) {
        const r = settled[i];
        if (!r.ok)
            state.errors.push({ phase: "parse", target: fileGroups[i].fileId, error: errorMessage(r.error) });
    }
    // NOTE(review): every decrypted file with a scanned_files row is flagged
    // 'scanned', including files whose worker failed above or that had no
    // chunks — confirm this is intended.
    let markScanned;
    for (const file of state.decrypted) {
        if (!file.scannedFileId)
            continue;
        // Prepare lazily and only once; the statement text never changes.
        if (!markScanned)
            markScanned = db.prepare(`UPDATE scanned_files SET status = 'scanned', scanned_at = datetime('now') WHERE id = ?`);
        markScanned.run(file.scannedFileId);
    }
    await hooks.afterParse?.(state);
}
@@ -0,0 +1,10 @@
1
+ import type Database from "libsql";
2
+ import type { ScanState } from "../engine.js";
3
+ import type { ScanHooks } from "../hooks.js";
4
/**
 * Phase 4 — close every open unknown raised during this scan. Deterministic
 * passes (memory rules, merchant defaults) run first; whatever survives goes
 * to the LLM resolver agent when interactive. Closed unknowns get compacted
 * into scanning_hint memories so the next scan picks them up automatically.
 *
 * @param db    Database handle forwarded to the resolver.
 * @param state Scan state; `scanId` and `options.interactive` are read, and
 *              the resolver's summary is stored on `resolveSummary`.
 * @param hooks Optional `beforeResolve` / `afterResolve` callbacks; both are
 *              awaited, and `afterResolve` also receives the summary.
 */
export declare function resolvePhase(db: Database.Database, state: ScanState, hooks: ScanHooks): Promise<void>;
@@ -0,0 +1,17 @@
1
+ import { runResolve } from "../resolver.js";
2
/**
 * Phase 4 of the scan: hand this scan's questions to the shared resolver
 * (`runResolve`) and keep its summary on the scan state.
 *
 * `beforeResolve` is awaited before any work; `afterResolve` is awaited last
 * and receives both the state and the summary. Interactivity defaults to
 * `true` when the option is not set.
 */
export async function resolvePhase(db, state, hooks) {
    await hooks.beforeResolve?.(state);
    const request = {
        db,
        scanId: state.scanId,
        interactive: state.options.interactive ?? true,
    };
    const outcome = await runResolve(request);
    state.resolveSummary = outcome;
    await hooks.afterResolve?.(state, outcome);
}
@@ -0,0 +1,10 @@
1
+ import type Database from "libsql";
2
+ import type { ScanState } from "../engine.js";
3
+ import type { ScanHooks } from "../hooks.js";
4
/**
 * Phase 4 — present the buffer to the user for confirmation, then set
 * `state.review` to either `"commit"` or `"abort"`. Today this is a simple
 * auto-commit path so the engine compiles; the full Ink TUI is a separate
 * follow-up (review TUI task).
 *
 * NOTE(review): the resolve phase is also labelled "Phase 4" — confirm the
 * intended numbering.
 *
 * @param _db   Unused by the current implementation.
 * @param state Scan state; `buffer.snapshot()` is read and `review` is set
 *              (currently always to `"commit"`).
 * @param hooks Optional `beforeReview(state, snapshot)` and
 *              `afterReview(state)` callbacks, both awaited.
 */
export declare function reviewPhase(_db: Database.Database, state: ScanState, hooks: ScanHooks): Promise<void>;
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Phase 4 — present the buffer to the user for confirmation, then set
3
+ * `state.review` to either `"commit"` or `"abort"`. Today this is a simple
4
+ * auto-commit path so the engine compiles; the full Ink TUI is a separate
5
+ * follow-up (review TUI task).
6
+ */
7
+ export async function reviewPhase(_db, state, hooks) {
8
+ const snapshot = state.buffer.snapshot();
9
+ await hooks.beforeReview?.(state, snapshot);
10
+ state.review = "commit";
11
+ await hooks.afterReview?.(state);
12
+ }
@@ -0,0 +1,14 @@
1
/**
 * Single-typed event sink scan-worker tools emit into as they write to the DB.
 * Replaces the bus + buffer for in-flight progress: one consumer at a time
 * (dashboard or plain-hooks counters) reads ticks per chunk.
 */
export interface ScanProgressEvent {
    /** Identifier of the chunk this tick belongs to. */
    readonly chunkId: string;
    /**
     * What was written: `"question"` is emitted after a question row is
     * recorded; `"tx"` presumably marks a recorded transaction — confirm
     * against the emitting tools.
     */
    readonly kind: "tx" | "question";
}
10
/** Minimal publish/subscribe sink for {@link ScanProgressEvent}s. */
export interface ScanProgress {
    /** Deliver `event` synchronously to every current subscriber. */
    emit(event: ScanProgressEvent): void;
    /**
     * Register `handler` for future events.
     * @returns A function that removes this handler again.
     */
    subscribe(handler: (e: ScanProgressEvent) => void): () => void;
}
14
/**
 * Create an in-memory progress sink with no subscribers. An exception thrown
 * by one subscriber is logged to `console.error` and does not prevent the
 * remaining subscribers from receiving the event.
 */
export declare function createProgress(): ScanProgress;
@@ -0,0 +1,21 @@
1
+ export function createProgress() {
2
+ const subscribers = new Set();
3
+ return {
4
+ emit(event) {
5
+ for (const fn of subscribers) {
6
+ try {
7
+ fn(event);
8
+ }
9
+ catch (err) {
10
+ console.error(`[progress listener] ${err instanceof Error ? err.message : String(err)}`);
11
+ }
12
+ }
13
+ },
14
+ subscribe(handler) {
15
+ subscribers.add(handler);
16
+ return () => {
17
+ subscribers.delete(handler);
18
+ };
19
+ },
20
+ };
21
+ }
@@ -0,0 +1,8 @@
1
+ import type Database from "libsql";
2
+ import type { ClosedQuestion } from "../db/queries/questions.js";
3
/**
 * Compact every closed question into a memories row (category `scanning_hint`).
 * The next scan's deterministic memoryRulePass picks them up. Dedups on body —
 * an identical rule for the same kind + prompt won't be re-inserted.
 *
 * @param db       Database holding the `memories` table.
 * @param closures Closed questions to turn into rules; an empty list is a no-op.
 * @returns The number of rows actually inserted (duplicates are not counted).
 */
export declare function synthesizeMemoryRules(db: Database.Database, closures: readonly ClosedQuestion[]): number;
@@ -0,0 +1,24 @@
1
/**
 * Persist one `scanning_hint` memory per closed question, skipping any rule
 * whose exact text is already stored under that category.
 *
 * @returns How many new rows were written.
 */
export function synthesizeMemoryRules(db, closures) {
    if (closures.length === 0)
        return 0;
    const findExisting = db.prepare(`SELECT 1 FROM memories WHERE category = ? AND content = ? LIMIT 1`);
    const addRule = db.prepare(`INSERT INTO memories (content, category) VALUES (?, ?)`);
    let added = 0;
    for (const closure of closures) {
        const rule = formatRule(closure);
        const alreadyStored = Boolean(findExisting.get("scanning_hint", rule));
        if (!alreadyStored) {
            addRule.run(rule, "scanning_hint");
            added += 1;
        }
    }
    return added;
}
21
/**
 * Render a closed question as rule text: `[kind] prompt -> answer`.
 * A missing kind becomes `general`; whitespace runs in the prompt collapse
 * to single spaces, and both prompt and answer are trimmed.
 */
function formatRule(c) {
    const label = c.kind ?? "general";
    const question = c.prompt.replace(/\s+/g, " ").trim();
    const reply = c.answer.trim();
    return `[${label}] ${question} -> ${reply}`;
}
@@ -0,0 +1,39 @@
1
+ import type Database from "libsql";
2
+ import { type QuestionRow } from "../db/queries/questions.js";
3
/** Shared state handed to every resolver pass while one resolve run is in progress. */
export interface ResolverContext {
    /** Database the pass may query or update. */
    readonly db: Database.Database;
    /** Per-pass counters for the current run, keyed by pass name. */
    readonly tally: Record<string, number>;
}
7
/** A deterministic strategy that may answer a question without the LLM agent. */
export interface ResolverPass {
    /** Name used as the tally key and in error logs. */
    readonly name: string;
    /** Question kinds this pass applies to; other kinds never reach it. */
    readonly kinds: readonly string[];
    /** Try to close one question. Returns the answer if closed, else null. */
    tryResolve(u: QuestionRow, ctx: ResolverContext): Promise<string | null>;
}
13
/** Outcome of one resolve run. */
export interface ResolveSummary {
    /** Number of questions listed at the start of the run (the listing is capped at 1000). */
    readonly total: number;
    /** `total` minus `remaining`. */
    readonly resolved: number;
    /** Question count reported by the database after the run. */
    readonly remaining: number;
    /** Closures per source: one key per pass name, plus `agent_resolution` for the LLM agent. */
    readonly tally: Readonly<Record<string, number>>;
}
19
/** Options accepted by `runResolve`. */
export interface RunResolveOpts {
    /** Database holding questions, transactions, merchants and memories. */
    db: Database.Database;
    /** Narrows to a single scan's questions. Omit = every question. */
    scanId?: string;
    /** When true (the default) leftovers are handed to the LLM resolver agent. */
    interactive?: boolean;
    /** Forwarded to the resolver agent's context so it can ask the user. */
    promptUser?: (prompt: string, options?: string[], facts?: any) => Promise<string>;
    /** Forwarded to the resolver agent as its progress callback. */
    onProgress?: (event: {
        phase: "tool" | "responding";
        toolName?: string;
        toolCount: number;
        elapsedMs: number;
    }) => void;
}
32
/** Deterministic passes, in the order they are tried; the first non-null answer wins. */
export declare const RESOLVER_PASSES: readonly ResolverPass[];
33
/**
 * Single entry point shared by the in-scan resolve phase and the standalone
 * `plasalid resolve` command. Runs deterministic passes first, then (when
 * interactive) hands the leftovers to the LLM resolver agent. Closed
 * questions get compacted into scanning_hint memories.
 *
 * @param opts See {@link RunResolveOpts}; `interactive` defaults to true.
 * @returns Totals for the run; all-zero when there were no questions to start with.
 */
export declare function runResolve(opts: RunResolveOpts): Promise<ResolveSummary>;
@@ -0,0 +1,196 @@
1
+ import { closeQuestion, listQuestions, countQuestions, } from "../db/queries/questions.js";
2
+ import { updatePosting } from "../db/queries/transactions.js";
3
+ import { runResolveAgent } from "../ai/agent.js";
4
+ import { synthesizeMemoryRules } from "./resolver-memory.js";
5
+ import { converge } from "./converge.js";
6
+ const MAX_AGENT_PASSES = 3;
7
/**
 * Deterministic pass backed by stored `scanning_hint` memories. Each memory
 * is parsed as `key -> answer`; when a memory's key equals the question's
 * canonical key, that answer is returned. Memories that do not parse are
 * ignored. The memories are re-read from the database on every call.
 */
const memoryRulePass = {
    name: "memory_rule",
    kinds: ["uncategorized", "uncategorized_expense", "duplicate", "correlation", "recurrence_candidate", "similar_accounts", "boundary_continuation", "scan_truncated", "scan_commit_failure"],
    async tryResolve(u, ctx) {
        const lookup = ctx.db.prepare(`SELECT content FROM memories WHERE category = 'scanning_hint'`);
        const stored = lookup.all();
        const wanted = canonicalKey(u);
        for (const row of stored) {
            const parsed = parseRule(row.content);
            if (parsed && parsed.key === wanted)
                return parsed.answer;
        }
        return null;
    },
};
29
/**
 * Deterministic pass for `uncategorized_expense` questions. When the
 * question's transaction has a merchant and that merchant has a
 * `default_account_id`, every posting of the transaction that sits on
 * `expense:uncategorized` is moved to that account, and the account id is
 * returned as the answer. Returns null (changing nothing) when any link in
 * that chain is missing or no such posting exists.
 */
const merchantDefaultPass = {
    name: "merchant_default",
    kinds: ["uncategorized_expense"],
    async tryResolve(u, ctx) {
        const { db } = ctx;
        const txId = u.transaction_id;
        if (!txId)
            return null;
        const txRow = db
            .prepare(`SELECT merchant_id FROM transactions WHERE id = ?`)
            .get(txId);
        if (!txRow?.merchant_id)
            return null;
        const merchantRow = db
            .prepare(`SELECT default_account_id FROM merchants WHERE id = ?`)
            .get(txRow.merchant_id);
        const defaultAccount = merchantRow?.default_account_id;
        if (!defaultAccount)
            return null;
        const uncategorized = db
            .prepare(`SELECT p.id FROM postings p
JOIN accounts a ON a.id = p.account_id
WHERE p.transaction_id = ? AND a.id = 'expense:uncategorized'`)
            .all(txId);
        if (uncategorized.length === 0)
            return null;
        uncategorized.forEach((posting) => {
            updatePosting(db, posting.id, { account_id: defaultAccount });
        });
        return defaultAccount;
    },
};
64
/**
 * Deterministic passes in the order they are attempted. Order matters: for a
 * question that several passes accept, the first pass to return a non-null
 * answer wins, so stored memory rules take precedence over merchant defaults.
 */
export const RESOLVER_PASSES = [
    memoryRulePass,
    merchantDefaultPass,
];
68
/**
 * Single entry point shared by the in-scan resolve phase and the standalone
 * `plasalid resolve` command. Runs deterministic passes first, then (when
 * interactive) hands the leftovers to the LLM resolver agent. Closed
 * questions get compacted into scanning_hint memories.
 *
 * @param opts `db` is required; `scanId` narrows the run to one scan;
 *             `interactive` defaults to true; `promptUser` / `onProgress`
 *             are forwarded to the agent loop.
 * @returns `{ total, resolved, remaining, tally }`. `total` is the number of
 *          questions listed up front (at most 1000), `remaining` is the
 *          count after the run, and `resolved` is never negative.
 */
export async function runResolve(opts) {
    const { db } = opts;
    const tally = {};
    const closures = [];
    const initial = listQuestions(db, { scanId: opts.scanId, limit: 1000 });
    const total = initial.length;
    if (total === 0) {
        return { total: 0, resolved: 0, remaining: 0, tally };
    }
    for (const u of initial) {
        const passes = matchingPasses(u);
        if (passes.length === 0)
            continue;
        const result = await tryPasses(u, passes, { db, tally });
        if (!result)
            continue;
        // closeQuestion may decline (falsy result); only count real closures.
        const closed = closeQuestion(db, u.id, result.answer);
        if (!closed)
            continue;
        closures.push(closed);
        tally[result.passName] = (tally[result.passName] ?? 0) + 1;
    }
    const interactive = opts.interactive ?? true;
    if (interactive && countRemaining(db, opts.scanId) > 0) {
        await runAgentLoop(opts, closures, tally);
    }
    synthesizeMemoryRules(db, closures);
    const remaining = countRemaining(db, opts.scanId);
    // `total` comes from a listing capped at 1000 rows while `remaining` is an
    // uncapped count, so the difference can dip below zero on large backlogs
    // (or if questions are added mid-run). Never report negative progress.
    const resolved = Math.max(0, total - remaining);
    return { total, resolved, remaining, tally };
}
104
/**
 * Select the deterministic passes that declare support for this question's
 * kind, preserving registry order. Questions without a kind match nothing.
 */
function matchingPasses(u) {
    const kind = u.kind;
    return kind
        ? RESOLVER_PASSES.filter((candidate) => candidate.kinds.includes(kind))
        : [];
}
109
+ async function tryPasses(u, passes, ctx) {
110
+ for (const pass of passes) {
111
+ let answer;
112
+ try {
113
+ answer = await pass.tryResolve(u, ctx);
114
+ }
115
+ catch (err) {
116
+ console.error(`[resolver pass ${pass.name}] ${err instanceof Error ? err.message : String(err)}`);
117
+ answer = null;
118
+ }
119
+ if (answer != null)
120
+ return { passName: pass.name, answer };
121
+ }
122
+ return null;
123
+ }
124
/**
 * Count questions via `countQuestions`: scoped to `scanId` when one is
 * given, otherwise with no filter at all.
 */
function countRemaining(db, scanId) {
    if (scanId) {
        return countQuestions(db, { scan_id: scanId });
    }
    return countQuestions(db);
}
127
/**
 * Stall-protected outer loop around the LLM resolver agent, driven by
 * `converge` with at most MAX_AGENT_PASSES passes.
 *
 * Each pass re-lists the leftover questions (up to 1000), sends them to the
 * agent in a single user message, and reports the new remaining count. The
 * loop is done when that count reaches zero and stalled when a pass fails to
 * lower it. Every question the agent closes is delivered through the
 * `onQuestionClosed` callback, which appends it to `closures` and bumps
 * `tally["agent_resolution"]`.
 */
async function runAgentLoop(opts, closures, tally) {
    const { db } = opts;
    const openCount = () => countRemaining(db, opts.scanId);
    const recordAgentClosure = (closed) => {
        closures.push(closed);
        tally["agent_resolution"] = (tally["agent_resolution"] ?? 0) + 1;
    };
    const runOnePass = async () => {
        const pending = listQuestions(db, { scanId: opts.scanId, limit: 1000 });
        if (pending.length === 0)
            return 0;
        await runResolveAgent({
            db,
            prompt: {},
            initialMessages: [{ role: "user", content: buildResolveUserMessage(pending) }],
            agentCtx: {
                interactive: true,
                promptUser: opts.promptUser,
                onQuestionClosed: recordAgentClosure,
            },
            onProgress: opts.onProgress,
        });
        return openCount();
    };
    await converge({
        initial: openCount(),
        maxAttempts: MAX_AGENT_PASSES,
        isDone: (left) => left === 0,
        isStalled: (now, before) => now >= before,
        onPass: runOnePass,
    });
}
164
/**
 * Render the open questions as the plain-text user message for the resolver
 * agent: a count header, a blank line, a `Questions:` label, then two lines
 * per question — an id/kind/tx/acct/file summary (with the options, if any)
 * and the prompt with newlines flattened to spaces.
 */
function buildResolveUserMessage(questions) {
    const header = [`${questions.length} question(s) to resolve.`, ``, `Questions:`];
    const entries = questions.flatMap((q) => {
        const choices = parseOptions(q.options_json);
        const choicesSuffix = choices.length > 0 ? ` | options=[${choices.join(" / ")}]` : "";
        return [
            `- ${q.id} | kind=${q.kind ?? "(none)"} | tx=${q.transaction_id ?? "(none)"} | acct=${q.account_id ?? "(none)"} | file=${q.file_id ?? "(none)"}${choicesSuffix}`,
            ` prompt: ${q.prompt.replace(/\n/g, " ")}`,
        ];
    });
    return [...header, ...entries].join("\n");
}
173
/**
 * Decode a question's stored options. Returns the string entries of the JSON
 * array in `json`; anything else — empty/missing input, invalid JSON, or a
 * non-array value — yields an empty list. Non-string entries are dropped.
 */
function parseOptions(json) {
    if (!json)
        return [];
    let decoded;
    try {
        decoded = JSON.parse(json);
    }
    catch {
        return [];
    }
    if (!Array.isArray(decoded))
        return [];
    return decoded.filter((entry) => typeof entry === "string");
}
184
/**
 * Key used to match a question against stored rules: `[kind] prompt`, with
 * `general` standing in for a missing kind and the prompt's whitespace runs
 * collapsed to single spaces and trimmed.
 */
function canonicalKey(u) {
    const label = u.kind ?? "general";
    const flattened = u.prompt.replace(/\s+/g, " ");
    const question = flattened.trim();
    return `[${label}] ${question}`;
}
187
/**
 * Split stored rule text at its last ` -> ` into a trimmed key and a trimmed
 * answer. Returns null when the separator is absent or either side is empty
 * after trimming.
 */
function parseRule(body) {
    const separator = " -> ";
    const at = body.lastIndexOf(separator);
    if (at < 0)
        return null;
    const key = body.slice(0, at).trim();
    const answer = body.slice(at + separator.length).trim();
    return key && answer ? { key, answer } : null;
}
@@ -0,0 +1,17 @@
1
/**
 * Lightweight Result helpers shared across scanner subdomains. Use this
 * instead of inline try/catch when a function can fail with a human-readable
 * reason and the caller needs to branch on the outcome (decrypt, chunk parse,
 * commit-one-transaction). Distinct from concurrency.ts `Settled<T>` — that
 * type is owned by `runWithConcurrency` and includes an `error: unknown`;
 * `Result<T>` stringifies the error up front for ergonomic message handling.
 *
 * Discriminate on `ok`: `true` carries `value`, `false` carries the error
 * message as a string.
 */
export type Result<T> = {
    ok: true;
    value: T;
} | {
    ok: false;
    error: string;
};
16
/** Turn any thrown value into a message: `Error#message` for errors, `String(err)` otherwise. */
export declare function errorMessage(err: unknown): string;
/**
 * Run `fn` (sync or async) and capture the outcome instead of throwing:
 * `{ ok: true, value }` on success, `{ ok: false, error }` with the
 * stringified failure otherwise. The returned promise does not reject on
 * failures of `fn`.
 */
export declare function tryExecute<T>(fn: () => Promise<T> | T): Promise<Result<T>>;
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Lightweight Result helpers shared across scanner subdomains. Use this
3
+ * instead of inline try/catch when a function can fail with a human-readable
4
+ * reason and the caller needs to branch on the outcome (decrypt, chunk parse,
5
+ * commit-one-transaction). Distinct from concurrency.ts `Settled<T>` — that
6
+ * type is owned by `runWithConcurrency` and includes an `error: unknown`;
7
+ * `Result<T>` stringifies the error up front for ergonomic message handling.
8
+ */
9
+ export function errorMessage(err) {
10
+ return err instanceof Error ? err.message : String(err);
11
+ }
12
/**
 * Invoke `fn` and fold its outcome into a Result: `{ ok: true, value }` when
 * it returns or resolves, `{ ok: false, error }` — with the failure turned
 * into a message by `errorMessage` — when it throws or rejects.
 */
export async function tryExecute(fn) {
    let value;
    try {
        value = await fn();
    }
    catch (cause) {
        return { ok: false, error: errorMessage(cause) };
    }
    return { ok: true, value };
}
@@ -0,0 +1,30 @@
1
/**
 * Generic "drive a loop with named hooks" helper.
 *
 * The driver owns counting passes, stall detection, and the iteration cap.
 * Everything else (work performed each pass, what to print when, how to react
 * to stall vs success vs failure) lives in the hooks the caller supplies.
 *
 * The state `S` is whatever quantity decides "are we done?" — typically a
 * remaining-work count, but it can be any value you can compare.
 */
export interface RunPassesOpts<S> {
    /** Initial state (e.g. `countOpenUnknowns(db)`). */
    initial: S;
    /**
     * Maximum number of passes before declaring failure. Must be >= 1; the
     * driver does not validate this — with a smaller value no pass runs.
     */
    maxAttempts: number;
    /** True when the work is finished and the loop should stop cleanly. */
    isDone: (state: S) => boolean;
    /**
     * True when this pass made no progress vs the previous pass. Fires after
     * the first pass at the earliest. It is evaluated just before the next
     * pass would start, so a stall on the final allowed pass is reported via
     * `onFail`, not `onStall`.
     */
    isStalled: (curr: S, prev: S) => boolean;
    /** Run one pass; return the new state. Pass numbers are 1-indexed. */
    onPass: (pass: number, state: S) => Promise<S>;
    /** Called once with the initial state before any pass runs. */
    onStart?: (state: S) => void;
    /** Called when a stall ends the loop early; no other end hook fires. */
    onStall?: (state: S) => void;
    /** Called when the loop ends with `isDone(state)` true. */
    onSuccess?: (state: S) => void;
    /** Called when the pass budget runs out with `isDone(state)` still false. */
    onFail?: (state: S) => void;
}
30
/**
 * Drive `opts.onPass` until the state is done, a stall is detected, or
 * `maxAttempts` passes have run.
 *
 * @returns The last state reached, whichever way the loop ended.
 */
export declare function runPasses<S>(opts: RunPassesOpts<S>): Promise<S>;
@@ -0,0 +1,15 @@
1
+ export async function runPasses(opts) {
2
+ let state = opts.initial;
3
+ let prev = state;
4
+ opts.onStart?.(state);
5
+ for (let pass = 1; pass <= opts.maxAttempts && !opts.isDone(state); pass++) {
6
+ if (pass > 1 && opts.isStalled(state, prev)) {
7
+ opts.onStall?.(state);
8
+ return state;
9
+ }
10
+ prev = state;
11
+ state = await opts.onPass(pass, state);
12
+ }
13
+ (opts.isDone(state) ? opts.onSuccess : opts.onFail)?.(state);
14
+ return state;
15
+ }
@@ -115,7 +115,7 @@ export function persistUnlockOutcome(db, filePath, outcome) {
115
115
  spinner.succeed(`Saved password for pattern ${pattern} in secure vault.`);
116
116
  }
117
117
  catch (err) {
118
- spinner.fail(`Could not save password: ${err.message}`);
118
+ spinner.fail(`Could not save password: ${err instanceof Error ? err.message : String(err)}`);
119
119
  throw err;
120
120
  }
121
121
  }
@@ -0,0 +1,19 @@
1
+ import type Database from "libsql";
2
+ import type { Chunk } from "./engine.js";
3
+ import type { ScanHooks } from "./hooks.js";
4
+ import type { ScanProgress } from "./progress.js";
5
/** Everything one scan worker needs to process a single chunk. */
export interface ScanWorkerDeps {
    /** Database handed to the scan agent and used to record a failure question. */
    readonly db: Database.Database;
    /** Identifier of the current scan; attached to the agent context and to failure questions. */
    readonly scanId: string;
    /** `scanned_files` row id for the chunk's file, if one exists; stored as null on failure questions otherwise. */
    readonly scannedFileId: string | undefined;
    /** Sink that receives a `question` tick when a chunk failure is recorded; also passed to the agent context. */
    readonly progress: ScanProgress;
    /** The chunk to parse. */
    readonly chunk: Chunk;
}
12
/**
 * Process one chunk: run the LLM scan agent over a single-page PDF blob with
 * scanId + progress sink + scanned_files row injected through the agent
 * context. Agent's record_transactions / note_question calls write directly to
 * the DB; per-row ticks fan out via `progress.emit`. Failures land in the DB
 * as a `chunk_failed` question so the resolver can pick them up.
 *
 * @param deps  Database, scan identifiers, progress sink and the chunk itself.
 * @param hooks Optional `onWorkerStart(workerId, chunk)` and
 *              `onWorkerEnd(workerId, chunk, ok)` callbacks (not awaited).
 * @returns Resolves once the agent run (and any failure bookkeeping) is done;
 *          agent failures are recorded rather than thrown.
 */
export declare function runScanWorker(deps: ScanWorkerDeps, hooks: ScanHooks): Promise<void>;
@@ -0,0 +1,67 @@
1
+ import { randomUUID } from "crypto";
2
+ import { runScanAgent } from "../ai/agent.js";
3
+ import { recordQuestion } from "../db/queries/questions.js";
4
+ import { buildDocumentBlock } from "./pdf/pdf.js";
5
+ import { tryExecute } from "./result.js";
6
/**
 * Parse a single chunk with the LLM scan agent.
 *
 * The agent receives the chunk's document block plus the page prompt as one
 * user message, and a non-interactive context carrying the scan id, the
 * scanned_files row id, the chunk id and the progress sink. The run is
 * wrapped in `tryExecute`, so an agent failure does not throw: it is reported
 * to `onWorkerEnd` as `ok = false` and then stored as a `chunk_failed`
 * question. `onWorkerStart` / `onWorkerEnd` are called but not awaited.
 */
export async function runScanWorker(deps, hooks) {
    const workerId = `cw:${randomUUID()}`;
    hooks.onWorkerStart?.(workerId, deps.chunk);
    // Build the request inside the thunk so that failures while preparing the
    // document block are captured by tryExecute as well.
    const invokeAgent = () => {
        const userTurn = {
            role: "user",
            content: [
                buildDocumentBlock(deps.chunk.bytes, deps.chunk.fileName, deps.chunk.mime),
                { type: "text", text: buildChunkPrompt(deps.chunk) },
            ],
        };
        return runScanAgent({
            db: deps.db,
            initialMessages: [userTurn],
            prompt: { fileName: deps.chunk.fileName },
            agentCtx: {
                interactive: false,
                scanId: deps.scanId,
                fileId: deps.scannedFileId,
                chunkId: deps.chunk.chunkId,
                progress: deps.progress,
            },
        });
    };
    const outcome = await tryExecute(invokeAgent);
    hooks.onWorkerEnd?.(workerId, deps.chunk, outcome.ok);
    if (outcome.ok)
        return;
    recordChunkFailure(deps, outcome.error);
}
40
/**
 * Best-effort bookkeeping for a chunk whose agent run failed: store a
 * `chunk_failed` question naming the file, page and error, then emit a
 * `question` tick for the chunk. Any error raised while doing so is
 * deliberately swallowed so it cannot take the file worker down.
 */
function recordChunkFailure(deps, error) {
    try {
        const failure = {
            file_id: deps.scannedFileId ?? null,
            scan_id: deps.scanId,
            transaction_id: null,
            account_id: null,
            kind: "chunk_failed",
            prompt: `Chunk ${deps.chunk.fileName} p${deps.chunk.pageNumber} failed to parse: ${error}.`,
        };
        recordQuestion(deps.db, failure);
        const tick = { chunkId: deps.chunk.chunkId, kind: "question" };
        deps.progress.emit(tick);
    }
    catch {
        // failure to record a failure shouldn't crash the file worker
    }
}
56
/**
 * Compose the per-page instruction text for the scan agent: an intro line
 * naming the page, total pages and file, a blank line, a `Steps:` label and
 * five numbered steps, joined with newlines.
 */
function buildChunkPrompt(chunk) {
    const intro = `You are parsing page ${chunk.pageNumber} of ${chunk.totalPages} of ${chunk.fileName}.`;
    const steps = [
        `1. Call list_accounts to see what already exists.`,
        `2. If this page reveals an account that isn't in the chart yet, call create_account once.`,
        `3. For every transaction on this page, call record_transactions (plural) with all rows in one batch.`,
        `4. If the first or last row looks incomplete (no date, or no amount column visible — the row likely continues onto an adjacent page), call note_question with kind="boundary_continuation" and the raw row text. Do NOT invent missing fields.`,
        `5. When done with this page, call mark_file_scanned with a short summary.`,
    ];
    return [intro, ``, `Steps:`, ...steps].join("\n");
}
@@ -0,0 +1,20 @@
1
+ import type Database from "libsql";
2
+ import type { ScanBuffer } from "../buffer/types.js";
3
+ import type { Chunk, ScanHooks } from "../engine/types.js";
4
/** Inputs for one buffer-based chunk worker. */
export interface ChunkWorkerDeps {
    /** Database handle made available to the worker. */
    readonly db: Database.Database;
    /** Shared scan buffer the worker is given. */
    readonly buffer: ScanBuffer;
    /** The chunk to process. */
    readonly chunk: Chunk;
}
9
/** Outcome reported by one chunk worker. */
export interface ChunkWorkerResult {
    /** Identifier of the worker that produced this result. */
    readonly workerId: string;
    /** Whether the chunk was processed successfully. */
    readonly ok: boolean;
    /** Failure description; presumably only set when `ok` is false — confirm against the implementation. */
    readonly error?: string;
}
14
/**
 * Process one chunk: run the LLM scan agent over a single-page PDF blob with
 * the shared Buffer + chunkId injected. The agent's `record_transactions`
 * calls land in the shared buffer; events fan out to the auditor and the
 * dashboard.
 *
 * @param deps  Database, shared buffer and the chunk to process.
 * @param hooks Scan hooks made available to the worker.
 * @returns The worker's id together with its success flag and, on failure,
 *          an error description.
 */
export declare function runChunkWorker(deps: ChunkWorkerDeps, hooks: ScanHooks): Promise<ChunkWorkerResult>;