plasalid 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. package/README.md +15 -15
  2. package/dist/accounts/taxonomy.d.ts +1 -1
  3. package/dist/accounts/taxonomy.js +1 -1
  4. package/dist/ai/agent.d.ts +9 -10
  5. package/dist/ai/agent.js +31 -15
  6. package/dist/ai/personas.d.ts +1 -1
  7. package/dist/ai/personas.js +57 -55
  8. package/dist/ai/prompt-sections.d.ts +4 -4
  9. package/dist/ai/prompt-sections.js +1 -1
  10. package/dist/ai/system-prompt.d.ts +2 -2
  11. package/dist/ai/system-prompt.js +5 -5
  12. package/dist/ai/tools/account-mutex.d.ts +1 -0
  13. package/dist/ai/tools/account-mutex.js +16 -0
  14. package/dist/ai/tools/clarify.d.ts +2 -0
  15. package/dist/ai/tools/clarify.js +169 -0
  16. package/dist/ai/tools/index.js +10 -18
  17. package/dist/ai/tools/ingest.d.ts +2 -2
  18. package/dist/ai/tools/ingest.js +284 -244
  19. package/dist/ai/tools/merchants.js +1 -28
  20. package/dist/ai/tools/read.js +8 -8
  21. package/dist/ai/tools/record.js +7 -40
  22. package/dist/ai/tools/resolve.js +25 -22
  23. package/dist/ai/tools/scan.js +0 -1
  24. package/dist/ai/tools/types.d.ts +14 -21
  25. package/dist/cli/commands/clarify.d.ts +5 -0
  26. package/dist/cli/commands/clarify.js +44 -0
  27. package/dist/cli/commands/record.js +1 -82
  28. package/dist/cli/commands/resolve.d.ts +5 -2
  29. package/dist/cli/commands/resolve.js +36 -5
  30. package/dist/cli/commands/revert.js +4 -2
  31. package/dist/cli/commands/rules.js +2 -2
  32. package/dist/cli/commands/scan.js +199 -128
  33. package/dist/cli/commands/status.js +6 -6
  34. package/dist/cli/index.js +8 -29
  35. package/dist/cli/ink/ScanDashboard.d.ts +49 -0
  36. package/dist/cli/ink/ScanDashboard.js +214 -0
  37. package/dist/cli/ink/scan_dashboard.d.ts +40 -25
  38. package/dist/cli/ink/scan_dashboard.js +139 -44
  39. package/dist/cli/setup.js +1 -1
  40. package/dist/cli/ux.js +1 -1
  41. package/dist/db/queries/account-balance.d.ts +1 -1
  42. package/dist/db/queries/questions.d.ts +62 -0
  43. package/dist/db/queries/questions.js +110 -0
  44. package/dist/db/queries/transactions.d.ts +1 -1
  45. package/dist/db/queries/unknowns.d.ts +17 -15
  46. package/dist/db/queries/unknowns.js +35 -39
  47. package/dist/db/schema.js +6 -28
  48. package/dist/scanner/audit/auditor.d.ts +31 -0
  49. package/dist/scanner/audit/auditor.js +72 -0
  50. package/dist/scanner/audit/engine.d.ts +10 -0
  51. package/dist/scanner/audit/engine.js +98 -0
  52. package/dist/scanner/audit/eventBus.d.ts +60 -0
  53. package/dist/scanner/audit/eventBus.js +35 -0
  54. package/dist/scanner/audit/passes/index.d.ts +11 -0
  55. package/dist/scanner/audit/passes/index.js +9 -0
  56. package/dist/scanner/audit/passes/types.d.ts +23 -0
  57. package/dist/scanner/audit/passes/types.js +1 -0
  58. package/dist/scanner/audit/types.d.ts +27 -0
  59. package/dist/scanner/audit/types.js +1 -0
  60. package/dist/scanner/auditor.d.ts +51 -0
  61. package/dist/scanner/auditor.js +80 -0
  62. package/dist/scanner/buffer/engine.d.ts +9 -0
  63. package/dist/scanner/buffer/engine.js +110 -0
  64. package/dist/scanner/buffer/sharedBuffer.d.ts +78 -0
  65. package/dist/scanner/buffer/sharedBuffer.js +130 -0
  66. package/dist/scanner/buffer/types.d.ts +67 -0
  67. package/dist/scanner/buffer/types.js +1 -0
  68. package/dist/scanner/buffer.d.ts +45 -38
  69. package/dist/scanner/buffer.js +93 -61
  70. package/dist/scanner/bus/engine.d.ts +11 -0
  71. package/dist/scanner/bus/engine.js +42 -0
  72. package/dist/scanner/bus/types.d.ts +53 -0
  73. package/dist/scanner/bus/types.js +1 -0
  74. package/dist/scanner/bus.d.ts +38 -0
  75. package/dist/scanner/bus.js +37 -0
  76. package/dist/scanner/chunk-worker.d.ts +19 -0
  77. package/dist/scanner/chunk-worker.js +67 -0
  78. package/dist/scanner/chunkWorker.d.ts +20 -0
  79. package/dist/scanner/chunkWorker.js +59 -0
  80. package/dist/scanner/chunker/chunker.d.ts +7 -0
  81. package/dist/scanner/chunker/chunker.js +60 -0
  82. package/dist/scanner/chunker.d.ts +7 -0
  83. package/dist/scanner/chunker.js +60 -0
  84. package/dist/scanner/clarifier-memory.d.ts +8 -0
  85. package/dist/scanner/clarifier-memory.js +24 -0
  86. package/dist/scanner/clarifier.d.ts +39 -0
  87. package/dist/scanner/clarifier.js +196 -0
  88. package/dist/scanner/converge.d.ts +29 -0
  89. package/dist/scanner/converge.js +15 -0
  90. package/dist/scanner/decrypt.d.ts +10 -0
  91. package/dist/scanner/decrypt.js +80 -0
  92. package/dist/scanner/engine/scanEngine.d.ts +24 -0
  93. package/dist/scanner/engine/scanEngine.js +87 -0
  94. package/dist/scanner/engine/types.d.ts +90 -0
  95. package/dist/scanner/engine/types.js +1 -0
  96. package/dist/scanner/engine.d.ts +90 -0
  97. package/dist/scanner/engine.js +84 -0
  98. package/dist/scanner/file-worker.d.ts +33 -0
  99. package/dist/scanner/file-worker.js +28 -0
  100. package/dist/scanner/fileWorker.d.ts +33 -0
  101. package/dist/scanner/fileWorker.js +22 -0
  102. package/dist/scanner/hooks/types.d.ts +25 -0
  103. package/dist/scanner/hooks/types.js +1 -0
  104. package/dist/scanner/hooks.d.ts +23 -0
  105. package/dist/scanner/hooks.js +1 -0
  106. package/dist/scanner/parse.d.ts +10 -0
  107. package/dist/scanner/parse.js +47 -0
  108. package/dist/scanner/passes/index.d.ts +8 -0
  109. package/dist/scanner/passes/index.js +6 -0
  110. package/dist/scanner/passes/types.d.ts +22 -0
  111. package/dist/scanner/passes/types.js +1 -0
  112. package/dist/scanner/pdf/chunker.d.ts +7 -0
  113. package/dist/scanner/pdf/chunker.js +60 -0
  114. package/dist/scanner/pdf/password-store.d.ts +34 -0
  115. package/dist/scanner/pdf/password-store.js +83 -0
  116. package/dist/scanner/pdf/pdf-unlock.d.ts +17 -0
  117. package/dist/scanner/pdf/pdf-unlock.js +50 -0
  118. package/dist/scanner/pdf/pdf.d.ts +17 -0
  119. package/dist/scanner/pdf/pdf.js +36 -0
  120. package/dist/scanner/pdf/state-machine.d.ts +60 -0
  121. package/dist/scanner/pdf/state-machine.js +64 -0
  122. package/dist/scanner/pdf/unlock.d.ts +22 -0
  123. package/dist/scanner/pdf/unlock.js +121 -0
  124. package/dist/scanner/phase-decrypt.d.ts +10 -0
  125. package/dist/scanner/phase-decrypt.js +80 -0
  126. package/dist/scanner/phase-parse.d.ts +10 -0
  127. package/dist/scanner/phase-parse.js +46 -0
  128. package/dist/scanner/phases/chunk.d.ts +8 -0
  129. package/dist/scanner/phases/chunk.js +13 -0
  130. package/dist/scanner/phases/commit.d.ts +12 -0
  131. package/dist/scanner/phases/commit.js +140 -0
  132. package/dist/scanner/phases/decrypt.d.ts +10 -0
  133. package/dist/scanner/phases/decrypt.js +80 -0
  134. package/dist/scanner/phases/parse.d.ts +10 -0
  135. package/dist/scanner/phases/parse.js +46 -0
  136. package/dist/scanner/phases/resolve.d.ts +10 -0
  137. package/dist/scanner/phases/resolve.js +17 -0
  138. package/dist/scanner/phases/review.d.ts +10 -0
  139. package/dist/scanner/phases/review.js +12 -0
  140. package/dist/scanner/progress.d.ts +14 -0
  141. package/dist/scanner/progress.js +21 -0
  142. package/dist/scanner/resolver-memory.d.ts +8 -0
  143. package/dist/scanner/resolver-memory.js +24 -0
  144. package/dist/scanner/resolver.d.ts +39 -0
  145. package/dist/scanner/resolver.js +196 -0
  146. package/dist/scanner/result.d.ts +17 -0
  147. package/dist/scanner/result.js +19 -0
  148. package/dist/scanner/run-passes.d.ts +30 -0
  149. package/dist/scanner/run-passes.js +15 -0
  150. package/dist/scanner/unlock.js +1 -1
  151. package/dist/scanner/worker.d.ts +19 -0
  152. package/dist/scanner/worker.js +67 -0
  153. package/dist/scanner/workers/chunkWorker.d.ts +20 -0
  154. package/dist/scanner/workers/chunkWorker.js +65 -0
  155. package/dist/scanner/workers/fileWorker.d.ts +32 -0
  156. package/dist/scanner/workers/fileWorker.js +22 -0
  157. package/package.json +1 -1
@@ -0,0 +1,60 @@
1
// Cache for the lazily started dynamic import of the mupdf module.
let mupdfPromise = null;
/**
 * Return the promise for the mupdf module, starting the dynamic import on
 * the first call only. Every later call hands back that same promise.
 */
function getMupdf() {
    if (mupdfPromise === null) {
        mupdfPromise = import("mupdf");
    }
    return mupdfPromise;
}
7
/**
 * Produce one Chunk that contains only page `pageIndex` (0-based) of `file`.
 *
 * A second copy of the document is opened from the decrypted bytes and every
 * page other than the wanted one is deleted, walking from the last page down
 * to the first so a deletion never shifts the index of a page still to be
 * visited. The copy is destroyed in `finally`, so a failing save cannot leak
 * it.
 */
async function extractPage(file, pageIndex, pageCount) {
    const mupdf = await getMupdf();
    const pageNumber = pageIndex + 1;
    const doc = mupdf.Document.openDocument(file.decryptedBytes, file.mime);
    try {
        let cursor = pageCount;
        while (cursor-- > 0) {
            if (cursor === pageIndex) {
                continue;
            }
            doc.deletePage(cursor);
        }
        const saved = doc.saveToBuffer("decrypt");
        const bytes = Buffer.from(saved.asUint8Array());
        return {
            chunkId: `${file.path}#p${pageNumber}`,
            fileId: file.path,
            fileName: file.fileName,
            relPath: file.relPath,
            pageNumber,
            totalPages: pageCount,
            bytes,
            mime: file.mime,
        };
    }
    finally {
        doc.destroy();
    }
}
38
/**
 * Break one decrypted PDF into single-page Chunks, one per page and in page
 * order. The document is opened once just to read its page count (and is
 * destroyed straight away); each page is then extracted on its own, one
 * after another.
 */
export async function chunkPdf(file) {
    const mupdf = await getMupdf();
    const probe = mupdf.Document.openDocument(file.decryptedBytes, file.mime);
    let pageCount;
    try {
        pageCount = probe.countPages();
    }
    finally {
        probe.destroy();
    }
    const pages = [];
    let index = 0;
    // A non-positive page count never enters the loop, so the result is [].
    while (index < pageCount) {
        pages.push(await extractPage(file, index, pageCount));
        index += 1;
    }
    return pages;
}
@@ -0,0 +1,7 @@
1
+ import type { Chunk, DecryptedFile } from "./engine.js";
2
+ /**
3
+ * Split one decrypted PDF into N single-page Chunks. Each chunk is a
4
+ * standalone, valid PDF so the per-chunk LLM agent gets a clean document
5
+ * without siblings.
6
+ */
7
+ export declare function chunkPdf(file: DecryptedFile): Promise<Chunk[]>;
@@ -0,0 +1,60 @@
1
// Cache for the lazily started dynamic import of the mupdf module.
let mupdfPromise = null;
/**
 * Return the promise for the mupdf module, starting the dynamic import on
 * the first call only. Every later call hands back that same promise.
 */
function getMupdf() {
    if (mupdfPromise === null) {
        mupdfPromise = import("mupdf");
    }
    return mupdfPromise;
}
7
/**
 * Produce one Chunk that contains only page `pageIndex` (0-based) of `file`.
 *
 * A second copy of the document is opened from the decrypted bytes and every
 * page other than the wanted one is deleted, walking from the last page down
 * to the first so a deletion never shifts the index of a page still to be
 * visited. The copy is destroyed in `finally`, so a failing save cannot leak
 * it.
 */
async function extractPage(file, pageIndex, pageCount) {
    const mupdf = await getMupdf();
    const pageNumber = pageIndex + 1;
    const doc = mupdf.Document.openDocument(file.decryptedBytes, file.mime);
    try {
        let cursor = pageCount;
        while (cursor-- > 0) {
            if (cursor === pageIndex) {
                continue;
            }
            doc.deletePage(cursor);
        }
        const saved = doc.saveToBuffer("decrypt");
        const bytes = Buffer.from(saved.asUint8Array());
        return {
            chunkId: `${file.path}#p${pageNumber}`,
            fileId: file.path,
            fileName: file.fileName,
            relPath: file.relPath,
            pageNumber,
            totalPages: pageCount,
            bytes,
            mime: file.mime,
        };
    }
    finally {
        doc.destroy();
    }
}
38
/**
 * Break one decrypted PDF into single-page Chunks, one per page and in page
 * order. The document is opened once just to read its page count (and is
 * destroyed straight away); each page is then extracted on its own, one
 * after another.
 */
export async function chunkPdf(file) {
    const mupdf = await getMupdf();
    const probe = mupdf.Document.openDocument(file.decryptedBytes, file.mime);
    let pageCount;
    try {
        pageCount = probe.countPages();
    }
    finally {
        probe.destroy();
    }
    const pages = [];
    let index = 0;
    // A non-positive page count never enters the loop, so the result is [].
    while (index < pageCount) {
        pages.push(await extractPage(file, index, pageCount));
        index += 1;
    }
    return pages;
}
@@ -0,0 +1,8 @@
1
+ import type Database from "libsql";
2
+ import type { ClosedQuestion } from "../db/queries/questions.js";
3
+ /**
4
+ * Compact every closed question into a memories row (category `scanning_hint`).
5
+ * The next scan's deterministic memoryRulePass picks them up. Dedups on body —
6
+ * an identical rule for the same kind + prompt won't be re-inserted.
7
+ */
8
+ export declare function synthesizeMemoryRules(db: Database.Database, closures: readonly ClosedQuestion[]): number;
@@ -0,0 +1,24 @@
1
/**
 * Store each closed question as a `scanning_hint` row in the memories table.
 * A rule whose formatted text is already present under that category is
 * skipped, so repeating the same closure does not create duplicates.
 *
 * @param db        Database handle exposing `prepare()`.
 * @param closures  Closed questions to turn into rules.
 * @returns         How many rows were actually inserted.
 */
export function synthesizeMemoryRules(db, closures) {
    if (closures.length === 0) {
        return 0;
    }
    const findExisting = db.prepare(`SELECT 1 FROM memories WHERE category = ? AND content = ? LIMIT 1`);
    const addRule = db.prepare(`INSERT INTO memories (content, category) VALUES (?, ?)`);
    let added = 0;
    for (const closure of closures) {
        const rule = formatRule(closure);
        const alreadyStored = findExisting.get("scanning_hint", rule);
        if (!alreadyStored) {
            addRule.run(rule, "scanning_hint");
            added += 1;
        }
    }
    return added;
}
21
/**
 * Render a closed question as `[kind] prompt -> answer`. A missing kind is
 * shown as "general", whitespace runs in the prompt collapse to one space,
 * and both prompt and answer are trimmed.
 */
function formatRule(c) {
    const label = c.kind ?? "general";
    const prompt = c.prompt.replace(/\s+/g, " ").trim();
    const answer = c.answer.trim();
    return `[${label}] ${prompt} -> ${answer}`;
}
@@ -0,0 +1,39 @@
1
+ import type Database from "libsql";
2
+ import { type QuestionRow } from "../db/queries/questions.js";
3
+ export interface ClarifierContext {
4
+ readonly db: Database.Database;
5
+ readonly tally: Record<string, number>;
6
+ }
7
+ export interface ClarifierPass {
8
+ readonly name: string;
9
+ readonly kinds: readonly string[];
10
+ /** Try to close one question. Returns the answer if closed, else null. */
11
+ tryResolve(u: QuestionRow, ctx: ClarifierContext): Promise<string | null>;
12
+ }
13
+ export interface ClarifySummary {
14
+ readonly total: number;
15
+ readonly clarified: number;
16
+ readonly remaining: number;
17
+ readonly tally: Readonly<Record<string, number>>;
18
+ }
19
+ export interface RunClarifyOpts {
20
+ db: Database.Database;
21
+ /** Narrows to a single scan's questions. Omit = every question. */
22
+ scanId?: string;
23
+ interactive?: boolean;
24
+ promptUser?: (prompt: string, options?: string[], facts?: any) => Promise<string>;
25
+ onProgress?: (event: {
26
+ phase: "tool" | "responding";
27
+ toolName?: string;
28
+ toolCount: number;
29
+ elapsedMs: number;
30
+ }) => void;
31
+ }
32
+ export declare const CLARIFIER_PASSES: readonly ClarifierPass[];
33
+ /**
34
+ * Single entry point shared by the in-scan resolve phase and the standalone
35
+ * `plasalid clarify` command. Runs deterministic passes first, then (when
36
+ * interactive) hands the leftovers to the LLM clarifier agent. Closed
37
+ * questions get compacted into scanning_hint memories.
38
+ */
39
+ export declare function runClarify(opts: RunClarifyOpts): Promise<ClarifySummary>;
@@ -0,0 +1,196 @@
1
+ import { closeQuestion, listQuestions, countQuestions, } from "../db/queries/questions.js";
2
+ import { updatePosting } from "../db/queries/transactions.js";
3
+ import { runClarifyAgent } from "../ai/agent.js";
4
+ import { synthesizeMemoryRules } from "./clarifier-memory.js";
5
+ import { converge } from "./converge.js";
6
+ const MAX_AGENT_PASSES = 3; // Upper bound on LLM clarifier passes; handed to converge() as maxAttempts.
7
/**
 * Deterministic pass backed by stored `scanning_hint` memories. Each stored
 * rule is parsed into a key and an answer; when a rule's key equals the
 * canonical key of the question, that rule's answer is returned. Returns
 * null when no stored rule matches.
 */
const memoryRulePass = {
    name: "memory_rule",
    kinds: [
        "uncategorized",
        "uncategorized_expense",
        "duplicate",
        "correlation",
        "recurrence_candidate",
        "similar_accounts",
        "boundary_continuation",
        "scan_truncated",
        "scan_commit_failure",
    ],
    async tryResolve(u, ctx) {
        const stored = ctx.db
            .prepare(`SELECT content FROM memories WHERE category = 'scanning_hint'`)
            .all();
        const wanted = canonicalKey(u);
        for (const { content } of stored) {
            const rule = parseRule(content);
            // Rows that do not parse as "key -> answer" are ignored.
            if (rule && rule.key === wanted) {
                return rule.answer;
            }
        }
        return null;
    },
};
29
/**
 * Deterministic pass for `uncategorized_expense` questions. When the
 * question's transaction has a merchant and that merchant has a
 * default_account_id, every posting of the transaction that sits on
 * 'expense:uncategorized' is moved to that default account, and the account
 * id is returned as the answer. Returns null as soon as any link in that
 * chain is missing or no such posting exists.
 */
const merchantDefaultPass = {
    name: "merchant_default",
    kinds: ["uncategorized_expense"],
    async tryResolve(u, ctx) {
        const { db } = ctx;
        const txId = u.transaction_id;
        if (!txId) {
            return null;
        }
        const txRow = db
            .prepare(`SELECT merchant_id FROM transactions WHERE id = ?`)
            .get(txId);
        const merchantId = txRow?.merchant_id;
        if (!merchantId) {
            return null;
        }
        const merchantRow = db
            .prepare(`SELECT default_account_id FROM merchants WHERE id = ?`)
            .get(merchantId);
        const target = merchantRow?.default_account_id;
        if (!target) {
            return null;
        }
        const uncategorized = db
            .prepare(`SELECT p.id FROM postings p
         JOIN accounts a ON a.id = p.account_id
         WHERE p.transaction_id = ? AND a.id = 'expense:uncategorized'`)
            .all(txId);
        if (uncategorized.length === 0) {
            return null;
        }
        uncategorized.forEach(({ id }) => {
            updatePosting(db, id, { account_id: target });
        });
        return target;
    },
};
64
// Deterministic passes in the order they are tried: stored memory rules
// first, then the merchant's default account.
export const CLARIFIER_PASSES = [memoryRulePass, merchantDefaultPass];
68
/**
 * Single entry point shared by the in-scan resolve phase and the standalone
 * `plasalid clarify` command. Runs the deterministic passes first, then (when
 * interactive, which is the default) hands the leftovers to the LLM clarifier
 * agent. Closed questions get compacted into scanning_hint memories.
 *
 * @param opts  See RunClarifyOpts: `db` is required; `scanId` narrows the run
 *              to one scan; `interactive` defaults to true.
 * @returns     `{ total, clarified, remaining, tally }`, where `tally` maps a
 *              pass name to the number of questions it closed.
 */
export async function runClarify(opts) {
    const { db } = opts;
    const tally = {};
    const closures = [];
    const initial = listQuestions(db, { scanId: opts.scanId, limit: 1000 });
    if (initial.length === 0) {
        return { total: 0, clarified: 0, remaining: 0, tally };
    }
    // `initial` is capped at 1000 rows while countRemaining() is not. Using
    // the capped length as the total made `total - remaining` wrong (even
    // negative) once more than 1000 questions were open, so the total is
    // taken from the uncapped count instead.
    const total = Math.max(initial.length, countRemaining(db, opts.scanId));
    for (const u of initial) {
        const passes = matchingPasses(u);
        if (passes.length === 0)
            continue;
        const result = await tryPasses(u, passes, { db, tally });
        if (!result)
            continue;
        const closed = closeQuestion(db, u.id, result.answer);
        if (!closed)
            continue;
        closures.push(closed);
        tally[result.passName] = (tally[result.passName] ?? 0) + 1;
    }
    const interactive = opts.interactive ?? true;
    if (interactive && countRemaining(db, opts.scanId) > 0) {
        await runAgentLoop(opts, closures, tally);
    }
    synthesizeMemoryRules(db, closures);
    const remaining = countRemaining(db, opts.scanId);
    // Clamp so that questions opened while the run was in flight can never
    // produce a negative "clarified" figure.
    const clarified = Math.max(0, total - remaining);
    return { total, clarified, remaining, tally };
}
104
/**
 * Select the deterministic passes that declare the question's kind. A
 * question without a kind matches no pass.
 */
function matchingPasses(u) {
    return u.kind
        ? CLARIFIER_PASSES.filter((pass) => pass.kinds.includes(u.kind))
        : [];
}
109
/**
 * Offer the question to each pass in order and stop at the first one that
 * yields an answer (anything other than null/undefined). A pass that throws
 * is logged to stderr and treated as "no answer", so later passes still get
 * their turn.
 *
 * @returns `{ passName, answer }` for the first answering pass, else null.
 */
async function tryPasses(u, passes, ctx) {
    for (const candidate of passes) {
        let outcome = null;
        try {
            outcome = await candidate.tryResolve(u, ctx);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            console.error(`[clarifier pass ${candidate.name}] ${reason}`);
        }
        if (outcome !== null && outcome !== undefined) {
            return { passName: candidate.name, answer: outcome };
        }
    }
    return null;
}
124
/**
 * Count questions via countQuestions(), filtered to `scanId` when one is
 * given and unfiltered otherwise.
 */
function countRemaining(db, scanId) {
    if (scanId) {
        return countQuestions(db, { scan_id: scanId });
    }
    return countQuestions(db);
}
127
/**
 * Stall-protected loop around the LLM clarifier agent, driven by converge().
 * Each pass re-reads the leftover questions (at most 1000), sends them to
 * the agent as one user message, and then reports the new remaining count.
 * The loop ends when nothing is left, when a pass fails to lower the count,
 * or after MAX_AGENT_PASSES passes. Every question the agent reports through
 * `onQuestionClosed` is appended to `closures` and counted in `tally` under
 * "agent_clarification".
 */
async function runAgentLoop(opts, closures, tally) {
    const { db } = opts;
    const openCount = () => countRemaining(db, opts.scanId);
    const recordClosure = (closed) => {
        closures.push(closed);
        tally["agent_clarification"] = (tally["agent_clarification"] ?? 0) + 1;
    };
    const runOnePass = async () => {
        const pending = listQuestions(db, { scanId: opts.scanId, limit: 1000 });
        if (pending.length === 0) {
            return 0;
        }
        await runClarifyAgent({
            db,
            prompt: {},
            initialMessages: [{ role: "user", content: buildResolveUserMessage(pending) }],
            agentCtx: {
                interactive: true,
                promptUser: opts.promptUser,
                onQuestionClosed: recordClosure,
            },
            onProgress: opts.onProgress,
        });
        return openCount();
    };
    await converge({
        initial: openCount(),
        maxAttempts: MAX_AGENT_PASSES,
        isDone: (open) => open === 0,
        isStalled: (now, before) => now >= before,
        onPass: runOnePass,
    });
}
164
/**
 * Build the plain-text user message that lists the open questions for the
 * clarifier agent: a short header, then two lines per question (an id line
 * with kind / tx / acct / file and any options, followed by the prompt with
 * newlines flattened to spaces). Missing fields are shown as "(none)".
 */
function buildResolveUserMessage(questions) {
    const orNone = (value) => value ?? "(none)";
    const header = [`${questions.length} question(s) to resolve.`, ``, `Questions:`];
    const body = questions.flatMap((q) => {
        const choices = parseOptions(q.options_json);
        const suffix = choices.length > 0 ? ` | options=[${choices.join(" / ")}]` : "";
        return [
            `- ${q.id} | kind=${orNone(q.kind)} | tx=${orNone(q.transaction_id)} | acct=${orNone(q.account_id)} | file=${orNone(q.file_id)}${suffix}`,
            ` prompt: ${q.prompt.replace(/\n/g, " ")}`,
        ];
    });
    return [...header, ...body].join("\n");
}
173
/**
 * Decode a JSON-encoded options list. Returns only the string entries of a
 * JSON array; an empty/missing input, invalid JSON, or a non-array value all
 * yield an empty list.
 */
function parseOptions(json) {
    if (!json) {
        return [];
    }
    let decoded;
    try {
        decoded = JSON.parse(json);
    }
    catch {
        return [];
    }
    if (!Array.isArray(decoded)) {
        return [];
    }
    return decoded.filter((entry) => typeof entry === "string");
}
184
/**
 * Build the lookup key for a question: `[kind] prompt`, with a missing kind
 * shown as "general" and the prompt whitespace-collapsed and trimmed.
 */
function canonicalKey(u) {
    const label = u.kind ?? "general";
    const prompt = u.prompt.replace(/\s+/g, " ").trim();
    return `[${label}] ${prompt}`;
}
187
/**
 * Split a stored rule of the form `key -> answer` at its LAST " -> ".
 * Returns `{ key, answer }` (both trimmed), or null when the separator is
 * absent or either side is empty after trimming.
 */
function parseRule(body) {
    const separator = " -> ";
    const at = body.lastIndexOf(separator);
    if (at === -1) {
        return null;
    }
    const key = body.slice(0, at).trim();
    const answer = body.slice(at + separator.length).trim();
    return key && answer ? { key, answer } : null;
}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Drive a stateful loop toward convergence: keep running passes until the
3
+ * caller's `isDone` predicate is true (success), `isStalled` returns true
4
+ * across two passes (stall), or `maxAttempts` is exhausted (fail).
5
+ *
6
+ * The driver owns counting passes, stall detection, and the iteration cap.
7
+ * Everything else (work per pass, callbacks per terminal state) lives in the
8
+ * hooks the caller supplies. `S` is whatever quantity decides "are we done?".
9
+ */
10
+ export interface ConvergeOpts<S> {
11
+ /** Initial state (e.g. `countQuestions(db)`). */
12
+ initial: S;
13
+ /** Maximum number of passes before declaring failure. Must be >= 1. */
14
+ maxAttempts: number;
15
+ /** True when the work is finished and the loop should stop cleanly. */
16
+ isDone: (state: S) => boolean;
17
+ /**
18
+ * True when this pass made no progress vs the previous pass. Fires after
19
+ * the first pass at the earliest.
20
+ */
21
+ isStalled: (curr: S, prev: S) => boolean;
22
+ /** Run one pass; return the new state. Pass numbers are 1-indexed. */
23
+ onPass: (pass: number, state: S) => Promise<S>;
24
+ onStart?: (state: S) => void;
25
+ onStall?: (state: S) => void;
26
+ onSuccess?: (state: S) => void;
27
+ onFail?: (state: S) => void;
28
+ }
29
+ export declare function converge<S>(opts: ConvergeOpts<S>): Promise<S>;
@@ -0,0 +1,15 @@
1
+ export async function converge(opts) {
2
+ let state = opts.initial;
3
+ let prev = state;
4
+ opts.onStart?.(state);
5
+ for (let pass = 1; pass <= opts.maxAttempts && !opts.isDone(state); pass++) {
6
+ if (pass > 1 && opts.isStalled(state, prev)) {
7
+ opts.onStall?.(state);
8
+ return state;
9
+ }
10
+ prev = state;
11
+ state = await opts.onPass(pass, state);
12
+ }
13
+ (opts.isDone(state) ? opts.onSuccess : opts.onFail)?.(state);
14
+ return state;
15
+ }
@@ -0,0 +1,10 @@
1
+ import type Database from "libsql";
2
+ import type { ScanState } from "./engine.js";
3
+ import type { ScanHooks } from "./hooks.js";
4
+ /**
5
+ * Phase 1 — walk the data dir, optionally filter by regex, decrypt each file
6
+ * sequentially (password prompts can't share a TTY). Output partitions into
7
+ * decrypted / skipped / failed via a kind-keyed dispatch map. Bootstrapped
8
+ * scanned_files rows are tagged onto each DecryptedFile.
9
+ */
10
+ export declare function decryptPhase(db: Database.Database, state: ScanState, hooks: ScanHooks): Promise<void>;
@@ -0,0 +1,80 @@
1
+ import { randomUUID } from "crypto";
2
+ import { readPdf } from "./pdf/pdf.js";
3
+ import { unlockIfNeeded, persistUnlockOutcome } from "./pdf/unlock.js";
4
+ import { scanDataDir } from "./walker.js";
5
+ import { tryExecute } from "./result.js";
6
/**
 * Look up the scanned_files row whose file_hash equals `hash`.
 *
 * @returns the row (only its `id` column is selected), or null if none.
 */
function findScannedByHash(db, hash) {
    const lookup = db.prepare(`SELECT id FROM scanned_files WHERE file_hash = ?`);
    const row = lookup.get(hash);
    return row ?? null;
}
11
/**
 * Read and unlock a single file, reporting the result as a tagged outcome:
 *
 *  - `failed`    — reading or unlocking raised an error;
 *  - `skipped`   — a scanned_files row with the same hash exists and
 *                  `opts.force` is not set;
 *  - `decrypted` — carries the file descriptor with its decrypted bytes and,
 *                  when forced over an existing row, that row's id.
 *
 * The unlock outcome is persisted before a `decrypted` result is returned.
 */
async function decryptOne(db, file, opts) {
    const readResult = await tryExecute(() => readPdf(file.path));
    if (!readResult.ok) {
        return { kind: "failed", error: `read failed: ${readResult.error}` };
    }
    const { hash, mime, bytes } = readResult.value;
    const prior = findScannedByHash(db, hash);
    if (prior && !opts.force) {
        return { kind: "skipped", existingScannedFileId: prior.id };
    }
    const unlockResult = await tryExecute(() => unlockIfNeeded({
        db,
        filePath: file.path,
        bytes,
        interactive: opts.interactive,
    }));
    if (!unlockResult.ok) {
        return { kind: "failed", error: unlockResult.error || "unlock failed" };
    }
    const { outcome, decrypted } = unlockResult.value;
    persistUnlockOutcome(db, file.path, outcome);
    const decryptedFile = {
        path: file.path,
        fileName: file.name,
        relPath: file.relPath,
        hash,
        mime,
        decryptedBytes: decrypted,
        replacesPriorScannedFileId: prior?.id,
    };
    return { kind: "decrypted", file: decryptedFile };
}
42
// Dispatch table keyed by outcome kind: each handler files one decryptOne()
// outcome into the matching list on the scan state.
const APPLY = {
    decrypted(state, _file, outcome) {
        state.decrypted.push(outcome.file);
    },
    skipped(state, file, outcome) {
        const { existingScannedFileId } = outcome;
        state.skipped.push({ file, existingScannedFileId });
    },
    failed(state, file, outcome) {
        const { error } = outcome;
        state.failed.push({ file, error });
    },
};
47
/**
 * Bootstrap one scanned_files row per decrypted file. Chunk workers later
 * stamp transactions with source_file_id, so the row must exist before any
 * tool writes hit the DB. Status flips to 'scanned' after parse completes.
 *
 * For a file that replaces an earlier scan, the earlier row is deleted
 * first. Each new row gets a fresh `sf:<uuid>` id with status 'pending',
 * and that id is written back onto the file as `scannedFileId`.
 *
 * @param db     Database handle exposing `prepare()`.
 * @param state  Scan state; only `state.decrypted` is read and its entries
 *               are mutated in place.
 */
function bootstrapScannedFiles(db, state) {
    if (state.decrypted.length === 0) {
        return;
    }
    // Both statements are loop-invariant: prepare the INSERT once up front
    // and the DELETE lazily on first use, instead of on every iteration.
    const insertRow = db.prepare(`INSERT INTO scanned_files (id, path, file_hash, mime, status) VALUES (?, ?, ?, ?, 'pending')`);
    let deletePrior = null;
    for (const file of state.decrypted) {
        if (file.replacesPriorScannedFileId) {
            if (deletePrior === null) {
                deletePrior = db.prepare(`DELETE FROM scanned_files WHERE id = ?`);
            }
            deletePrior.run(file.replacesPriorScannedFileId);
        }
        const sfId = `sf:${randomUUID()}`;
        insertRow.run(sfId, file.path, file.hash, file.mime);
        file.scannedFileId = sfId;
    }
}
62
/**
 * Phase 1 of a scan. Lists the data dir, keeps only the files whose relPath
 * matches the optional case-insensitive `options.regex`, and decrypts them
 * one at a time. Each outcome is routed through the APPLY table into
 * state.decrypted / state.skipped / state.failed, after which a
 * scanned_files row is bootstrapped for every decrypted file. The optional
 * `beforeDecrypt` / `afterDecrypt` hooks bracket the phase.
 */
export async function decryptPhase(db, state, hooks) {
    await hooks.beforeDecrypt?.(state);
    const { regex } = state.options;
    const matcher = regex ? new RegExp(regex, "i") : null;
    state.files = scanDataDir().filter((candidate) => matcher === null || matcher.test(candidate.relPath));
    const decryptOpts = {
        force: Boolean(state.options.force),
        interactive: state.options.interactive ?? true,
    };
    for (const file of state.files) {
        const outcome = await decryptOne(db, file, decryptOpts);
        const apply = APPLY[outcome.kind];
        apply(state, file, outcome);
    }
    bootstrapScannedFiles(db, state);
    await hooks.afterDecrypt?.(state);
}
@@ -0,0 +1,24 @@
1
+ import type Database from "libsql";
2
+ import type { CommitOutcome, Phase, PhaseName, RunScanOptions, ScanState } from "./types.js";
3
+ import type { ScanHooks } from "../hooks/types.js";
4
+ export interface ScanResult {
5
+ readonly scanId: string;
6
+ readonly state: ScanState;
7
+ readonly committed: CommitOutcome | null;
8
+ readonly aborted: boolean;
9
+ }
10
+ export declare const DEFAULT_PHASES: readonly {
11
+ name: PhaseName;
12
+ phase: Phase;
13
+ }[];
14
+ /**
15
+ * Composition root for a scan run. Builds the singleton subdomain instances
16
+ * (bus, buffer, audit engine) once, threads them through ScanState, then
17
+ * runs the phase chain. Auditor lifecycle wraps the whole chain so it sees
18
+ * every event from decrypt through commit.
19
+ *
20
+ * Per-run isolation: every call to runScan creates fresh instances. Nothing
21
+ * survives between scans.
22
+ */
23
+ export declare function runScan(db: Database.Database, opts?: RunScanOptions, hooks?: ScanHooks): Promise<ScanResult>;
24
+ export type { ScanState, ScanHooks, RunScanOptions, CommitOutcome, } from "./types.js";
@@ -0,0 +1,87 @@
1
+ import { randomUUID } from "crypto";
2
+ import { createBus } from "../bus/engine.js";
3
+ import { createBuffer } from "../buffer/engine.js";
4
+ import { createAuditEngine } from "../audit/engine.js";
5
+ import { AUDIT_PASSES } from "../audit/passes/index.js";
6
+ import { decryptPhase } from "../phases/decrypt.js";
7
+ import { chunkPhase } from "../phases/chunk.js";
8
+ import { parsePhase } from "../phases/parse.js";
9
+ import { reviewPhase } from "../phases/review.js";
10
+ import { commitPhase } from "../phases/commit.js";
11
// The phase chain runScan() uses when the caller supplies none, in execution
// order: decrypt, chunk, parse, review, commit.
export const DEFAULT_PHASES = [
    ["decrypt", decryptPhase],
    ["chunk", chunkPhase],
    ["parse", parsePhase],
    ["review", reviewPhase],
    ["commit", commitPhase],
].map(([name, phase]) => ({ name, phase }));
18
/**
 * Composition root for one scan run.
 *
 * Creates a fresh bus, buffer and audit engine for this call, stores bus and
 * buffer on a new ScanState, and runs the phases in order (`opts.phases`, or
 * DEFAULT_PHASES). The audit engine is started before the first phase and
 * stopped in `finally`, where its tally is copied into `state.auditApplied`
 * and `onFinish` is fired.
 *
 * The chain stops early, with `aborted: true`, when a phase throws (the
 * error is recorded in `state.errors` and `onError` is fired) or when the
 * "review" phase leaves `state.review === "abort"`.
 *
 * @returns `{ scanId, state, committed, aborted }`.
 */
export async function runScan(db, opts = {}, hooks = {}) {
    const scanId = `sc:${randomUUID()}`;
    const bus = createBus();
    const buffer = createBuffer(scanId, bus);
    const audit = createAuditEngine({ db, bus, buffer, passes: AUDIT_PASSES });
    const state = {
        scanId,
        startedAt: Date.now(),
        options: opts,
        buffer,
        bus,
        files: [],
        decrypted: [],
        skipped: [],
        failed: [],
        chunks: [],
        review: null,
        committed: null,
        errors: [],
        auditApplied: {},
    };
    // Runs one phase; resolves to false (after recording and reporting the
    // error) when the phase throws, true otherwise.
    const runPhase = async ({ name, phase }) => {
        try {
            await phase(db, state, hooks);
            return true;
        }
        catch (err) {
            state.errors.push({ phase: name, error: err });
            await fire(hooks.onError, err, name, state);
            return false;
        }
    };
    await fire(hooks.onStart, state);
    audit.start();
    const pipeline = opts.phases ?? DEFAULT_PHASES;
    let aborted = false;
    try {
        for (const step of pipeline) {
            const succeeded = await runPhase(step);
            const reviewAborted = succeeded && step.name === "review" && state.review === "abort";
            if (!succeeded || reviewAborted) {
                aborted = true;
                break;
            }
        }
    }
    finally {
        audit.stop();
        Object.entries(audit.tally).forEach(([passName, count]) => {
            state.auditApplied[passName] = count;
        });
        await fire(hooks.onFinish, state);
    }
    return { scanId, state, committed: state.committed, aborted };
}
78
/**
 * Invoke an optional hook and never let it break the scan: a missing hook is
 * a no-op, and anything the hook throws (or rejects with) is logged to
 * stderr instead of propagating.
 *
 * @param fn    Hook to call; may be undefined.
 * @param args  Arguments forwarded to the hook.
 */
async function fire(fn, ...args) {
    if (!fn)
        return;
    try {
        await fn(...args);
    }
    catch (err) {
        // A hook may throw any value. Reading `.message` off null/undefined
        // would throw from inside this handler and defeat its purpose, and a
        // thrown string would be logged as "undefined".
        const reason = typeof err?.message === "string" ? err.message : String(err);
        console.error(`[scan-engine hook] ${reason}`);
    }
}