plasalid 0.7.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -4
- package/dist/ai/agent.d.ts +6 -7
- package/dist/ai/agent.js +27 -11
- package/dist/ai/personas.js +48 -46
- package/dist/ai/system-prompt.js +1 -1
- package/dist/ai/tools/account-mutex.d.ts +1 -0
- package/dist/ai/tools/account-mutex.js +16 -0
- package/dist/ai/tools/index.js +4 -12
- package/dist/ai/tools/ingest.d.ts +1 -1
- package/dist/ai/tools/ingest.js +282 -242
- package/dist/ai/tools/merchants.js +1 -28
- package/dist/ai/tools/read.js +8 -8
- package/dist/ai/tools/record.js +3 -36
- package/dist/ai/tools/resolve.js +25 -22
- package/dist/ai/tools/scan.js +0 -1
- package/dist/ai/tools/types.d.ts +14 -21
- package/dist/cli/commands/record.js +1 -82
- package/dist/cli/commands/resolve.d.ts +5 -2
- package/dist/cli/commands/resolve.js +36 -5
- package/dist/cli/commands/revert.js +4 -2
- package/dist/cli/commands/rules.js +2 -2
- package/dist/cli/commands/scan.js +199 -128
- package/dist/cli/commands/status.js +5 -5
- package/dist/cli/index.js +8 -29
- package/dist/cli/ink/ScanDashboard.d.ts +49 -0
- package/dist/cli/ink/ScanDashboard.js +214 -0
- package/dist/cli/ink/scan_dashboard.d.ts +40 -25
- package/dist/cli/ink/scan_dashboard.js +139 -44
- package/dist/db/queries/account-balance.d.ts +1 -1
- package/dist/db/queries/questions.d.ts +62 -0
- package/dist/db/queries/questions.js +110 -0
- package/dist/db/queries/transactions.d.ts +1 -1
- package/dist/db/queries/unknowns.d.ts +17 -15
- package/dist/db/queries/unknowns.js +35 -39
- package/dist/db/schema.js +6 -28
- package/dist/scanner/audit/auditor.d.ts +31 -0
- package/dist/scanner/audit/auditor.js +72 -0
- package/dist/scanner/audit/engine.d.ts +10 -0
- package/dist/scanner/audit/engine.js +98 -0
- package/dist/scanner/audit/eventBus.d.ts +60 -0
- package/dist/scanner/audit/eventBus.js +35 -0
- package/dist/scanner/audit/passes/index.d.ts +11 -0
- package/dist/scanner/audit/passes/index.js +9 -0
- package/dist/scanner/audit/passes/types.d.ts +23 -0
- package/dist/scanner/audit/passes/types.js +1 -0
- package/dist/scanner/audit/types.d.ts +27 -0
- package/dist/scanner/audit/types.js +1 -0
- package/dist/scanner/auditor.d.ts +51 -0
- package/dist/scanner/auditor.js +80 -0
- package/dist/scanner/buffer/engine.d.ts +9 -0
- package/dist/scanner/buffer/engine.js +110 -0
- package/dist/scanner/buffer/sharedBuffer.d.ts +78 -0
- package/dist/scanner/buffer/sharedBuffer.js +130 -0
- package/dist/scanner/buffer/types.d.ts +67 -0
- package/dist/scanner/buffer/types.js +1 -0
- package/dist/scanner/buffer.d.ts +45 -38
- package/dist/scanner/buffer.js +93 -61
- package/dist/scanner/bus/engine.d.ts +11 -0
- package/dist/scanner/bus/engine.js +42 -0
- package/dist/scanner/bus/types.d.ts +53 -0
- package/dist/scanner/bus/types.js +1 -0
- package/dist/scanner/bus.d.ts +38 -0
- package/dist/scanner/bus.js +37 -0
- package/dist/scanner/chunk-worker.d.ts +19 -0
- package/dist/scanner/chunk-worker.js +67 -0
- package/dist/scanner/chunkWorker.d.ts +20 -0
- package/dist/scanner/chunkWorker.js +59 -0
- package/dist/scanner/chunker/chunker.d.ts +7 -0
- package/dist/scanner/chunker/chunker.js +60 -0
- package/dist/scanner/chunker.d.ts +7 -0
- package/dist/scanner/chunker.js +60 -0
- package/dist/scanner/converge.d.ts +29 -0
- package/dist/scanner/converge.js +15 -0
- package/dist/scanner/decrypt.d.ts +10 -0
- package/dist/scanner/decrypt.js +80 -0
- package/dist/scanner/engine/scanEngine.d.ts +24 -0
- package/dist/scanner/engine/scanEngine.js +87 -0
- package/dist/scanner/engine/types.d.ts +90 -0
- package/dist/scanner/engine/types.js +1 -0
- package/dist/scanner/engine.d.ts +90 -0
- package/dist/scanner/engine.js +84 -0
- package/dist/scanner/file-worker.d.ts +33 -0
- package/dist/scanner/file-worker.js +28 -0
- package/dist/scanner/fileWorker.d.ts +33 -0
- package/dist/scanner/fileWorker.js +22 -0
- package/dist/scanner/hooks/types.d.ts +25 -0
- package/dist/scanner/hooks/types.js +1 -0
- package/dist/scanner/hooks.d.ts +23 -0
- package/dist/scanner/hooks.js +1 -0
- package/dist/scanner/parse.d.ts +10 -0
- package/dist/scanner/parse.js +47 -0
- package/dist/scanner/passes/index.d.ts +8 -0
- package/dist/scanner/passes/index.js +6 -0
- package/dist/scanner/passes/types.d.ts +22 -0
- package/dist/scanner/passes/types.js +1 -0
- package/dist/scanner/pdf/chunker.d.ts +7 -0
- package/dist/scanner/pdf/chunker.js +60 -0
- package/dist/scanner/pdf/password-store.d.ts +34 -0
- package/dist/scanner/pdf/password-store.js +83 -0
- package/dist/scanner/pdf/pdf-unlock.d.ts +17 -0
- package/dist/scanner/pdf/pdf-unlock.js +50 -0
- package/dist/scanner/pdf/pdf.d.ts +17 -0
- package/dist/scanner/pdf/pdf.js +36 -0
- package/dist/scanner/pdf/state-machine.d.ts +60 -0
- package/dist/scanner/pdf/state-machine.js +64 -0
- package/dist/scanner/pdf/unlock.d.ts +22 -0
- package/dist/scanner/pdf/unlock.js +121 -0
- package/dist/scanner/phase-decrypt.d.ts +10 -0
- package/dist/scanner/phase-decrypt.js +80 -0
- package/dist/scanner/phase-parse.d.ts +10 -0
- package/dist/scanner/phase-parse.js +46 -0
- package/dist/scanner/phases/chunk.d.ts +8 -0
- package/dist/scanner/phases/chunk.js +13 -0
- package/dist/scanner/phases/commit.d.ts +12 -0
- package/dist/scanner/phases/commit.js +140 -0
- package/dist/scanner/phases/decrypt.d.ts +10 -0
- package/dist/scanner/phases/decrypt.js +80 -0
- package/dist/scanner/phases/parse.d.ts +10 -0
- package/dist/scanner/phases/parse.js +46 -0
- package/dist/scanner/phases/resolve.d.ts +10 -0
- package/dist/scanner/phases/resolve.js +17 -0
- package/dist/scanner/phases/review.d.ts +10 -0
- package/dist/scanner/phases/review.js +12 -0
- package/dist/scanner/progress.d.ts +14 -0
- package/dist/scanner/progress.js +21 -0
- package/dist/scanner/resolver-memory.d.ts +8 -0
- package/dist/scanner/resolver-memory.js +24 -0
- package/dist/scanner/resolver.d.ts +39 -0
- package/dist/scanner/resolver.js +196 -0
- package/dist/scanner/result.d.ts +17 -0
- package/dist/scanner/result.js +19 -0
- package/dist/scanner/run-passes.d.ts +30 -0
- package/dist/scanner/run-passes.js +15 -0
- package/dist/scanner/unlock.js +1 -1
- package/dist/scanner/worker.d.ts +19 -0
- package/dist/scanner/worker.js +67 -0
- package/dist/scanner/workers/chunkWorker.d.ts +20 -0
- package/dist/scanner/workers/chunkWorker.js +65 -0
- package/dist/scanner/workers/fileWorker.d.ts +32 -0
- package/dist/scanner/workers/fileWorker.js +22 -0
- package/package.json +1 -1
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { runWithConcurrency } from "../concurrency.js";
|
|
2
|
+
import { runFileWorker } from "../file-worker.js";
|
|
3
|
+
import { errorMessage } from "../result.js";
|
|
4
|
+
/** Number of files parsed concurrently when `options.maxFileWorkers` is not set. */
const DEFAULT_MAX_FILE_WORKERS = 5;
/** Value forwarded as `maxChunkWorkers` when `options.maxChunkWorkersPerFile` is not set. */
const DEFAULT_MAX_CHUNK_WORKERS_PER_FILE = 5;
/** Upper bound applied to both worker-count options, whatever the caller asks for. */
const HARD_CAP = 8;
/**
 * Resolve a worker-count option to a whole number in `[1, HARD_CAP]`.
 *
 * @param n        Requested count; `null`/`undefined`/NaN/non-numeric values fall back.
 * @param fallback Count used when `n` is unusable.
 * @returns The requested count truncated to an integer and clamped to `[1, HARD_CAP]`.
 */
const clamp = (n, fallback) => {
    // NaN would otherwise survive Math.min/Math.max and reach the concurrency runner.
    const requested = n == null || isNaN(n) ? fallback : Math.trunc(n);
    return Math.min(HARD_CAP, Math.max(1, requested));
};
|
|
8
|
+
/**
 * Parse phase — run one file worker per decrypted file that has chunks.
 *
 * Steps, in order:
 *  1. `hooks.beforeParse` (if provided).
 *  2. Group `state.chunks` by `fileId` and pair each group with the matching
 *     entry of `state.decrypted` (matched on `file.path`). Files without
 *     chunks get no worker.
 *  3. Run `runFileWorker` for each group through `runWithConcurrency`,
 *     limited to the clamped `maxFileWorkers`. The clamped
 *     `maxChunkWorkersPerFile` is forwarded as `maxChunkWorkers`
 *     (presumably the per-file chunk concurrency — confirm in file-worker).
 *  4. Every settled result with `ok === false` is appended to `state.errors`
 *     with `phase: "parse"`.
 *  5. Every decrypted file that has a `scannedFileId` is marked `scanned`
 *     in `scanned_files`.
 *  6. `hooks.afterParse` (if provided).
 *
 * @param db    Database handle used for the `scanned_files` update and handed to workers.
 * @param state Scan state; reads `options`, `decrypted`, `chunks`, `scanId`,
 *              `progress` and appends to `errors`.
 * @param hooks Optional lifecycle callbacks; also forwarded to each file worker.
 */
export async function parsePhase(db, state, hooks) {
    await hooks.beforeParse?.(state);
    const maxFile = clamp(state.options.maxFileWorkers, DEFAULT_MAX_FILE_WORKERS);
    const maxChunk = clamp(state.options.maxChunkWorkersPerFile, DEFAULT_MAX_CHUNK_WORKERS_PER_FILE);
    // Bucket the chunks once instead of re-filtering the whole list per file.
    // Insertion order inside each bucket matches the order in state.chunks.
    const chunksByFile = new Map();
    for (const chunk of state.chunks) {
        const bucket = chunksByFile.get(chunk.fileId);
        if (bucket) {
            bucket.push(chunk);
        }
        else {
            chunksByFile.set(chunk.fileId, [chunk]);
        }
    }
    const fileGroups = state.decrypted
        .map(file => ({
            fileId: file.path,
            scannedFileId: file.scannedFileId,
            chunks: chunksByFile.get(file.path) ?? [],
        }))
        .filter(g => g.chunks.length > 0);
    const tasks = fileGroups.map(group => () => runFileWorker({
        db,
        scanId: state.scanId,
        scannedFileId: group.scannedFileId,
        progress: state.progress,
        fileId: group.fileId,
        chunks: group.chunks,
        maxChunkWorkers: maxChunk,
    }, hooks));
    const settled = await runWithConcurrency(tasks, maxFile);
    // settled[i] corresponds to fileGroups[i], so the index recovers the file id.
    for (let i = 0; i < settled.length; i++) {
        const r = settled[i];
        if (!r.ok)
            state.errors.push({ phase: "parse", target: fileGroups[i].fileId, error: errorMessage(r.error) });
    }
    // NOTE(review): files are marked 'scanned' even when their worker failed
    // or they produced no chunks — confirm this is intended.
    const markScanned = db.prepare(`UPDATE scanned_files SET status = 'scanned', scanned_at = datetime('now') WHERE id = ?`);
    for (const file of state.decrypted) {
        if (!file.scannedFileId)
            continue;
        markScanned.run(file.scannedFileId);
    }
    await hooks.afterParse?.(state);
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import type { ScanState } from "../engine.js";
|
|
3
|
+
import type { ScanHooks } from "../hooks.js";
|
|
4
|
+
/**
|
|
5
|
+
* Phase 4 — close every open unknown raised during this scan. Deterministic
|
|
6
|
+
* passes (memory rules, merchant defaults) run first; whatever survives goes
|
|
7
|
+
* to the LLM resolver agent when interactive. Closed unknowns get compacted
|
|
8
|
+
* into scanning_hint memories so the next scan picks them up automatically.
|
|
9
|
+
*/
|
|
10
|
+
export declare function resolvePhase(db: Database.Database, state: ScanState, hooks: ScanHooks): Promise<void>;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { runResolve } from "../resolver.js";
|
|
2
|
+
/**
 * Resolve phase — delegate to `runResolve` for the questions of this scan.
 *
 * Calls `hooks.beforeResolve`, runs `runResolve` scoped to `state.scanId`
 * (interactive unless `state.options.interactive` says otherwise), stores the
 * returned summary on `state.resolveSummary`, then calls `hooks.afterResolve`
 * with the state and that summary.
 */
export async function resolvePhase(db, state, hooks) {
    await hooks.beforeResolve?.(state);
    const resolveOpts = {
        db,
        scanId: state.scanId,
        interactive: state.options.interactive ?? true,
    };
    const summary = await runResolve(resolveOpts);
    state.resolveSummary = summary;
    await hooks.afterResolve?.(state, summary);
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import type { ScanState } from "../engine.js";
|
|
3
|
+
import type { ScanHooks } from "../hooks.js";
|
|
4
|
+
/**
|
|
5
|
+
* Phase 4 — present the buffer to the user for confirmation, then set
|
|
6
|
+
* `state.review` to either `"commit"` or `"abort"`. Today this is a simple
|
|
7
|
+
* auto-commit path so the engine compiles; the full Ink TUI is a separate
|
|
8
|
+
* follow-up (review TUI task).
|
|
9
|
+
*/
|
|
10
|
+
export declare function reviewPhase(_db: Database.Database, state: ScanState, hooks: ScanHooks): Promise<void>;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * Review phase — currently an unconditional auto-commit.
 *
 * Takes a snapshot of `state.buffer`, hands it to `hooks.beforeReview`,
 * sets `state.review` to `"commit"`, then calls `hooks.afterReview`.
 * The `_db` argument is unused.
 */
export async function reviewPhase(_db, state, hooks) {
    const pending = state.buffer.snapshot();
    await hooks.beforeReview?.(state, pending);
    // No interactive confirmation yet: the decision is always "commit".
    const decision = "commit";
    state.review = decision;
    await hooks.afterReview?.(state);
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Single-typed event sink scan-worker tools emit into as they write to the DB.
|
|
3
|
+
* Replaces the bus + buffer for in-flight progress: one consumer at a time
|
|
4
|
+
* (dashboard or plain-hooks counters) reads ticks per chunk.
|
|
5
|
+
*/
|
|
6
|
+
/** One progress tick delivered through a `ScanProgress` sink. */
export interface ScanProgressEvent {
    /** Id of the chunk the tick belongs to. */
    readonly chunkId: string;
    /**
     * What was written. The scan worker emits `"question"` after recording a
     * chunk-failure question; `"tx"` presumably marks a recorded transaction —
     * confirm against the tool that emits it.
     */
    readonly kind: "tx" | "question";
}
|
|
10
|
+
/** Minimal publish/subscribe sink for scan progress ticks. */
export interface ScanProgress {
    /** Deliver `event` to every handler that is currently subscribed. */
    emit(event: ScanProgressEvent): void;
    /** Register `handler`; the returned function unregisters it again. */
    subscribe(handler: (e: ScanProgressEvent) => void): () => void;
}
|
|
14
|
+
export declare function createProgress(): ScanProgress;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * Build an in-memory progress sink.
 *
 * `emit` calls every subscribed handler with the event; a handler that throws
 * is logged to stderr and does not stop delivery to the others. `subscribe`
 * registers a handler and returns a function that removes it.
 */
export function createProgress() {
    const listeners = new Set();
    const emit = (event) => {
        listeners.forEach((listener) => {
            try {
                listener(event);
            }
            catch (err) {
                const detail = err instanceof Error ? err.message : String(err);
                console.error(`[progress listener] ${detail}`);
            }
        });
    };
    const subscribe = (handler) => {
        listeners.add(handler);
        return () => {
            listeners.delete(handler);
        };
    };
    return { emit, subscribe };
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import type { ClosedQuestion } from "../db/queries/questions.js";
|
|
3
|
+
/**
|
|
4
|
+
* Compact every closed question into a memories row (category `scanning_hint`).
|
|
5
|
+
* The next scan's deterministic memoryRulePass picks them up. Dedups on body —
|
|
6
|
+
* an identical rule for the same kind + prompt won't be re-inserted.
|
|
7
|
+
*/
|
|
8
|
+
export declare function synthesizeMemoryRules(db: Database.Database, closures: readonly ClosedQuestion[]): number;
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Store each closed question as a `scanning_hint` row in `memories`.
 *
 * The row content is the text produced by `formatRule`. A closure whose
 * formatted text already exists under the `scanning_hint` category is
 * skipped, so identical rules are not inserted twice.
 *
 * @param db       Database handle.
 * @param closures Closed questions to persist.
 * @returns Number of rows actually inserted.
 */
export function synthesizeMemoryRules(db, closures) {
    if (closures.length === 0) {
        return 0;
    }
    const CATEGORY = "scanning_hint";
    let inserted = 0;
    const lookup = db.prepare(`SELECT 1 FROM memories WHERE category = ? AND content = ? LIMIT 1`);
    const store = db.prepare(`INSERT INTO memories (content, category) VALUES (?, ?)`);
    for (const closure of closures) {
        const rule = formatRule(closure);
        const alreadyStored = Boolean(lookup.get(CATEGORY, rule));
        if (!alreadyStored) {
            store.run(rule, CATEGORY);
            inserted += 1;
        }
    }
    return inserted;
}
|
|
21
|
+
/**
 * Render a closed question as `[kind] prompt -> answer`.
 * Whitespace runs in the prompt collapse to single spaces; both prompt and
 * answer are trimmed. A missing kind is written as `general`.
 */
function formatRule(c) {
    const label = c.kind ?? "general";
    const question = c.prompt.replace(/\s+/g, " ").trim();
    const reply = c.answer.trim();
    return `[${label}] ${question} -> ${reply}`;
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import { type QuestionRow } from "../db/queries/questions.js";
|
|
3
|
+
/** Shared state handed to every `ResolverPass.tryResolve` call. */
export interface ResolverContext {
    /** Database handle the pass may query or update. */
    readonly db: Database.Database;
    /** Running count of closed questions, keyed by pass name. */
    readonly tally: Record<string, number>;
}
|
|
7
|
+
/** A deterministic strategy that may close a question without the agent. */
export interface ResolverPass {
    /** Label used as the tally key and in error logs. */
    readonly name: string;
    /** Question kinds this pass is tried for; other kinds skip it. */
    readonly kinds: readonly string[];
    /** Try to close one question. Returns the answer if closed, else null. */
    tryResolve(u: QuestionRow, ctx: ResolverContext): Promise<string | null>;
}
|
|
13
|
+
/** Outcome of one `runResolve` call. */
export interface ResolveSummary {
    /** Number of questions loaded when the run started. */
    readonly total: number;
    /** How many of those are no longer open at the end (derived from `total` and `remaining`). */
    readonly resolved: number;
    /** Question count reported for the same scope once the run has finished. */
    readonly remaining: number;
    /** Closures per source: one key per pass name, plus `agent_resolution` for the agent. */
    readonly tally: Readonly<Record<string, number>>;
}
|
|
19
|
+
/** Options accepted by `runResolve`. */
export interface RunResolveOpts {
    /** Database handle used for every query and update. */
    db: Database.Database;
    /** Narrows to a single scan's questions. Omit = every question. */
    scanId?: string;
    /** Defaults to `true`. When `false`, only the deterministic passes run and the agent is skipped. */
    interactive?: boolean;
    /** Forwarded to the resolver agent's context; presumably how the agent asks the user — confirm in the agent. */
    promptUser?: (prompt: string, options?: string[], facts?: any) => Promise<string>;
    /** Forwarded unchanged to the resolver agent as its progress callback. */
    onProgress?: (event: {
        phase: "tool" | "responding";
        toolName?: string;
        toolCount: number;
        elapsedMs: number;
    }) => void;
}
|
|
32
|
+
export declare const RESOLVER_PASSES: readonly ResolverPass[];
|
|
33
|
+
/**
|
|
34
|
+
* Single entry point shared by the in-scan resolve phase and the standalone
|
|
35
|
+
* `plasalid resolve` command. Runs deterministic passes first, then (when
|
|
36
|
+
* interactive) hands the leftovers to the LLM resolver agent. Closed
|
|
37
|
+
* questions get compacted into scanning_hint memories.
|
|
38
|
+
*/
|
|
39
|
+
export declare function runResolve(opts: RunResolveOpts): Promise<ResolveSummary>;
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import { closeQuestion, listQuestions, countQuestions, } from "../db/queries/questions.js";
|
|
2
|
+
import { updatePosting } from "../db/queries/transactions.js";
|
|
3
|
+
import { runResolveAgent } from "../ai/agent.js";
|
|
4
|
+
import { synthesizeMemoryRules } from "./resolver-memory.js";
|
|
5
|
+
import { converge } from "./converge.js";
|
|
6
|
+
const MAX_AGENT_PASSES = 3;
|
|
7
|
+
/**
 * Deterministic pass backed by stored `scanning_hint` memories.
 *
 * For one question it loads every `scanning_hint` row from `memories`,
 * parses each as `key -> answer`, and returns the answer of the first row
 * whose key equals the question's canonical key (`[kind] prompt`). Rows that
 * do not parse are ignored. Returns `null` when nothing matches.
 */
const memoryRulePass = {
    name: "memory_rule",
    kinds: ["uncategorized", "uncategorized_expense", "duplicate", "correlation", "recurrence_candidate", "similar_accounts", "boundary_continuation", "scan_truncated", "scan_commit_failure"],
    async tryResolve(u, ctx) {
        const hints = ctx.db
            .prepare(`SELECT content FROM memories WHERE category = 'scanning_hint'`)
            .all();
        const wanted = canonicalKey(u);
        for (const hint of hints) {
            const parsed = parseRule(hint.content);
            if (parsed && parsed.key === wanted) {
                return parsed.answer;
            }
        }
        return null;
    },
};
|
|
29
|
+
/**
 * Deterministic pass that applies a merchant's default account.
 *
 * Applies only to `uncategorized_expense` questions that carry a
 * `transaction_id`. It looks up the transaction's `merchant_id`, then that
 * merchant's `default_account_id`, and moves every posting of the
 * transaction that sits on `expense:uncategorized` to that account.
 * Returns the account id used as the answer, or `null` when any lookup
 * comes back empty or the transaction has no such posting.
 */
const merchantDefaultPass = {
    name: "merchant_default",
    kinds: ["uncategorized_expense"],
    async tryResolve(u, ctx) {
        const { db } = ctx;
        const txId = u.transaction_id;
        if (!txId) {
            return null;
        }
        const txRow = db
            .prepare(`SELECT merchant_id FROM transactions WHERE id = ?`)
            .get(txId);
        const merchantId = txRow?.merchant_id;
        if (!merchantId) {
            return null;
        }
        const merchantRow = db
            .prepare(`SELECT default_account_id FROM merchants WHERE id = ?`)
            .get(merchantId);
        const defaultAccount = merchantRow?.default_account_id;
        if (!defaultAccount) {
            return null;
        }
        const uncategorized = db
            .prepare(`SELECT p.id FROM postings p
JOIN accounts a ON a.id = p.account_id
WHERE p.transaction_id = ? AND a.id = 'expense:uncategorized'`)
            .all(txId);
        if (uncategorized.length === 0) {
            return null;
        }
        uncategorized.forEach((posting) => {
            updatePosting(db, posting.id, { account_id: defaultAccount });
        });
        return defaultAccount;
    },
};
|
|
64
|
+
/**
 * Deterministic passes in the order they are tried. For a given question the
 * first applicable pass that returns an answer wins, so stored memory rules
 * take precedence over merchant defaults.
 */
export const RESOLVER_PASSES = [
    memoryRulePass,
    merchantDefaultPass,
];
|
|
68
|
+
/**
 * Resolve questions: deterministic passes first, then (when interactive) the
 * LLM resolver agent for whatever is left.
 *
 * Flow:
 *  1. Load up to 1000 questions, narrowed to `opts.scanId` when given.
 *  2. For each question, try the applicable `RESOLVER_PASSES`; on an answer,
 *     close the question and bump the tally entry of the pass that produced it.
 *  3. If `opts.interactive` is not `false` and questions remain, run the
 *     agent loop.
 *  4. Persist every closure gathered in steps 2–3 as a `scanning_hint`
 *     memory.
 *
 * @param opts See `RunResolveOpts`.
 * @returns `total` (questions loaded in step 1), `remaining` (count after the
 *          run), `resolved` (`total - remaining`, never below 0) and the
 *          per-source `tally`.
 */
export async function runResolve(opts) {
    const { db } = opts;
    const tally = {};
    const closures = [];
    // NOTE(review): listQuestions is filtered with `scanId` while
    // countQuestions (see countRemaining) uses `scan_id` — confirm both keys
    // are what db/queries/questions.js expects.
    const initial = listQuestions(db, { scanId: opts.scanId, limit: 1000 });
    const total = initial.length;
    if (total === 0) {
        return { total: 0, resolved: 0, remaining: 0, tally };
    }
    for (const u of initial) {
        const passes = matchingPasses(u);
        if (passes.length === 0)
            continue;
        const result = await tryPasses(u, passes, { db, tally });
        if (!result)
            continue;
        const closed = closeQuestion(db, u.id, result.answer);
        // closeQuestion returned nothing: treat the question as still open.
        if (!closed)
            continue;
        closures.push(closed);
        tally[result.passName] = (tally[result.passName] ?? 0) + 1;
    }
    const interactive = opts.interactive ?? true;
    if (interactive && countRemaining(db, opts.scanId) > 0) {
        await runAgentLoop(opts, closures, tally);
    }
    synthesizeMemoryRules(db, closures);
    const remaining = countRemaining(db, opts.scanId);
    // `total` is capped by the list limit while `remaining` is an uncapped
    // count, so the raw difference can be negative; never report that.
    const resolved = Math.max(0, total - remaining);
    return { total, resolved, remaining, tally };
}
|
|
104
|
+
/** Passes whose `kinds` list contains the question's kind; none when the kind is missing. */
function matchingPasses(u) {
    const { kind } = u;
    return kind
        ? RESOLVER_PASSES.filter((candidate) => candidate.kinds.includes(kind))
        : [];
}
|
|
109
|
+
/**
 * Run `passes` in order against one question and stop at the first answer.
 *
 * A pass that throws is logged and treated as "no answer". Returns
 * `{ passName, answer }` for the first pass whose result is neither `null`
 * nor `undefined`, or `null` when every pass declines.
 */
async function tryPasses(u, passes, ctx) {
    for (let i = 0; i < passes.length; i++) {
        const candidate = passes[i];
        let answer = null;
        try {
            answer = await candidate.tryResolve(u, ctx);
        }
        catch (err) {
            const detail = err instanceof Error ? err.message : String(err);
            console.error(`[resolver pass ${candidate.name}] ${detail}`);
            answer = null;
        }
        if (answer === null || answer === undefined) {
            continue;
        }
        return { passName: candidate.name, answer };
    }
    return null;
}
|
|
124
|
+
/** Question count from `countQuestions`, narrowed to `scanId` when one is given. */
function countRemaining(db, scanId) {
    if (!scanId) {
        return countQuestions(db);
    }
    return countQuestions(db, { scan_id: scanId });
}
|
|
127
|
+
/**
 * Drive the LLM resolver agent through `converge`, at most
 * `MAX_AGENT_PASSES` times.
 *
 * Each pass re-lists the questions in scope (up to 1000), sends them to the
 * agent as one user message, and reports the question count afterwards. The
 * loop is done when that count reaches 0 and is considered stalled when a
 * pass does not lower it. Closures reported by the agent through
 * `onQuestionClosed` are appended to `closures` and counted under
 * `tally["agent_resolution"]`.
 */
async function runAgentLoop(opts, closures, tally) {
    const { db } = opts;
    const openCount = () => countRemaining(db, opts.scanId);
    const recordAgentClosure = (closed) => {
        closures.push(closed);
        tally["agent_resolution"] = (tally["agent_resolution"] ?? 0) + 1;
    };
    const runOnePass = async () => {
        const pending = listQuestions(db, { scanId: opts.scanId, limit: 1000 });
        if (pending.length === 0) {
            return 0;
        }
        await runResolveAgent({
            db,
            prompt: {},
            initialMessages: [{ role: "user", content: buildResolveUserMessage(pending) }],
            agentCtx: {
                interactive: true,
                promptUser: opts.promptUser,
                onQuestionClosed: recordAgentClosure,
            },
            onProgress: opts.onProgress,
        });
        return openCount();
    };
    await converge({
        initial: openCount(),
        maxAttempts: MAX_AGENT_PASSES,
        isDone: (n) => n === 0,
        isStalled: (curr, prev) => curr >= prev,
        onPass: runOnePass,
    });
}
|
|
164
|
+
/**
 * Render the questions as the plain-text user message sent to the agent:
 * a count line, a blank line, a `Questions:` heading, then two lines per
 * question (ids/kind/options, followed by the prompt with newlines flattened).
 */
function buildResolveUserMessage(questions) {
    const header = [`${questions.length} question(s) to resolve.`, ``, `Questions:`];
    const entries = questions.flatMap((q) => {
        const choices = parseOptions(q.options_json);
        const choiceSuffix = choices.length > 0 ? ` | options=[${choices.join(" / ")}]` : "";
        return [
            `- ${q.id} | kind=${q.kind ?? "(none)"} | tx=${q.transaction_id ?? "(none)"} | acct=${q.account_id ?? "(none)"} | file=${q.file_id ?? "(none)"}${choiceSuffix}`,
            ` prompt: ${q.prompt.replace(/\n/g, " ")}`,
        ];
    });
    return [...header, ...entries].join("\n");
}
|
|
173
|
+
/**
 * Decode a JSON-encoded options list.
 * Returns only the string entries of a JSON array; anything else (empty
 * input, invalid JSON, a non-array value) yields an empty list.
 */
function parseOptions(json) {
    if (!json) {
        return [];
    }
    let decoded;
    try {
        decoded = JSON.parse(json);
    }
    catch {
        return [];
    }
    if (!Array.isArray(decoded)) {
        return [];
    }
    return decoded.filter((entry) => typeof entry === "string");
}
|
|
184
|
+
/**
 * Lookup key for a question: `[kind] prompt`, with whitespace runs in the
 * prompt collapsed to single spaces and the prompt trimmed. A missing kind
 * is written as `general`.
 */
function canonicalKey(u) {
    const label = u.kind ?? "general";
    const prompt = u.prompt.replace(/\s+/g, " ").trim();
    return `[${label}] ${prompt}`;
}
|
|
187
|
+
/**
 * Split a stored rule into `{ key, answer }` at its last ` -> ` separator.
 * Returns `null` when the separator is absent or either side is empty after
 * trimming.
 */
function parseRule(body) {
    const SEPARATOR = " -> ";
    const cut = body.lastIndexOf(SEPARATOR);
    if (cut === -1) {
        return null;
    }
    const key = body.slice(0, cut).trim();
    const answer = body.slice(cut + SEPARATOR.length).trim();
    return key && answer ? { key, answer } : null;
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lightweight Result helpers shared across scanner subdomains. Use this
|
|
3
|
+
* instead of inline try/catch when a function can fail with a human-readable
|
|
4
|
+
* reason and the caller needs to branch on the outcome (decrypt, chunk parse,
|
|
5
|
+
* commit-one-transaction). Distinct from concurrency.ts `Settled<T>` — that
|
|
6
|
+
* type is owned by `runWithConcurrency` and includes an `error: unknown`;
|
|
7
|
+
* `Result<T>` stringifies the error up front for ergonomic message handling.
|
|
8
|
+
*/
|
|
9
|
+
export type Result<T> = {
|
|
10
|
+
ok: true;
|
|
11
|
+
value: T;
|
|
12
|
+
} | {
|
|
13
|
+
ok: false;
|
|
14
|
+
error: string;
|
|
15
|
+
};
|
|
16
|
+
export declare function errorMessage(err: unknown): string;
|
|
17
|
+
export declare function tryExecute<T>(fn: () => Promise<T> | T): Promise<Result<T>>;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lightweight Result helpers shared across scanner subdomains. Use this
|
|
3
|
+
* instead of inline try/catch when a function can fail with a human-readable
|
|
4
|
+
* reason and the caller needs to branch on the outcome (decrypt, chunk parse,
|
|
5
|
+
* commit-one-transaction). Distinct from concurrency.ts `Settled<T>` — that
|
|
6
|
+
* type is owned by `runWithConcurrency` and includes an `error: unknown`;
|
|
7
|
+
* `Result<T>` stringifies the error up front for ergonomic message handling.
|
|
8
|
+
*/
|
|
9
|
+
/** Human-readable text for a thrown value: `message` for Error instances, `String(err)` otherwise. */
export function errorMessage(err) {
    if (err instanceof Error) {
        return err.message;
    }
    return String(err);
}
|
|
12
|
+
/**
 * Run `fn` (sync or async) and report the outcome as a value instead of
 * throwing: `{ ok: true, value }` on success, `{ ok: false, error }` with
 * the stringified failure otherwise.
 */
export async function tryExecute(fn) {
    let value;
    try {
        value = await fn();
    }
    catch (err) {
        return { ok: false, error: errorMessage(err) };
    }
    return { ok: true, value };
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Generic "drive a loop with named hooks" helper.
|
|
3
|
+
*
|
|
4
|
+
* The driver owns counting passes, stall detection, and the iteration cap.
|
|
5
|
+
* Everything else (work performed each pass, what to print when, how to react
|
|
6
|
+
* to stall vs success vs failure) lives in the hooks the caller supplies.
|
|
7
|
+
*
|
|
8
|
+
* The state `S` is whatever quantity decides "are we done?" — typically a
|
|
9
|
+
* remaining-work count, but it can be any value you can compare.
|
|
10
|
+
*/
|
|
11
|
+
export interface RunPassesOpts<S> {
|
|
12
|
+
/** Initial state (e.g. `countOpenUnknowns(db)`). */
|
|
13
|
+
initial: S;
|
|
14
|
+
/** Maximum number of passes before declaring failure. Must be >= 1. */
|
|
15
|
+
maxAttempts: number;
|
|
16
|
+
/** True when the work is finished and the loop should stop cleanly. */
|
|
17
|
+
isDone: (state: S) => boolean;
|
|
18
|
+
/**
|
|
19
|
+
* True when this pass made no progress vs the previous pass. Fires after
|
|
20
|
+
* the first pass at the earliest.
|
|
21
|
+
*/
|
|
22
|
+
isStalled: (curr: S, prev: S) => boolean;
|
|
23
|
+
/** Run one pass; return the new state. Pass numbers are 1-indexed. */
|
|
24
|
+
onPass: (pass: number, state: S) => Promise<S>;
|
|
25
|
+
onStart?: (state: S) => void;
|
|
26
|
+
onStall?: (state: S) => void;
|
|
27
|
+
onSuccess?: (state: S) => void;
|
|
28
|
+
onFail?: (state: S) => void;
|
|
29
|
+
}
|
|
30
|
+
export declare function runPasses<S>(opts: RunPassesOpts<S>): Promise<S>;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
 * Repeat `opts.onPass` until the state is done, a pass stalls, or
 * `opts.maxAttempts` passes have run.
 *
 * `onStart` fires once with the initial state. Before every pass after the
 * first, `isStalled(current, previous)` is consulted; a stall fires `onStall`
 * and returns immediately. When the loop ends otherwise, `onSuccess` fires if
 * `isDone(state)` holds and `onFail` if it does not.
 *
 * @returns The last state produced.
 */
export async function runPasses(opts) {
    let current = opts.initial;
    let previous = current;
    let pass = 1;
    opts.onStart?.(current);
    while (pass <= opts.maxAttempts && !opts.isDone(current)) {
        const stalled = pass > 1 && opts.isStalled(current, previous);
        if (stalled) {
            opts.onStall?.(current);
            return current;
        }
        previous = current;
        current = await opts.onPass(pass, current);
        pass += 1;
    }
    const finish = opts.isDone(current) ? opts.onSuccess : opts.onFail;
    finish?.(current);
    return current;
}
|
package/dist/scanner/unlock.js
CHANGED
|
@@ -115,7 +115,7 @@ export function persistUnlockOutcome(db, filePath, outcome) {
|
|
|
115
115
|
spinner.succeed(`Saved password for pattern ${pattern} in secure vault.`);
|
|
116
116
|
}
|
|
117
117
|
catch (err) {
|
|
118
|
-
spinner.fail(`Could not save password: ${err.message}`);
|
|
118
|
+
spinner.fail(`Could not save password: ${err instanceof Error ? err.message : String(err)}`);
|
|
119
119
|
throw err;
|
|
120
120
|
}
|
|
121
121
|
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import type { Chunk } from "./engine.js";
|
|
3
|
+
import type { ScanHooks } from "./hooks.js";
|
|
4
|
+
import type { ScanProgress } from "./progress.js";
|
|
5
|
+
/** Everything `runScanWorker` needs to process one chunk. */
export interface ScanWorkerDeps {
    /** Database handle passed to the scan agent and used to record a failure question. */
    readonly db: Database.Database;
    /** Id of the current scan; attached to the agent context and to any failure question. */
    readonly scanId: string;
    /** Id of the file's `scanned_files` row, if any; stored as `null` on a failure question when undefined. */
    readonly scannedFileId: string | undefined;
    /** Progress sink handed to the agent context and ticked when a failure question is recorded. */
    readonly progress: ScanProgress;
    /** The chunk to parse. */
    readonly chunk: Chunk;
}
|
|
12
|
+
/**
|
|
13
|
+
* Process one chunk: run the LLM scan agent over a single-page PDF blob with
|
|
14
|
+
* scanId + progress sink + scanned_files row injected through the agent
|
|
15
|
+
* context. Agent's record_transactions / note_question calls write directly to
|
|
16
|
+
* the DB; per-row ticks fan out via `progress.emit`. Failures land in the DB
|
|
17
|
+
* as a `chunk_failed` question so the resolver can pick them up.
|
|
18
|
+
*/
|
|
19
|
+
export declare function runScanWorker(deps: ScanWorkerDeps, hooks: ScanHooks): Promise<void>;
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { randomUUID } from "crypto";
|
|
2
|
+
import { runScanAgent } from "../ai/agent.js";
|
|
3
|
+
import { recordQuestion } from "../db/queries/questions.js";
|
|
4
|
+
import { buildDocumentBlock } from "./pdf/pdf.js";
|
|
5
|
+
import { tryExecute } from "./result.js";
|
|
6
|
+
/**
 * Process one chunk with the LLM scan agent.
 *
 * A fresh worker id (`cw:<uuid>`) is announced through `hooks.onWorkerStart`.
 * The agent then receives a single user message made of the chunk's document
 * block and the text prompt from `buildChunkPrompt`, with scan id, file id,
 * chunk id and progress sink in its non-interactive context. The outcome is
 * reported through `hooks.onWorkerEnd`; a failed run is recorded as a
 * `chunk_failed` question instead of being thrown.
 */
export async function runScanWorker(deps, hooks) {
    const workerId = `cw:${randomUUID()}`;
    hooks.onWorkerStart?.(workerId, deps.chunk);
    // Built inside the callback so that a failure while assembling the
    // message is captured by tryExecute as well.
    const scanChunk = () => {
        const documentBlock = buildDocumentBlock(deps.chunk.bytes, deps.chunk.fileName, deps.chunk.mime);
        const instructions = { type: "text", text: buildChunkPrompt(deps.chunk) };
        return runScanAgent({
            db: deps.db,
            initialMessages: [
                {
                    role: "user",
                    content: [documentBlock, instructions],
                },
            ],
            prompt: { fileName: deps.chunk.fileName },
            agentCtx: {
                interactive: false,
                scanId: deps.scanId,
                fileId: deps.scannedFileId,
                chunkId: deps.chunk.chunkId,
                progress: deps.progress,
            },
        });
    };
    const outcome = await tryExecute(scanChunk);
    hooks.onWorkerEnd?.(workerId, deps.chunk, outcome.ok);
    if (outcome.ok) {
        return;
    }
    recordChunkFailure(deps, outcome.error);
}
|
|
40
|
+
/**
 * Best-effort: store a failed chunk as a `chunk_failed` question and emit a
 * `question` progress tick for it.
 *
 * Never throws. If the question itself cannot be recorded, the problem is
 * logged to stderr so it is not lost silently, and the worker carries on.
 *
 * @param deps  Worker dependencies (db, scan/file ids, progress sink, chunk).
 * @param error Human-readable reason the chunk failed.
 */
function recordChunkFailure(deps, error) {
    try {
        recordQuestion(deps.db, {
            file_id: deps.scannedFileId ?? null,
            scan_id: deps.scanId,
            transaction_id: null,
            account_id: null,
            kind: "chunk_failed",
            prompt: `Chunk ${deps.chunk.fileName} p${deps.chunk.pageNumber} failed to parse: ${error}.`,
        });
        deps.progress.emit({ chunkId: deps.chunk.chunkId, kind: "question" });
    }
    catch (err) {
        // failure to record a failure shouldn't crash the file worker,
        // but it should still leave a trace.
        const detail = err instanceof Error ? err.message : String(err);
        console.error(`[scan worker] could not record chunk failure for ${deps.chunk?.fileName} p${deps.chunk?.pageNumber}: ${detail}`);
    }
}
|
|
56
|
+
/**
 * Text instructions sent to the scan agent alongside one page: a line naming
 * the page and file, a blank line, a `Steps:` heading, and five numbered steps.
 */
function buildChunkPrompt(chunk) {
    const intro = `You are parsing page ${chunk.pageNumber} of ${chunk.totalPages} of ${chunk.fileName}.`;
    const steps = [
        `1. Call list_accounts to see what already exists.`,
        `2. If this page reveals an account that isn't in the chart yet, call create_account once.`,
        `3. For every transaction on this page, call record_transactions (plural) with all rows in one batch.`,
        `4. If the first or last row looks incomplete (no date, or no amount column visible — the row likely continues onto an adjacent page), call note_question with kind="boundary_continuation" and the raw row text. Do NOT invent missing fields.`,
        `5. When done with this page, call mark_file_scanned with a short summary.`,
    ];
    return `${intro}\n\nSteps:\n${steps.join("\n")}`;
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import type { ScanBuffer } from "../buffer/types.js";
|
|
3
|
+
import type { Chunk, ScanHooks } from "../engine/types.js";
|
|
4
|
+
export interface ChunkWorkerDeps {
|
|
5
|
+
readonly db: Database.Database;
|
|
6
|
+
readonly buffer: ScanBuffer;
|
|
7
|
+
readonly chunk: Chunk;
|
|
8
|
+
}
|
|
9
|
+
export interface ChunkWorkerResult {
|
|
10
|
+
readonly workerId: string;
|
|
11
|
+
readonly ok: boolean;
|
|
12
|
+
readonly error?: string;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Process one chunk: run the LLM scan agent over a single-page PDF blob with
|
|
16
|
+
* the shared Buffer + chunkId injected. The agent's `record_transactions`
|
|
17
|
+
* calls land in the shared buffer; events fan out to the auditor and the
|
|
18
|
+
* dashboard.
|
|
19
|
+
*/
|
|
20
|
+
export declare function runChunkWorker(deps: ChunkWorkerDeps, hooks: ScanHooks): Promise<ChunkWorkerResult>;
|