plasalid 0.3.5 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/README.md +28 -39
  2. package/dist/accounts/taxonomy.d.ts +1 -1
  3. package/dist/accounts/taxonomy.js +2 -2
  4. package/dist/ai/agent.d.ts +6 -5
  5. package/dist/ai/agent.js +7 -6
  6. package/dist/ai/memory.d.ts +12 -5
  7. package/dist/ai/memory.js +12 -0
  8. package/dist/ai/personas.d.ts +10 -0
  9. package/dist/ai/personas.js +123 -0
  10. package/dist/ai/prompt-sections.d.ts +44 -0
  11. package/dist/ai/prompt-sections.js +89 -0
  12. package/dist/ai/system-prompt.d.ts +3 -3
  13. package/dist/ai/system-prompt.js +44 -165
  14. package/dist/ai/tools/index.js +12 -7
  15. package/dist/ai/tools/ingest.d.ts +2 -1
  16. package/dist/ai/tools/ingest.js +220 -83
  17. package/dist/ai/tools/read.js +31 -0
  18. package/dist/ai/tools/review.d.ts +2 -0
  19. package/dist/ai/tools/review.js +362 -0
  20. package/dist/ai/tools/scan.js +4 -2
  21. package/dist/ai/tools/types.d.ts +23 -3
  22. package/dist/cli/commands/review.d.ts +2 -0
  23. package/dist/cli/commands/review.js +15 -0
  24. package/dist/cli/commands/scan.d.ts +4 -2
  25. package/dist/cli/commands/scan.js +147 -19
  26. package/dist/cli/index.js +11 -8
  27. package/dist/cli/ink/scan_dashboard.d.ts +38 -0
  28. package/dist/cli/ink/scan_dashboard.js +62 -0
  29. package/dist/cli/ux.d.ts +2 -1
  30. package/dist/cli/ux.js +36 -2
  31. package/dist/db/queries/account_balance.d.ts +1 -0
  32. package/dist/db/queries/concerns.d.ts +47 -0
  33. package/dist/db/queries/concerns.js +87 -0
  34. package/dist/db/queries/journal.d.ts +74 -8
  35. package/dist/db/queries/journal.js +131 -19
  36. package/dist/db/queries/recurrences.d.ts +33 -0
  37. package/dist/db/queries/recurrences.js +130 -0
  38. package/dist/db/schema.js +25 -2
  39. package/dist/reviewer/pipeline.d.ts +18 -0
  40. package/dist/reviewer/pipeline.js +46 -0
  41. package/dist/reviewer/prompts.d.ts +12 -0
  42. package/dist/reviewer/prompts.js +22 -0
  43. package/dist/scanner/account_mutex.d.ts +1 -0
  44. package/dist/scanner/account_mutex.js +16 -0
  45. package/dist/scanner/buffer.d.ts +48 -0
  46. package/dist/scanner/buffer.js +63 -0
  47. package/dist/scanner/concurrency.d.ts +14 -0
  48. package/dist/scanner/concurrency.js +31 -0
  49. package/dist/scanner/decrypt_queue.d.ts +57 -0
  50. package/dist/scanner/decrypt_queue.js +96 -0
  51. package/dist/scanner/pipeline.d.ts +46 -18
  52. package/dist/scanner/pipeline.js +250 -97
  53. package/dist/scanner/prompts.js +1 -1
  54. package/package.json +1 -1
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Run an array of async task factories with a fixed concurrency bound. Resolves
3
+ * to an array of results in the same order as the input tasks (regardless of
4
+ * completion order). Any rejection settles that slot with `{ error: err }` and the
5
+ * caller is responsible for tracking failures — but since each task is awaited
6
+ * inside its own `try/catch` in the worker loop, one task throwing
7
+ * never aborts the rest of the run.
8
+ *
9
+ * No new dependency. Simple worker-pool: kicks off up to `n` tasks, then each
10
+ * worker pulls the next index from a shared cursor until the queue is drained.
11
+ */
12
+ export async function runWithConcurrency(tasks, n) {
13
+ const results = new Array(tasks.length);
14
+ const workerCount = Math.max(1, Math.min(n, tasks.length));
15
+ let cursor = 0;
16
+ async function worker() {
17
+ while (true) {
18
+ const index = cursor++;
19
+ if (index >= tasks.length)
20
+ return;
21
+ try {
22
+ results[index] = await tasks[index]();
23
+ }
24
+ catch (err) {
25
+ results[index] = { error: err };
26
+ }
27
+ }
28
+ }
29
+ await Promise.all(Array.from({ length: workerCount }, () => worker()));
30
+ return results;
31
+ }
@@ -0,0 +1,57 @@
1
+ import type Database from "libsql";
2
+ import type { ScannedFile } from "./walker.js";
3
+ export interface DecryptedFile {
4
+ path: string;
5
+ fileName: string;
6
+ relPath: string;
7
+ hash: string;
8
+ mime: string;
9
+ decryptedBytes: Buffer;
10
+ /** Id of the prior scanned_files row that covered this hash; only present when --force is set. */
11
+ replacesPriorScannedFileId?: string;
12
+ }
13
+ export interface SkippedFile {
14
+ file: ScannedFile;
15
+ /** id of the scanned_files row that already has this hash. */
16
+ existingScannedFileId: string;
17
+ }
18
+ export interface FailedFile {
19
+ file: ScannedFile;
20
+ error: string;
21
+ }
22
+ export interface DecryptQueueResult {
23
+ decrypted: DecryptedFile[];
24
+ skipped: SkippedFile[];
25
+ failed: FailedFile[];
26
+ }
27
+ export interface DecryptQueueOptions {
28
+ /** Re-decrypt and queue files that match a prior hash. */
29
+ force: boolean;
30
+ /** If false, never prompt for a password; treat unlock failure as failed. */
31
+ interactive: boolean;
32
+ /** Called as each file finishes (any outcome) so a spinner can update its label. */
33
+ onProgress?: (event: {
34
+ index: number;
35
+ total: number;
36
+ fileName: string;
37
+ outcome: "decrypted" | "skipped" | "failed";
38
+ }) => void;
39
+ }
40
+ /**
41
+ * Phase 1 of scan: walk every file in the queue, decrypt any that need it,
42
+ * and return a partition (decrypted / skipped / failed). The actual agent
43
+ * work in Phase 2 only sees `decrypted` — no password prompts during the
44
+ * parallel scan loop.
45
+ *
46
+ * Failures don't abort; the caller (CLI) confirms whether to proceed.
47
+ */
48
+ export declare function decryptQueue(db: Database.Database, files: ScannedFile[], opts: DecryptQueueOptions): Promise<DecryptQueueResult>;
49
+ /**
50
+ * Interactive go/no-go gate when some files failed to decrypt. Returns true
51
+ * if the caller should proceed with the decrypted set, false to abort the
52
+ * whole scan run.
53
+ *
54
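+ * Returns true immediately when nothing failed, and false when failures left no file decrypted. Otherwise returns true automatically when interactive is false (CI / non-TTY runs);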
55
+ * the caller is expected to inspect `result.failed` and report.
56
+ */
57
+ export declare function confirmProceedAfterFailures(result: DecryptQueueResult, interactive: boolean): Promise<boolean>;
@@ -0,0 +1,96 @@
1
+ import chalk from "chalk";
2
+ import inquirer from "inquirer";
3
+ import { readPdf } from "./pdf.js";
4
+ import { unlockIfNeeded, persistUnlockOutcome } from "./unlock.js";
5
+ /**
6
+ * Phase 1 of scan: walk every file in the queue, decrypt any that need it,
7
+ * and return a partition (decrypted / skipped / failed). The actual agent
8
+ * work in Phase 2 only sees `decrypted` — no password prompts during the
9
+ * parallel scan loop.
10
+ *
11
+ * Failures don't abort; the caller (CLI) confirms whether to proceed.
12
+ */
13
+ export async function decryptQueue(db, files, opts) {
14
+ const decrypted = [];
15
+ const skipped = [];
16
+ const failed = [];
17
+ for (let i = 0; i < files.length; i++) {
18
+ const f = files[i];
19
+ let pdf;
20
+ try {
21
+ pdf = readPdf(f.path);
22
+ }
23
+ catch (err) {
24
+ failed.push({ file: f, error: `read failed: ${err.message}` });
25
+ opts.onProgress?.({ index: i, total: files.length, fileName: f.name, outcome: "failed" });
26
+ continue;
27
+ }
28
+ const existing = findScannedByHash(db, pdf.hash);
29
+ if (existing && !opts.force) {
30
+ skipped.push({ file: f, existingScannedFileId: existing.id });
31
+ opts.onProgress?.({ index: i, total: files.length, fileName: f.name, outcome: "skipped" });
32
+ continue;
33
+ }
34
+ try {
35
+ const unlocked = await unlockIfNeeded({
36
+ db,
37
+ filePath: f.path,
38
+ bytes: pdf.bytes,
39
+ interactive: opts.interactive,
40
+ });
41
+ persistUnlockOutcome(db, f.path, unlocked.outcome);
42
+ decrypted.push({
43
+ path: f.path,
44
+ fileName: f.name,
45
+ relPath: f.relPath,
46
+ hash: pdf.hash,
47
+ mime: pdf.mime,
48
+ decryptedBytes: unlocked.decrypted,
49
+ replacesPriorScannedFileId: existing?.id,
50
+ });
51
+ opts.onProgress?.({ index: i, total: files.length, fileName: f.name, outcome: "decrypted" });
52
+ }
53
+ catch (err) {
54
+ failed.push({ file: f, error: err.message ?? "unlock failed" });
55
+ opts.onProgress?.({ index: i, total: files.length, fileName: f.name, outcome: "failed" });
56
+ }
57
+ }
58
+ return { decrypted, skipped, failed };
59
+ }
60
+ /**
61
+ * Interactive go/no-go gate when some files failed to decrypt. Returns true
62
+ * if the caller should proceed with the decrypted set, false to abort the
63
+ * whole scan run.
64
+ *
65
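+ * Returns true immediately when nothing failed, and false when failures left no file decrypted. Otherwise returns true automatically when interactive is false (CI / non-TTY runs);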
66
+ * the caller is expected to inspect `result.failed` and report.
67
+ */
68
+ export async function confirmProceedAfterFailures(result, interactive) {
69
+ if (result.failed.length === 0)
70
+ return true;
71
+ console.log("");
72
+ console.log(chalk.yellow(`${result.failed.length} file(s) could not be decrypted:`));
73
+ for (const f of result.failed) {
74
+ console.log(` ${chalk.red("✗")} ${f.file.relPath} — ${chalk.dim(f.error)}`);
75
+ }
76
+ if (result.decrypted.length === 0) {
77
+ console.log(chalk.red("Nothing to scan."));
78
+ return false;
79
+ }
80
+ if (!interactive)
81
+ return true;
82
+ const { proceed } = (await inquirer.prompt([
83
+ {
84
+ type: "confirm",
85
+ name: "proceed",
86
+ message: `Proceed scanning the ${result.decrypted.length} file(s) that decrypted successfully?`,
87
+ default: true,
88
+ },
89
+ ]));
90
+ return proceed;
91
+ }
92
+ function findScannedByHash(db, hash) {
93
+ return db
94
+ .prepare(`SELECT id FROM scanned_files WHERE file_hash = ?`)
95
+ .get(hash) ?? null;
96
+ }
@@ -1,32 +1,60 @@
1
+ export type ScanFileStatus = "scanned" | "replaced" | "failed" | "skipped";
1
2
  export interface ScanFileResult {
2
- fileId: string | null;
3
- status: "scanned" | "needs_input" | "failed" | "skipped" | "replaced";
4
- summary?: string;
3
+ name: string;
4
+ relPath: string;
5
+ status: ScanFileStatus;
6
+ entries: number;
7
+ concerns: number;
5
8
  error?: string;
6
- pendingQuestions: number;
7
9
  }
8
- export interface ScanOptions {
9
- interactive?: boolean;
10
- force?: boolean;
11
- onProgress?: (msg: string) => void;
12
- }
13
- export declare function scanFile(filePath: string, opts?: ScanOptions): Promise<ScanFileResult>;
14
10
  export interface ScanSummary {
15
11
  total: number;
16
12
  scanned: number;
17
13
  replaced: number;
18
14
  skipped: number;
19
- needsInput: number;
20
15
  failed: number;
21
- details: {
22
- name: string;
23
- relPath: string;
24
- result: ScanFileResult;
25
- }[];
16
+ concerns: number;
17
+ details: ScanFileResult[];
26
18
  }
27
- export interface RunScanOptions extends ScanOptions {
28
- /** Optional regex (string). Partial, case-insensitive, against the relative path. */
19
+ /** Event hooks the CLI subscribes to. All callbacks are best-effort and ignored if absent. */
20
+ export interface ScanRunEvents {
21
+ decryptStart?: (count: number) => void;
22
+ decryptProgress?: (e: {
23
+ index: number;
24
+ total: number;
25
+ fileName: string;
26
+ outcome: "decrypted" | "skipped" | "failed";
27
+ }) => void;
28
+ decryptDone?: (e: {
29
+ decrypted: number;
30
+ skipped: number;
31
+ failed: number;
32
+ }) => void;
33
+ scanStart?: (e: {
34
+ fileName: string;
35
+ }) => void;
36
+ scanProgress?: (e: {
37
+ fileName: string;
38
+ step: string;
39
+ }) => void;
40
+ scanEnd?: (e: {
41
+ fileName: string;
42
+ status: "scanned" | "failed";
43
+ entries: number;
44
+ concerns: number;
45
+ error?: string;
46
+ }) => void;
47
+ correlating?: (pairs: number) => void;
48
+ committing?: () => void;
49
+ }
50
+ export interface RunScanOptions {
29
51
  regex?: string;
52
+ force?: boolean;
53
+ /** Allow interactive password prompts when a PDF is encrypted. */
54
+ interactive?: boolean;
55
+ /** Max concurrent scan agents. Default 3, hard cap 8. */
56
+ concurrency?: number;
57
+ events?: ScanRunEvents;
30
58
  }
31
59
  export declare function compileMatcher(input: string): RegExp;
32
60
  export declare function runScan(opts?: RunScanOptions): Promise<ScanSummary>;
@@ -1,64 +1,73 @@
1
1
  import { randomUUID } from "crypto";
2
2
  import { getDb } from "../db/connection.js";
3
+ import { countOpenConcerns, } from "../db/queries/concerns.js";
4
+ import { correlatePairs } from "../db/queries/journal.js";
3
5
  import { runScanAgent } from "../ai/agent.js";
4
- import { statusSpinner, makePromptUser, makeAgentOnProgress, } from "../cli/ux.js";
5
- import { readPdf, buildDocumentBlock } from "./pdf.js";
6
+ import { buildDocumentBlock } from "./pdf.js";
6
7
  import { buildScanUserMessage } from "./prompts.js";
7
8
  import { scanDataDir } from "./walker.js";
8
- import { unlockIfNeeded, persistUnlockOutcome } from "./unlock.js";
9
- // ── DB helpers ──────────────────────────────────────────────────────────────
10
- function findScannedByHash(db, hash) {
11
- return db
12
- .prepare(`SELECT id FROM scanned_files WHERE file_hash = ?`)
13
- .get(hash) ?? null;
14
- }
15
- function deleteScannedFile(db, id) {
16
- db.prepare(`DELETE FROM scanned_files WHERE id = ?`).run(id);
17
- }
18
- function insertScannedFile(db, args) {
19
- const id = `sf:${randomUUID()}`;
20
- db.prepare(`INSERT INTO scanned_files (id, path, file_hash, mime, status)
21
- VALUES (?, ?, ?, ?, 'pending')`).run(id, args.path, args.hash, args.mime);
22
- return id;
23
- }
24
- function countPendingQuestions(db, fileId) {
25
- const row = db
26
- .prepare(`SELECT COUNT(*) as n FROM pending_questions WHERE file_id = ? AND resolved_at IS NULL`)
27
- .get(fileId);
28
- return row.n;
29
- }
30
- function setFileStatus(db, id, status, fields = {}) {
31
- db.prepare(`UPDATE scanned_files
32
- SET status = ?, scanned_at = datetime('now'), error = ?, raw_text = COALESCE(?, raw_text)
33
- WHERE id = ?`).run(status, fields.error ?? null, fields.raw_text ?? null, id);
9
+ import { BufferedWriteContext } from "./buffer.js";
10
+ import { runWithConcurrency } from "./concurrency.js";
11
+ import { decryptQueue, confirmProceedAfterFailures, } from "./decrypt_queue.js";
12
+ export function compileMatcher(input) {
13
+ return new RegExp(input, "i");
34
14
  }
35
- // ── Per-file scan ───────────────────────────────────────────────────────────
36
- export async function scanFile(filePath, opts = {}) {
15
+ // ── Orchestration ───────────────────────────────────────────────────────────
16
+ export async function runScan(opts = {}) {
37
17
  const db = getDb();
38
- const file = readPdf(filePath);
39
- const existing = findScannedByHash(db, file.hash);
40
- if (existing && !opts.force) {
41
- return { fileId: existing.id, status: "skipped", pendingQuestions: countPendingQuestions(db, existing.id) };
42
- }
43
- const wasReplaced = !!existing;
44
- if (existing) {
45
- deleteScannedFile(db, existing.id);
46
- }
47
- let unlocked;
48
- try {
49
- unlocked = await unlockIfNeeded({
50
- db,
51
- filePath,
52
- bytes: file.bytes,
53
- interactive: opts.interactive ?? true,
54
- });
55
- }
56
- catch (err) {
57
- return { fileId: null, status: "failed", error: err.message, pendingQuestions: 0 };
18
+ const matcher = opts.regex ? compileMatcher(opts.regex) : null;
19
+ const allFiles = scanDataDir().filter(f => (matcher ? matcher.test(f.relPath) : true));
20
+ const concurrency = Math.min(8, Math.max(1, opts.concurrency ?? 3));
21
+ const interactive = opts.interactive ?? true;
22
+ const events = opts.events;
23
+ // Phase 1 — decrypt all
24
+ events?.decryptStart?.(allFiles.length);
25
+ const decryptResult = await decryptQueue(db, allFiles, {
26
+ force: !!opts.force,
27
+ interactive,
28
+ onProgress: events?.decryptProgress,
29
+ });
30
+ events?.decryptDone?.({
31
+ decrypted: decryptResult.decrypted.length,
32
+ skipped: decryptResult.skipped.length,
33
+ failed: decryptResult.failed.length,
34
+ });
35
+ const proceed = await confirmProceedAfterFailures(decryptResult, interactive);
36
+ if (!proceed) {
37
+ return buildAbortedSummary(allFiles.length, decryptResult);
58
38
  }
59
- persistUnlockOutcome(db, filePath, unlocked.outcome);
60
- const fileId = insertScannedFile(db, { path: filePath, hash: file.hash, mime: file.mime });
61
- const block = buildDocumentBlock(unlocked.decrypted, file.fileName, file.mime);
39
+ // Phase 2 — parallel scan with buffered writes
40
+ const scanResults = await scanInParallel(db, decryptResult.decrypted, { concurrency, events });
41
+ // Phase 3 — cross-file correlation (pre-commit)
42
+ const pairCount = applyCrossFileCorrelations(scanResults);
43
+ events?.correlating?.(pairCount);
44
+ // Phase 4 — per-file commit
45
+ events?.committing?.();
46
+ const fileResults = commitAll(db, decryptResult, scanResults);
47
+ return buildSummary(allFiles.length, fileResults, decryptResult);
48
+ }
49
+ async function scanInParallel(db, files, opts) {
50
+ const tasks = files.map(f => () => scanOneFile(db, f, opts.events));
51
+ const settled = await runWithConcurrency(tasks, opts.concurrency);
52
+ // Worker errors are captured per-slot by runWithConcurrency. scanOneFile
53
+ // itself catches LLM errors and returns a ScanWorkResult with `error` set,
54
+ // so the `{error}` branch only fires for truly unexpected throws.
55
+ return settled.map((r, i) => {
56
+ if (r && typeof r === "object" && "error" in r && !("buffer" in r)) {
57
+ return {
58
+ decryptedFile: files[i],
59
+ buffer: new BufferedWriteContext(files[i].fileName),
60
+ error: String(r.error),
61
+ agentText: "",
62
+ };
63
+ }
64
+ return r;
65
+ });
66
+ }
67
+ async function scanOneFile(db, file, events) {
68
+ const buffer = new BufferedWriteContext(file.fileName);
69
+ events?.scanStart?.({ fileName: file.fileName });
70
+ const block = buildDocumentBlock(file.decryptedBytes, file.fileName, file.mime);
62
71
  const messages = [
63
72
  {
64
73
  role: "user",
@@ -68,70 +77,214 @@ export async function scanFile(filePath, opts = {}) {
68
77
  ],
69
78
  },
70
79
  ];
71
- const spinner = statusSpinner(`Scanning ${file.fileName}...`);
72
- let summary = "";
73
80
  try {
74
81
  const text = await runScanAgent({
75
82
  db,
76
83
  initialMessages: messages,
77
84
  prompt: { fileName: file.fileName },
78
85
  agentCtx: {
79
- fileId,
80
- interactive: opts.interactive ?? true,
81
- promptUser: opts.interactive === false ? undefined : makePromptUser(spinner),
82
- onComplete: (s) => { summary = s; },
86
+ interactive: false,
87
+ buffer,
83
88
  },
84
- onProgress: makeAgentOnProgress(spinner, file.fileName),
89
+ onProgress: (event) => {
90
+ if (event.phase === "tool" && event.toolName) {
91
+ events?.scanProgress?.({ fileName: file.fileName, step: event.toolName });
92
+ }
93
+ else if (event.phase === "responding") {
94
+ events?.scanProgress?.({ fileName: file.fileName, step: "thinking" });
95
+ }
96
+ },
97
+ });
98
+ events?.scanEnd?.({
99
+ fileName: file.fileName,
100
+ status: "scanned",
101
+ entries: buffer.journalEntries.length,
102
+ concerns: buffer.concerns.length,
85
103
  });
86
- const stillPending = countPendingQuestions(db, fileId);
87
- if (stillPending > 0) {
88
- setFileStatus(db, fileId, "needs_input", { raw_text: text });
89
- spinner.info(`${file.fileName} needs input (${stillPending} pending).`);
90
- return { fileId, status: "needs_input", summary: summary || text, pendingQuestions: stillPending };
104
+ return { decryptedFile: file, buffer, agentText: text };
105
+ }
106
+ catch (err) {
107
+ const message = err?.message ?? "agent error";
108
+ events?.scanEnd?.({
109
+ fileName: file.fileName,
110
+ status: "failed",
111
+ entries: 0,
112
+ concerns: 0,
113
+ error: message,
114
+ });
115
+ return { decryptedFile: file, buffer, error: message, agentText: "" };
116
+ }
117
+ }
118
+ // ── Phase 3: cross-file correlation ─────────────────────────────────────────
119
+ /**
120
+ * For every pair of buffered entries that look like the same money movement
121
+ * across two different files, append a mirror concern to each side's buffer.
122
+ * Returns the number of pairs detected so the CLI can report it.
123
+ */
124
+ function applyCrossFileCorrelations(results) {
125
+ const all = [];
126
+ for (const res of results) {
127
+ if (res.error)
128
+ continue;
129
+ for (const be of res.buffer.journalEntries) {
130
+ all.push({
131
+ file: res,
132
+ entryId: be.entry_id,
133
+ lines: be.input.lines,
134
+ date: be.input.date,
135
+ description: be.input.description,
136
+ });
91
137
  }
92
- setFileStatus(db, fileId, "scanned", { raw_text: text });
93
- spinner.succeed(`Scanned ${file.fileName}.`);
138
+ }
139
+ const candidates = all.map(e => {
140
+ const debit = e.lines.reduce((s, l) => s + (l.debit ?? 0), 0);
141
+ const currency = e.lines.find(l => l.currency)?.currency ?? "THB";
142
+ const ids = Array.from(new Set(e.lines.map(l => l.account_id)));
94
143
  return {
95
- fileId,
96
- status: wasReplaced ? "replaced" : "scanned",
97
- summary: summary || text,
98
- pendingQuestions: 0,
144
+ id: e.entryId,
145
+ date: e.date,
146
+ description: e.description,
147
+ amount: Math.round(debit * 100) / 100,
148
+ currency,
149
+ account_ids: ids,
150
+ account_names: ids,
99
151
  };
152
+ });
153
+ const pairs = correlatePairs(candidates, { toleranceDays: 3 });
154
+ const byEntry = new Map(all.map(a => [a.entryId, a]));
155
+ for (const pair of pairs) {
156
+ const a = byEntry.get(pair.a.id);
157
+ const b = byEntry.get(pair.b.id);
158
+ if (!a || !b)
159
+ continue;
160
+ if (a.file === b.file)
161
+ continue; // same-file pairs are within-statement dupes; review's own detectors will handle.
162
+ const amountStr = `฿${pair.amount.toLocaleString("en-US", { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
163
+ a.file.buffer.appendConcern({
164
+ entry_id: a.entryId,
165
+ account_id: null,
166
+ prompt: `Looks like the matching half of this ${amountStr} movement on ${a.date} was also recorded in ${b.file.decryptedFile.fileName} on ${b.date}. Merge during review?`,
167
+ options: ["Yes — merge into one entry", "No — these are two real events", "Skip — leave as is"],
168
+ });
169
+ b.file.buffer.appendConcern({
170
+ entry_id: b.entryId,
171
+ account_id: null,
172
+ prompt: `Looks like the matching half of this ${amountStr} movement on ${b.date} was also recorded in ${a.file.decryptedFile.fileName} on ${a.date}. Merge during review?`,
173
+ options: ["Yes — merge into one entry", "No — these are two real events", "Skip — leave as is"],
174
+ });
100
175
  }
101
- catch (err) {
102
- setFileStatus(db, fileId, "failed", { error: err.message });
103
- spinner.fail(`${file.fileName} failed: ${err.message}`);
104
- return { fileId, status: "failed", error: err.message, pendingQuestions: countPendingQuestions(db, fileId) };
105
- }
176
+ return pairs.filter(p => byEntry.get(p.a.id)?.file !== byEntry.get(p.b.id)?.file).length;
106
177
  }
107
- export function compileMatcher(input) {
108
- return new RegExp(input, "i");
178
+ // ── Phase 4: commit ─────────────────────────────────────────────────────────
179
+ function commitAll(db, decryptResult, scanResults) {
180
+ const out = [];
181
+ // Skipped files: keep them in the summary with their existing concern count.
182
+ for (const skipped of decryptResult.skipped) {
183
+ out.push({
184
+ name: skipped.file.name,
185
+ relPath: skipped.file.relPath,
186
+ status: "skipped",
187
+ entries: 0,
188
+ concerns: countOpenConcerns(db, { file_id: skipped.existingScannedFileId }),
189
+ });
190
+ }
191
+ // Files that failed to decrypt never reached an agent.
192
+ for (const failed of decryptResult.failed) {
193
+ out.push({
194
+ name: failed.file.name,
195
+ relPath: failed.file.relPath,
196
+ status: "failed",
197
+ entries: 0,
198
+ concerns: 0,
199
+ error: failed.error,
200
+ });
201
+ }
202
+ // Scanned files: per-file transaction. Replaces prior records when needed.
203
+ for (const res of scanResults) {
204
+ const { decryptedFile, buffer, error, agentText } = res;
205
+ if (error) {
206
+ out.push({
207
+ name: decryptedFile.fileName,
208
+ relPath: decryptedFile.relPath,
209
+ status: "failed",
210
+ entries: 0,
211
+ concerns: buffer.concerns.length,
212
+ error,
213
+ });
214
+ continue;
215
+ }
216
+ try {
217
+ if (decryptedFile.replacesPriorScannedFileId) {
218
+ deleteScannedFile(db, decryptedFile.replacesPriorScannedFileId);
219
+ }
220
+ const scannedFileId = insertScannedFile(db, {
221
+ path: decryptedFile.path,
222
+ hash: decryptedFile.hash,
223
+ mime: decryptedFile.mime,
224
+ });
225
+ const counts = buffer.commit(db, scannedFileId);
226
+ setFileStatus(db, scannedFileId, "scanned", { raw_text: agentText });
227
+ out.push({
228
+ name: decryptedFile.fileName,
229
+ relPath: decryptedFile.relPath,
230
+ status: decryptedFile.replacesPriorScannedFileId ? "replaced" : "scanned",
231
+ entries: counts.entries,
232
+ concerns: counts.concerns,
233
+ });
234
+ }
235
+ catch (err) {
236
+ out.push({
237
+ name: decryptedFile.fileName,
238
+ relPath: decryptedFile.relPath,
239
+ status: "failed",
240
+ entries: 0,
241
+ concerns: buffer.concerns.length,
242
+ error: err?.message ?? "commit failed",
243
+ });
244
+ }
245
+ }
246
+ return out;
109
247
  }
110
- export async function runScan(opts = {}) {
111
- const matcher = opts.regex ? compileMatcher(opts.regex) : null;
112
- const files = scanDataDir().filter(f => (matcher ? matcher.test(f.relPath) : true));
248
+ // ── Summary assembly ────────────────────────────────────────────────────────
249
+ function buildSummary(total, details, _decrypt) {
113
250
  const summary = {
114
- total: files.length,
251
+ total,
115
252
  scanned: 0,
116
253
  replaced: 0,
117
254
  skipped: 0,
118
- needsInput: 0,
119
255
  failed: 0,
120
- details: [],
256
+ concerns: 0,
257
+ details,
121
258
  };
122
- for (const f of files) {
123
- const result = await scanFile(f.path, opts);
124
- summary.details.push({ name: f.name, relPath: f.relPath, result });
125
- if (result.status === "scanned")
126
- summary.scanned++;
127
- else if (result.status === "replaced")
128
- summary.replaced++;
129
- else if (result.status === "skipped")
130
- summary.skipped++;
131
- else if (result.status === "needs_input")
132
- summary.needsInput++;
133
- else if (result.status === "failed")
134
- summary.failed++;
259
+ for (const d of details) {
260
+ summary[d.status]++;
261
+ summary.concerns += d.concerns;
135
262
  }
136
263
  return summary;
137
264
  }
265
+ function buildAbortedSummary(total, decrypt) {
266
+ const details = [
267
+ ...decrypt.skipped.map(s => ({
268
+ name: s.file.name, relPath: s.file.relPath, status: "skipped", entries: 0, concerns: 0,
269
+ })),
270
+ ...decrypt.failed.map(f => ({
271
+ name: f.file.name, relPath: f.file.relPath, status: "failed", entries: 0, concerns: 0, error: f.error,
272
+ })),
273
+ ];
274
+ return buildSummary(total, details, decrypt);
275
+ }
276
+ // ── Low-level DB helpers ────────────────────────────────────────────────────
277
+ function deleteScannedFile(db, id) {
278
+ db.prepare(`DELETE FROM scanned_files WHERE id = ?`).run(id);
279
+ }
280
+ function insertScannedFile(db, args) {
281
+ const id = `sf:${randomUUID()}`;
282
+ db.prepare(`INSERT INTO scanned_files (id, path, file_hash, mime, status)
283
+ VALUES (?, ?, ?, ?, 'pending')`).run(id, args.path, args.hash, args.mime);
284
+ return id;
285
+ }
286
+ function setFileStatus(db, id, status, fields = {}) {
287
+ db.prepare(`UPDATE scanned_files
288
+ SET status = ?, scanned_at = datetime('now'), error = ?, raw_text = COALESCE(?, raw_text)
289
+ WHERE id = ?`).run(status, fields.error ?? null, fields.raw_text ?? null, id);
290
+ }
@@ -14,7 +14,7 @@ export function buildScanUserMessage(opts) {
14
14
  `3. If this document references an account that isn't yet in the chart, call create_account once. Mask the account number to the last 4 digits.`,
15
15
  `4. Persist any document-level metadata you find (statement_day, due_day, points_balance, etc.) using update_account_metadata.`,
16
16
  `5. For every transaction in the document, call record_journal_entry with balanced debit/credit lines. Use existing accounts where possible; create expense/income accounts as needed.`,
17
- `6. If a row is ambiguous, call ask_user before guessing.`,
17
+ `6. Never pause to ask the user. If a row is ambiguous, post your best-guess entry first, then call note_concern with details and the new entry_id. If a row is truly unparseable, skip it and call note_concern with the raw row text (no entry_id). A missing row is better than a wrong row.`,
18
18
  `7. When you are done, call mark_file_scanned with a short summary.`,
19
19
  ].join("\n");
20
20
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "plasalid",
3
- "version": "0.3.5",
3
+ "version": "0.4.1",
4
4
  "description": "A local-first AI that reads every line of your transactions and coaches you the best move.",
5
5
  "keywords": [
6
6
  "finance",