plasalid 0.3.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/README.md +33 -43
  2. package/dist/accounts/taxonomy.d.ts +1 -1
  3. package/dist/accounts/taxonomy.js +2 -2
  4. package/dist/ai/agent.d.ts +19 -5
  5. package/dist/ai/agent.js +26 -6
  6. package/dist/ai/memory.d.ts +14 -5
  7. package/dist/ai/memory.js +12 -0
  8. package/dist/ai/personas.d.ts +11 -0
  9. package/dist/ai/personas.js +193 -0
  10. package/dist/ai/prompt-sections.d.ts +49 -0
  11. package/dist/ai/prompt-sections.js +107 -0
  12. package/dist/ai/system-prompt.d.ts +14 -3
  13. package/dist/ai/system-prompt.js +59 -165
  14. package/dist/ai/thinking.js +1 -1
  15. package/dist/ai/tools/common.js +2 -5
  16. package/dist/ai/tools/index.js +32 -7
  17. package/dist/ai/tools/ingest.d.ts +3 -1
  18. package/dist/ai/tools/ingest.js +372 -124
  19. package/dist/ai/tools/merchants.d.ts +2 -0
  20. package/dist/ai/tools/merchants.js +117 -0
  21. package/dist/ai/tools/read.js +57 -24
  22. package/dist/ai/tools/record.d.ts +2 -0
  23. package/dist/ai/tools/record.js +188 -0
  24. package/dist/ai/tools/review.d.ts +2 -0
  25. package/dist/ai/tools/review.js +359 -0
  26. package/dist/ai/tools/scan.js +5 -3
  27. package/dist/ai/tools/types.d.ts +33 -4
  28. package/dist/cli/commands/accounts.js +33 -25
  29. package/dist/cli/commands/record.d.ts +4 -0
  30. package/dist/cli/commands/record.js +119 -0
  31. package/dist/cli/commands/revert.js +1 -1
  32. package/dist/cli/commands/review.d.ts +2 -0
  33. package/dist/cli/commands/review.js +15 -0
  34. package/dist/cli/commands/scan.d.ts +4 -2
  35. package/dist/cli/commands/scan.js +143 -19
  36. package/dist/cli/commands/status.js +6 -9
  37. package/dist/cli/commands/transactions.js +36 -41
  38. package/dist/cli/format.d.ts +2 -0
  39. package/dist/cli/format.js +7 -2
  40. package/dist/cli/index.js +28 -13
  41. package/dist/cli/ink/scan_dashboard.d.ts +38 -0
  42. package/dist/cli/ink/scan_dashboard.js +62 -0
  43. package/dist/cli/setup.d.ts +0 -1
  44. package/dist/cli/setup.js +2 -8
  45. package/dist/cli/ux.d.ts +2 -1
  46. package/dist/cli/ux.js +36 -2
  47. package/dist/currency.d.ts +3 -0
  48. package/dist/currency.js +12 -1
  49. package/dist/db/queries/account_balance.d.ts +84 -4
  50. package/dist/db/queries/account_balance.js +239 -20
  51. package/dist/db/queries/action_log.d.ts +29 -0
  52. package/dist/db/queries/action_log.js +27 -0
  53. package/dist/db/queries/concerns.d.ts +50 -0
  54. package/dist/db/queries/concerns.js +91 -0
  55. package/dist/db/queries/journal.d.ts +75 -8
  56. package/dist/db/queries/journal.js +131 -19
  57. package/dist/db/queries/merchants.d.ts +42 -0
  58. package/dist/db/queries/merchants.js +120 -0
  59. package/dist/db/queries/recurrences.d.ts +33 -0
  60. package/dist/db/queries/recurrences.js +128 -0
  61. package/dist/db/queries/search.d.ts +5 -4
  62. package/dist/db/queries/search.js +16 -12
  63. package/dist/db/queries/transactions.d.ts +167 -0
  64. package/dist/db/queries/transactions.js +320 -0
  65. package/dist/db/schema.js +74 -9
  66. package/dist/reviewer/pipeline.d.ts +18 -0
  67. package/dist/reviewer/pipeline.js +46 -0
  68. package/dist/reviewer/prompts.d.ts +12 -0
  69. package/dist/reviewer/prompts.js +22 -0
  70. package/dist/scanner/account_mutex.d.ts +1 -0
  71. package/dist/scanner/account_mutex.js +16 -0
  72. package/dist/scanner/buffer.d.ts +51 -0
  73. package/dist/scanner/buffer.js +63 -0
  74. package/dist/scanner/concurrency.d.ts +14 -0
  75. package/dist/scanner/concurrency.js +31 -0
  76. package/dist/scanner/decrypt_queue.d.ts +57 -0
  77. package/dist/scanner/decrypt_queue.js +96 -0
  78. package/dist/scanner/pipeline.d.ts +47 -18
  79. package/dist/scanner/pipeline.js +247 -97
  80. package/dist/scanner/prompts.js +3 -3
  81. package/package.json +2 -2
package/dist/db/schema.js CHANGED
@@ -4,6 +4,7 @@ export function migrate(db) {
4
4
  id TEXT PRIMARY KEY,
5
5
  name TEXT NOT NULL,
6
6
  type TEXT NOT NULL CHECK(type IN ('asset','liability','income','expense','equity')),
7
+ parent_id TEXT REFERENCES accounts(id),
7
8
  subtype TEXT,
8
9
  bank_name TEXT,
9
10
  account_number_masked TEXT,
@@ -13,33 +14,78 @@ export function migrate(db) {
13
14
  points_balance REAL,
14
15
  metadata_json TEXT,
15
16
  pii_flag INTEGER NOT NULL DEFAULT 0,
17
+ has_concern INTEGER NOT NULL DEFAULT 0,
16
18
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
17
19
  );
18
20
 
21
+ CREATE INDEX IF NOT EXISTS accounts_parent_idx ON accounts(parent_id);
22
+ CREATE INDEX IF NOT EXISTS accounts_type_idx ON accounts(type);
23
+
24
+ CREATE TABLE IF NOT EXISTS merchants (
25
+ id TEXT PRIMARY KEY,
26
+ canonical_name TEXT NOT NULL UNIQUE,
27
+ default_account_id TEXT REFERENCES accounts(id),
28
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
29
+ );
30
+
31
+ CREATE TABLE IF NOT EXISTS merchant_aliases (
32
+ id TEXT PRIMARY KEY,
33
+ merchant_id TEXT NOT NULL REFERENCES merchants(id) ON DELETE CASCADE,
34
+ normalized_pattern TEXT NOT NULL UNIQUE,
35
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
36
+ );
37
+
38
+ CREATE INDEX IF NOT EXISTS merchant_aliases_merchant_idx ON merchant_aliases(merchant_id);
39
+
19
40
  CREATE TABLE IF NOT EXISTS scanned_files (
20
41
  id TEXT PRIMARY KEY,
21
42
  path TEXT NOT NULL,
22
43
  file_hash TEXT NOT NULL UNIQUE,
23
44
  mime TEXT NOT NULL,
24
- status TEXT NOT NULL CHECK(status IN ('pending','scanned','needs_input','failed')),
45
+ status TEXT NOT NULL CHECK(status IN ('pending','scanned','failed')),
25
46
  raw_text TEXT,
26
47
  scanned_at TEXT,
27
48
  error TEXT,
28
49
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
29
50
  );
30
51
 
31
- CREATE TABLE IF NOT EXISTS journal_entries (
52
+ CREATE TABLE IF NOT EXISTS recurrences (
53
+ id TEXT PRIMARY KEY,
54
+ account_id TEXT NOT NULL REFERENCES accounts(id) ON DELETE CASCADE,
55
+ description TEXT NOT NULL,
56
+ frequency TEXT NOT NULL CHECK(frequency IN ('weekly','biweekly','monthly','annually')),
57
+ amount_typical REAL,
58
+ currency TEXT NOT NULL DEFAULT 'THB',
59
+ first_seen_date TEXT,
60
+ last_seen_date TEXT,
61
+ next_expected_date TEXT,
62
+ notes TEXT,
63
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
64
+ );
65
+
66
+ CREATE INDEX IF NOT EXISTS recurrences_account_idx ON recurrences(account_id);
67
+
68
+ CREATE TABLE IF NOT EXISTS transactions (
32
69
  id TEXT PRIMARY KEY,
33
70
  date TEXT NOT NULL,
34
71
  description TEXT NOT NULL,
72
+ merchant_id TEXT REFERENCES merchants(id),
73
+ raw_descriptor TEXT,
35
74
  source_file_id TEXT REFERENCES scanned_files(id) ON DELETE CASCADE,
36
75
  source_page INTEGER,
76
+ recurrence_id TEXT REFERENCES recurrences(id) ON DELETE SET NULL,
77
+ has_concern INTEGER NOT NULL DEFAULT 0,
37
78
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
38
79
  );
39
80
 
40
- CREATE TABLE IF NOT EXISTS journal_lines (
81
+ CREATE INDEX IF NOT EXISTS transactions_recurrence_idx ON transactions(recurrence_id);
82
+ CREATE INDEX IF NOT EXISTS transactions_source_file_idx ON transactions(source_file_id);
83
+ CREATE INDEX IF NOT EXISTS transactions_date_idx ON transactions(date);
84
+ CREATE INDEX IF NOT EXISTS transactions_merchant_idx ON transactions(merchant_id);
85
+
86
+ CREATE TABLE IF NOT EXISTS postings (
41
87
  id TEXT PRIMARY KEY,
42
- entry_id TEXT NOT NULL REFERENCES journal_entries(id) ON DELETE CASCADE,
88
+ transaction_id TEXT NOT NULL REFERENCES transactions(id) ON DELETE CASCADE,
43
89
  account_id TEXT NOT NULL REFERENCES accounts(id),
44
90
  debit REAL NOT NULL DEFAULT 0,
45
91
  credit REAL NOT NULL DEFAULT 0,
@@ -49,14 +95,15 @@ export function migrate(db) {
49
95
  CHECK (debit >= 0 AND credit >= 0 AND (debit = 0 OR credit = 0))
50
96
  );
51
97
 
52
- CREATE INDEX IF NOT EXISTS journal_lines_entry_idx ON journal_lines(entry_id);
53
- CREATE INDEX IF NOT EXISTS journal_lines_account_idx ON journal_lines(account_id);
54
- CREATE INDEX IF NOT EXISTS journal_entries_source_file_idx ON journal_entries(source_file_id);
55
- CREATE INDEX IF NOT EXISTS journal_entries_date_idx ON journal_entries(date);
98
+ CREATE INDEX IF NOT EXISTS postings_transaction_idx ON postings(transaction_id);
99
+ CREATE INDEX IF NOT EXISTS postings_account_idx ON postings(account_id);
56
100
 
57
- CREATE TABLE IF NOT EXISTS pending_questions (
101
+ CREATE TABLE IF NOT EXISTS concerns (
58
102
  id TEXT PRIMARY KEY,
59
103
  file_id TEXT REFERENCES scanned_files(id) ON DELETE CASCADE,
104
+ transaction_id TEXT REFERENCES transactions(id) ON DELETE CASCADE,
105
+ account_id TEXT REFERENCES accounts(id) ON DELETE CASCADE,
106
+ kind TEXT,
60
107
  prompt TEXT NOT NULL,
61
108
  options_json TEXT,
62
109
  answer TEXT,
@@ -91,5 +138,23 @@ export function migrate(db) {
91
138
  use_count INTEGER NOT NULL DEFAULT 0,
92
139
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
93
140
  );
141
+
142
+ CREATE TABLE IF NOT EXISTS action_log (
143
+ id TEXT PRIMARY KEY,
144
+ correlation_id TEXT NOT NULL,
145
+ command TEXT NOT NULL,
146
+ user_input TEXT,
147
+ action_type TEXT NOT NULL CHECK(action_type IN (
148
+ 'create_account','update_account_metadata','record_transaction','adjust_balance',
149
+ 'create_merchant','update_merchant_default'
150
+ )),
151
+ target_id TEXT NOT NULL,
152
+ payload_json TEXT NOT NULL,
153
+ created_at TEXT NOT NULL DEFAULT (datetime('now')),
154
+ reverted_at TEXT
155
+ );
156
+
157
+ CREATE INDEX IF NOT EXISTS action_log_correlation_idx ON action_log(correlation_id);
158
+ CREATE INDEX IF NOT EXISTS action_log_created_idx ON action_log(created_at);
94
159
  `);
95
160
  }
@@ -0,0 +1,18 @@
1
+ export interface ReviewOptions {
2
+ accountId?: string;
3
+ from?: string;
4
+ to?: string;
5
+ dryRun?: boolean;
6
+ interactive?: boolean;
7
+ }
8
+ export interface ReviewSummary {
9
+ summary: string;
10
+ dryRun: boolean;
11
+ }
12
+ /**
13
+ * Walk the existing ledger with the review-profile agent: surface open
14
+ * concerns (uncategorized cleanup first), detect correlated transactions and
15
+ * recurrences, propose fixes, apply them (or print "would do X" stubs when
16
+ * dryRun is on) after the user confirms one step at a time.
17
+ */
18
+ export declare function runReview(opts?: ReviewOptions): Promise<ReviewSummary>;
@@ -0,0 +1,46 @@
1
+ import { getDb } from "../db/connection.js";
2
+ import { runReviewAgent } from "../ai/agent.js";
3
+ import { statusSpinner, makePromptUser, makeAgentOnProgress, } from "../cli/ux.js";
4
+ import { buildReviewUserMessage } from "./prompts.js";
5
+ /**
6
+ * Walk the existing ledger with the review-profile agent: surface open
7
+ * concerns (uncategorized cleanup first), detect correlated transactions and
8
+ * recurrences, propose fixes, apply them (or print "would do X" stubs when
9
+ * dryRun is on) after the user confirms one step at a time.
10
+ */
11
+ export async function runReview(opts = {}) {
12
+ const db = getDb();
13
+ const interactive = opts.interactive ?? true;
14
+ const dryRun = !!opts.dryRun;
15
+ const scope = {
16
+ accountId: opts.accountId,
17
+ from: opts.from,
18
+ to: opts.to,
19
+ dryRun,
20
+ };
21
+ const spinner = statusSpinner(`Reviewing${dryRun ? " (dry-run)" : ""}...`);
22
+ const promptUser = interactive ? makePromptUser(spinner) : undefined;
23
+ let summary = "";
24
+ try {
25
+ await runReviewAgent({
26
+ db,
27
+ prompt: scope,
28
+ initialMessages: [
29
+ { role: "user", content: buildReviewUserMessage(scope) },
30
+ ],
31
+ agentCtx: {
32
+ interactive,
33
+ dryRun,
34
+ promptUser,
35
+ onComplete: (s) => { summary = s; },
36
+ },
37
+ onProgress: makeAgentOnProgress(spinner),
38
+ });
39
+ spinner.succeed(dryRun ? "Review complete (dry-run — no writes)." : "Review complete.");
40
+ }
41
+ catch (err) {
42
+ spinner.fail(`Review failed: ${err.message}`);
43
+ throw err;
44
+ }
45
+ return { summary, dryRun };
46
+ }
@@ -0,0 +1,12 @@
1
+ export interface ReviewScope {
2
+ accountId?: string;
3
+ from?: string;
4
+ to?: string;
5
+ dryRun: boolean;
6
+ }
7
+ /**
8
+ * Kickoff message the review agent receives. The persona + chart-of-accounts
9
+ * snapshot live in the system prompt (`buildReviewSystemPrompt`); this is
10
+ * the per-session instruction.
11
+ */
12
+ export declare function buildReviewUserMessage(scope: ReviewScope): string;
@@ -0,0 +1,22 @@
/**
 * Build the kickoff user message for a review session.
 *
 * The message is a newline-joined block: a title line, the resolved scope
 * (account / from / to / dry-run, with "all", "all time" and "now" standing
 * in for missing values), and four numbered steps for the agent to follow.
 * The persona + chart-of-accounts snapshot live in the system prompt
 * (`buildReviewSystemPrompt`); this is the per-session instruction.
 *
 * @param scope Review scope; `accountId`, `from` and `to` may be absent,
 *   `dryRun` selects which dry-run wording is emitted.
 * @returns The complete message text.
 */
export function buildReviewUserMessage(scope) {
    const account = scope.accountId ?? "all";
    const since = scope.from ?? "all time";
    const until = scope.to ?? "now";
    const dryRunNote = scope.dryRun
        ? "yes — write tools are no-ops"
        : "no — writes commit after confirmation";
    const scopeSection = [
        `Scope:`,
        `- account: ${account}`,
        `- from: ${since}`,
        `- to: ${until}`,
        `- dry run: ${dryRunNote}`,
    ];
    const stepsSection = [
        `Steps:`,
        `1. Survey first: list_accounts, get_net_worth, count open concerns (especially kind='uncategorized_expense'), then find_duplicate_transactions, find_similar_accounts, find_unused_accounts, find_correlated_transactions, find_recurrences. Hold the candidate list internally.`,
        `2. Prioritize: (a) uncategorized expense cleanup — these are postings parked in expense:uncategorized awaiting a real category; resolving one should also call set_merchant_default_account when the transaction has a merchant, so future statements skip the categorizer. (b) other open concerns. (c) correlated transactions. (d) recurrences. (e) chart-of-accounts hygiene.`,
        `3. Ask one focused question at a time via ask_user. Group sibling concerns (same merchant, same answer) via related_concern_ids so the user answers once. After each answer, apply the change and re-survey only if the change invalidated other candidates.`,
        `4. Loop until no open concerns remain (or the user keeps choosing "Skip — leave as is"). Then call mark_review_done with a short summary of what was applied, recorded, and skipped.`,
    ];
    // Sections are separated by a single blank line.
    const allLines = [
        `Review the local Plasalid ledger.`,
        ``,
        ...scopeSection,
        ``,
        ...stepsSection,
    ];
    return allLines.join("\n");
}
@@ -0,0 +1 @@
1
+ export declare function runExclusive<T>(fn: () => Promise<T> | T): Promise<T>;
@@ -0,0 +1,16 @@
/**
 * Process-wide FIFO serialization of callbacks.
 *
 * Every call is chained onto a single module-level promise, so callbacks run
 * one at a time in the order `runExclusive` was called. Per the original
 * design note, this is meant for write operations that race when multiple
 * scan agents run in parallel (`create_account` /
 * `update_account_metadata`), so that a write and another agent's
 * subsequent read-back are consistent.
 *
 * Single tail-promise queue: cheap, deterministic, no extra deps.
 */
let tail = Promise.resolve();
/**
 * Run `fn` after every previously queued callback has settled.
 *
 * @param fn Sync or async callback to run exclusively.
 * @returns A promise that settles with `fn`'s result or rejection.
 */
export function runExclusive(fn) {
    const previous = tail;
    const pending = previous.then(() => fn());
    // The queue itself must never reject, otherwise one failing callback
    // would block every later caller. The caller still observes the failure
    // through the returned promise.
    tail = pending.then(undefined, () => undefined);
    return pending;
}
@@ -0,0 +1,51 @@
1
+ import type Database from "libsql";
2
+ import { type TransactionInput } from "../db/queries/transactions.js";
3
+ /**
4
+ * One scan agent's pending writes. Transactions and concerns accumulate here
5
+ * while the LLM works; nothing hits the DB until `commit()` runs inside a
6
+ * single SQLite transaction. If `commit()` throws, the transaction rolls back
7
+ * and the DB stays exactly as it was before this file's scan began.
8
+ *
9
+ * Account writes (`create_account`, `update_account_metadata`) and merchant
10
+ * writes deliberately bypass the buffer — they go directly to the DB through
11
+ * their own mutexes so concurrent agents see each other's creates and don't
12
+ * duplicate.
13
+ */
14
+ export interface BufferedConcern {
15
+ /** Synthesized when the LLM called note_concern with a buffered transaction_id. */
16
+ transaction_id: string | null;
17
+ account_id: string | null;
18
+ kind?: string | null;
19
+ prompt: string;
20
+ options?: string[];
21
+ }
22
+ export interface BufferedTransaction {
23
+ /** Synthesized at queue-time so concerns can reference this transaction. */
24
+ transaction_id: string;
25
+ input: TransactionInput;
26
+ }
27
+ export declare class BufferedWriteContext {
28
+ readonly fileName: string;
29
+ readonly transactions: BufferedTransaction[];
30
+ readonly concerns: BufferedConcern[];
31
+ doneSummary: string | null;
32
+ constructor(fileName: string);
33
+ /**
34
+ * Queue a transaction. Returns the synthesized transaction id so the agent
35
+ * can use it in subsequent note_concern calls inside the same file.
36
+ */
37
+ appendTransaction(input: TransactionInput): string;
38
+ appendConcern(concern: BufferedConcern): void;
39
+ markDone(summary: string): void;
40
+ get isDone(): boolean;
41
+ /**
42
+ * Replay all buffered writes inside one DB transaction. `scannedFileId` is
43
+ * stamped onto every transaction and concern so they're attributable to this
44
+ * file. Returns `{ transactions, concerns }` counts so the caller can report
45
+ * them.
46
+ */
47
+ commit(db: Database.Database, scannedFileId: string): {
48
+ transactions: number;
49
+ concerns: number;
50
+ };
51
+ }
@@ -0,0 +1,63 @@
1
+ import { randomUUID } from "crypto";
2
+ import { insertTransactionRows, validateTransaction, } from "../db/queries/transactions.js";
3
+ import { recordConcern } from "../db/queries/concerns.js";
/**
 * In-memory staging area for one scanned file's writes.
 *
 * Transactions and concerns are collected in arrays while the file is being
 * processed; nothing is written until `commit()` replays them through
 * `db.transaction(...)`.
 */
export class BufferedWriteContext {
    fileName;
    transactions = [];
    concerns = [];
    doneSummary = null;
    /**
     * @param fileName Name of the file this buffer belongs to.
     */
    constructor(fileName) {
        this.fileName = fileName;
    }
    /**
     * Queue a transaction under a freshly generated `tx:<uuid>` id.
     *
     * @param input Transaction payload, stored as-is until commit.
     * @returns The generated id, so later concerns in the same file can
     *   reference this transaction.
     */
    appendTransaction(input) {
        const entry = { transaction_id: `tx:${randomUUID()}`, input };
        this.transactions.push(entry);
        return entry.transaction_id;
    }
    /** Queue a concern to be recorded at commit time. */
    appendConcern(concern) {
        this.concerns.push(concern);
    }
    /** Record the completion summary; this also flips `isDone` to true. */
    markDone(summary) {
        this.doneSummary = summary;
    }
    /** True once `markDone` has stored a summary. */
    get isDone() {
        return this.doneSummary !== null;
    }
    /**
     * Replay every buffered write inside one `db.transaction` callback.
     *
     * All transactions are validated up front, before the DB transaction
     * starts. `scannedFileId` is stamped on each transaction
     * (`source_file_id`) and each concern (`file_id`).
     *
     * @param db Database handle exposing `transaction(fn)`.
     * @param scannedFileId Id of the scanned-file row these writes belong to.
     * @returns `{ transactions, concerns }` — the number of buffered items.
     */
    commit(db, scannedFileId) {
        const prepared = [];
        for (const pending of this.transactions) {
            prepared.push(validateTransaction({
                ...pending.input,
                id: pending.transaction_id,
                source_file_id: scannedFileId,
            }));
        }
        const applyAll = db.transaction(() => {
            prepared.forEach((row) => {
                insertTransactionRows(db, row);
            });
            this.concerns.forEach((concern) => {
                recordConcern(db, {
                    file_id: scannedFileId,
                    transaction_id: concern.transaction_id,
                    account_id: concern.account_id,
                    kind: concern.kind ?? null,
                    prompt: concern.prompt,
                    options: concern.options,
                });
            });
        });
        applyAll();
        return {
            transactions: this.transactions.length,
            concerns: this.concerns.length,
        };
    }
}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Run an array of async task factories with a fixed concurrency bound. Resolves
3
+ * to an array of results in the same order as the input tasks (regardless of
4
+ * completion order). Any rejection settles that slot with `{ error }` and the
5
+ * caller is responsible for tracking failures — because each task call is
6
+ * awaited inside its own `try/catch`, one task throwing or rejecting
7
+ * never aborts the rest of the run.
8
+ *
9
+ * No new dependency. Simple worker-pool: kicks off up to `n` tasks, then each
10
+ * worker pulls the next index from a shared cursor until the queue is drained.
11
+ */
12
+ export declare function runWithConcurrency<T>(tasks: Array<() => Promise<T>>, n: number): Promise<Array<T | {
13
+ error: unknown;
14
+ }>>;
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Run an array of async task factories with a fixed concurrency bound. Resolves
3
+ * to an array of results in the same order as the input tasks (regardless of
4
+ * completion order). Any rejection settles that slot with `undefined` and the
5
+ * caller is responsible for tracking failures — but since each task is wrapped
6
+ * in `Promise.resolve()` and pushed through `try/catch`, one task throwing
7
+ * never aborts the rest of the run.
8
+ *
9
+ * No new dependency. Simple worker-pool: kicks off up to `n` tasks, then each
10
+ * worker pulls the next index from a shared cursor until the queue is drained.
11
+ */
12
+ export async function runWithConcurrency(tasks, n) {
13
+ const results = new Array(tasks.length);
14
+ const workerCount = Math.max(1, Math.min(n, tasks.length));
15
+ let cursor = 0;
16
+ async function worker() {
17
+ while (true) {
18
+ const index = cursor++;
19
+ if (index >= tasks.length)
20
+ return;
21
+ try {
22
+ results[index] = await tasks[index]();
23
+ }
24
+ catch (err) {
25
+ results[index] = { error: err };
26
+ }
27
+ }
28
+ }
29
+ await Promise.all(Array.from({ length: workerCount }, () => worker()));
30
+ return results;
31
+ }
@@ -0,0 +1,57 @@
1
+ import type Database from "libsql";
2
+ import type { ScannedFile } from "./walker.js";
3
+ export interface DecryptedFile {
4
+ path: string;
5
+ fileName: string;
6
+ relPath: string;
7
+ hash: string;
8
+ mime: string;
9
+ decryptedBytes: Buffer;
10
+ /** Id of the prior scanned_files row with this hash; set only when such a row exists and --force re-queued the file. */
11
+ replacesPriorScannedFileId?: string;
12
+ }
13
+ export interface SkippedFile {
14
+ file: ScannedFile;
15
+ /** id of the scanned_files row that already has this hash. */
16
+ existingScannedFileId: string;
17
+ }
18
+ export interface FailedFile {
19
+ file: ScannedFile;
20
+ error: string;
21
+ }
22
+ export interface DecryptQueueResult {
23
+ decrypted: DecryptedFile[];
24
+ skipped: SkippedFile[];
25
+ failed: FailedFile[];
26
+ }
27
+ export interface DecryptQueueOptions {
28
+ /** Re-decrypt and queue files that match a prior hash. */
29
+ force: boolean;
30
+ /** If false, never prompt for a password; treat unlock failure as failed. */
31
+ interactive: boolean;
32
+ /** Called as each file finishes (any outcome) so a spinner can update its label. */
33
+ onProgress?: (event: {
34
+ index: number;
35
+ total: number;
36
+ fileName: string;
37
+ outcome: "decrypted" | "skipped" | "failed";
38
+ }) => void;
39
+ }
40
+ /**
41
+ * Phase 1 of scan: walk every file in the queue, decrypt any that need it,
42
+ * and return a partition (decrypted / skipped / failed). The actual agent
43
+ * work in Phase 2 only sees `decrypted` — no password prompts during the
44
+ * parallel scan loop.
45
+ *
46
+ * Failures don't abort; the caller (CLI) confirms whether to proceed.
47
+ */
48
+ export declare function decryptQueue(db: Database.Database, files: ScannedFile[], opts: DecryptQueueOptions): Promise<DecryptQueueResult>;
49
+ /**
50
+ * Interactive go/no-go gate when some files failed to decrypt. Returns true
51
+ * if the caller should proceed with the decrypted set, false to abort the
52
+ * whole scan run.
53
+ *
54
+ * Returns true automatically when interactive is false (CI / non-TTY runs);
55
+ * the caller is expected to inspect `result.failed` and report.
56
+ */
57
+ export declare function confirmProceedAfterFailures(result: DecryptQueueResult, interactive: boolean): Promise<boolean>;
@@ -0,0 +1,96 @@
1
+ import chalk from "chalk";
2
+ import inquirer from "inquirer";
3
+ import { readPdf } from "./pdf.js";
4
+ import { unlockIfNeeded, persistUnlockOutcome } from "./unlock.js";
/**
 * Phase 1 of scan: walk every file in the queue, decrypt any that need it,
 * and return a partition (decrypted / skipped / failed). The actual agent
 * work in Phase 2 only sees `decrypted` — no password prompts during the
 * parallel scan loop.
 *
 * Per file, in order:
 *  1. `readPdf` — a throw records the file as failed ("read failed: …").
 *  2. Hash lookup in `scanned_files` — an existing row means the file is
 *     skipped, unless `opts.force` is set.
 *  3. `unlockIfNeeded` + `persistUnlockOutcome` — a throw records the file
 *     as failed with the error's message.
 *
 * Failures don't abort; the caller (CLI) confirms whether to proceed.
 * Each file lands in exactly one of the three result lists, and
 * `opts.onProgress` is invoked once per file after it has been classified.
 * An exception thrown by `onProgress` itself propagates to the caller.
 *
 * @param db Database handle used for the hash lookup and unlock bookkeeping.
 * @param files Files to process, in order.
 * @param opts `force`, `interactive`, and an optional `onProgress` callback.
 * @returns `{ decrypted, skipped, failed }`.
 */
export async function decryptQueue(db, files, opts) {
    const decrypted = [];
    const skipped = [];
    const failed = [];
    const total = files.length;
    // Thrown values are not guaranteed to be Error instances.
    const describe = (err, fallback) => {
        const message = err?.message;
        if (typeof message === "string")
            return message;
        return err == null ? fallback : String(err);
    };
    for (const [index, file] of files.entries()) {
        const report = (outcome) => {
            opts.onProgress?.({ index, total, fileName: file.name, outcome });
        };
        let pdf;
        try {
            pdf = readPdf(file.path);
        }
        catch (err) {
            failed.push({ file, error: `read failed: ${describe(err, "unknown error")}` });
            report("failed");
            continue;
        }
        const existing = findScannedByHash(db, pdf.hash);
        if (existing && !opts.force) {
            skipped.push({ file, existingScannedFileId: existing.id });
            report("skipped");
            continue;
        }
        let entry;
        try {
            const unlocked = await unlockIfNeeded({
                db,
                filePath: file.path,
                bytes: pdf.bytes,
                interactive: opts.interactive,
            });
            persistUnlockOutcome(db, file.path, unlocked.outcome);
            entry = {
                path: file.path,
                fileName: file.name,
                relPath: file.relPath,
                hash: pdf.hash,
                mime: pdf.mime,
                decryptedBytes: unlocked.decrypted,
                // Only defined when a prior row exists, which can only reach
                // this point under `force`.
                replacesPriorScannedFileId: existing?.id,
            };
        }
        catch (err) {
            failed.push({ file, error: describe(err, "unlock failed") });
            report("failed");
            continue;
        }
        // Classify and report outside the try block: a throwing progress
        // callback must not cause an already-decrypted file to be recorded
        // as failed as well.
        decrypted.push(entry);
        report("decrypted");
    }
    return { decrypted, skipped, failed };
}
/**
 * Go/no-go gate after the decrypt phase.
 *
 * With no failures this returns true without printing anything. Otherwise
 * the failed files are listed on the console, and then:
 *  - nothing decrypted → prints "Nothing to scan." and returns false;
 *  - `interactive` is false → returns true (CI / non-TTY runs); the caller
 *    is expected to inspect `result.failed` and report;
 *  - otherwise → asks for confirmation (default yes) and returns the answer.
 *
 * @param result Outcome of the decrypt phase.
 * @param interactive Whether the user may be prompted.
 * @returns true to continue with the decrypted set, false to abort the run.
 */
export async function confirmProceedAfterFailures(result, interactive) {
    const { failed, decrypted } = result;
    if (failed.length === 0) {
        return true;
    }
    console.log("");
    console.log(chalk.yellow(`${failed.length} file(s) could not be decrypted:`));
    failed.forEach((entry) => {
        console.log(` ${chalk.red("✗")} ${entry.file.relPath} — ${chalk.dim(entry.error)}`);
    });
    if (decrypted.length === 0) {
        console.log(chalk.red("Nothing to scan."));
        return false;
    }
    if (!interactive) {
        return true;
    }
    const question = {
        type: "confirm",
        name: "proceed",
        message: `Proceed scanning the ${decrypted.length} file(s) that decrypted successfully?`,
        default: true,
    };
    const answers = await inquirer.prompt([question]);
    return answers.proceed;
}
/**
 * Look up the `scanned_files` row whose `file_hash` equals `hash`.
 *
 * @param db Database handle exposing `prepare(sql).get(...)`.
 * @param hash File hash to look up.
 * @returns The matching row (only `id` is selected), or null when the query
 *   yields no row.
 */
function findScannedByHash(db, hash) {
    const statement = db.prepare(`SELECT id FROM scanned_files WHERE file_hash = ?`);
    const row = statement.get(hash);
    return row ?? null;
}
@@ -1,32 +1,61 @@
1
+ export type ScanFileStatus = "scanned" | "replaced" | "failed" | "skipped";
1
2
  export interface ScanFileResult {
2
- fileId: string | null;
3
- status: "scanned" | "needs_input" | "failed" | "skipped" | "replaced";
4
- summary?: string;
3
+ name: string;
4
+ relPath: string;
5
+ status: ScanFileStatus;
6
+ transactions: number;
7
+ concerns: number;
5
8
  error?: string;
6
- pendingQuestions: number;
7
9
  }
8
- export interface ScanOptions {
9
- interactive?: boolean;
10
- force?: boolean;
11
- onProgress?: (msg: string) => void;
12
- }
13
- export declare function scanFile(filePath: string, opts?: ScanOptions): Promise<ScanFileResult>;
14
10
  export interface ScanSummary {
15
11
  total: number;
16
12
  scanned: number;
17
13
  replaced: number;
18
14
  skipped: number;
19
- needsInput: number;
20
15
  failed: number;
21
- details: {
22
- name: string;
23
- relPath: string;
24
- result: ScanFileResult;
25
- }[];
16
+ concerns: number;
17
+ details: ScanFileResult[];
26
18
  }
27
- export interface RunScanOptions extends ScanOptions {
28
- /** Optional regex (string). Partial, case-insensitive, against the relative path. */
19
+ /** Event hooks the CLI subscribes to. All callbacks are best-effort and ignored if absent. */
20
+ export interface ScanRunEvents {
21
+ decryptStart?: (count: number) => void;
22
+ decryptProgress?: (e: {
23
+ index: number;
24
+ total: number;
25
+ fileName: string;
26
+ outcome: "decrypted" | "skipped" | "failed";
27
+ }) => void;
28
+ decryptDone?: (e: {
29
+ decrypted: number;
30
+ skipped: number;
31
+ failed: number;
32
+ }) => void;
33
+ scanStart?: (e: {
34
+ fileName: string;
35
+ }) => void;
36
+ scanProgress?: (e: {
37
+ fileName: string;
38
+ step: string;
39
+ }) => void;
40
+ scanEnd?: (e: {
41
+ fileName: string;
42
+ status: "scanned" | "failed";
43
+ transactions: number;
44
+ concerns: number;
45
+ error?: string;
46
+ }) => void;
47
+ correlating?: (pairs: number) => void;
48
+ committing?: () => void;
49
+ }
50
+ export interface RunScanOptions {
29
51
  regex?: string;
52
+ force?: boolean;
53
+ /** Allow interactive password prompts when a PDF is encrypted. */
54
+ interactive?: boolean;
55
+ /** Max concurrent scan agents. Default 3, hard cap 8. */
56
+ concurrency?: number;
57
+ events?: ScanRunEvents;
30
58
  }
31
59
  export declare function compileMatcher(input: string): RegExp;
60
+ /** Orchestration */
32
61
  export declare function runScan(opts?: RunScanOptions): Promise<ScanSummary>;