plasalid 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. package/README.md +15 -15
  2. package/dist/accounts/taxonomy.d.ts +1 -1
  3. package/dist/accounts/taxonomy.js +1 -1
  4. package/dist/ai/agent.d.ts +9 -10
  5. package/dist/ai/agent.js +31 -15
  6. package/dist/ai/personas.d.ts +1 -1
  7. package/dist/ai/personas.js +57 -55
  8. package/dist/ai/prompt-sections.d.ts +4 -4
  9. package/dist/ai/prompt-sections.js +1 -1
  10. package/dist/ai/system-prompt.d.ts +2 -2
  11. package/dist/ai/system-prompt.js +5 -5
  12. package/dist/ai/tools/account-mutex.d.ts +1 -0
  13. package/dist/ai/tools/account-mutex.js +16 -0
  14. package/dist/ai/tools/clarify.d.ts +2 -0
  15. package/dist/ai/tools/clarify.js +169 -0
  16. package/dist/ai/tools/index.js +10 -18
  17. package/dist/ai/tools/ingest.d.ts +2 -2
  18. package/dist/ai/tools/ingest.js +284 -244
  19. package/dist/ai/tools/merchants.js +1 -28
  20. package/dist/ai/tools/read.js +8 -8
  21. package/dist/ai/tools/record.js +7 -40
  22. package/dist/ai/tools/resolve.js +25 -22
  23. package/dist/ai/tools/scan.js +0 -1
  24. package/dist/ai/tools/types.d.ts +14 -21
  25. package/dist/cli/commands/clarify.d.ts +5 -0
  26. package/dist/cli/commands/clarify.js +44 -0
  27. package/dist/cli/commands/record.js +1 -82
  28. package/dist/cli/commands/resolve.d.ts +5 -2
  29. package/dist/cli/commands/resolve.js +36 -5
  30. package/dist/cli/commands/revert.js +4 -2
  31. package/dist/cli/commands/rules.js +2 -2
  32. package/dist/cli/commands/scan.js +199 -128
  33. package/dist/cli/commands/status.js +6 -6
  34. package/dist/cli/index.js +8 -29
  35. package/dist/cli/ink/ScanDashboard.d.ts +49 -0
  36. package/dist/cli/ink/ScanDashboard.js +214 -0
  37. package/dist/cli/ink/scan_dashboard.d.ts +40 -25
  38. package/dist/cli/ink/scan_dashboard.js +139 -44
  39. package/dist/cli/setup.js +1 -1
  40. package/dist/cli/ux.js +1 -1
  41. package/dist/db/queries/account-balance.d.ts +1 -1
  42. package/dist/db/queries/questions.d.ts +62 -0
  43. package/dist/db/queries/questions.js +110 -0
  44. package/dist/db/queries/transactions.d.ts +1 -1
  45. package/dist/db/queries/unknowns.d.ts +17 -15
  46. package/dist/db/queries/unknowns.js +35 -39
  47. package/dist/db/schema.js +6 -28
  48. package/dist/scanner/audit/auditor.d.ts +31 -0
  49. package/dist/scanner/audit/auditor.js +72 -0
  50. package/dist/scanner/audit/engine.d.ts +10 -0
  51. package/dist/scanner/audit/engine.js +98 -0
  52. package/dist/scanner/audit/eventBus.d.ts +60 -0
  53. package/dist/scanner/audit/eventBus.js +35 -0
  54. package/dist/scanner/audit/passes/index.d.ts +11 -0
  55. package/dist/scanner/audit/passes/index.js +9 -0
  56. package/dist/scanner/audit/passes/types.d.ts +23 -0
  57. package/dist/scanner/audit/passes/types.js +1 -0
  58. package/dist/scanner/audit/types.d.ts +27 -0
  59. package/dist/scanner/audit/types.js +1 -0
  60. package/dist/scanner/auditor.d.ts +51 -0
  61. package/dist/scanner/auditor.js +80 -0
  62. package/dist/scanner/buffer/engine.d.ts +9 -0
  63. package/dist/scanner/buffer/engine.js +110 -0
  64. package/dist/scanner/buffer/sharedBuffer.d.ts +78 -0
  65. package/dist/scanner/buffer/sharedBuffer.js +130 -0
  66. package/dist/scanner/buffer/types.d.ts +67 -0
  67. package/dist/scanner/buffer/types.js +1 -0
  68. package/dist/scanner/buffer.d.ts +45 -38
  69. package/dist/scanner/buffer.js +93 -61
  70. package/dist/scanner/bus/engine.d.ts +11 -0
  71. package/dist/scanner/bus/engine.js +42 -0
  72. package/dist/scanner/bus/types.d.ts +53 -0
  73. package/dist/scanner/bus/types.js +1 -0
  74. package/dist/scanner/bus.d.ts +38 -0
  75. package/dist/scanner/bus.js +37 -0
  76. package/dist/scanner/chunk-worker.d.ts +19 -0
  77. package/dist/scanner/chunk-worker.js +67 -0
  78. package/dist/scanner/chunkWorker.d.ts +20 -0
  79. package/dist/scanner/chunkWorker.js +59 -0
  80. package/dist/scanner/chunker/chunker.d.ts +7 -0
  81. package/dist/scanner/chunker/chunker.js +60 -0
  82. package/dist/scanner/chunker.d.ts +7 -0
  83. package/dist/scanner/chunker.js +60 -0
  84. package/dist/scanner/clarifier-memory.d.ts +8 -0
  85. package/dist/scanner/clarifier-memory.js +24 -0
  86. package/dist/scanner/clarifier.d.ts +39 -0
  87. package/dist/scanner/clarifier.js +196 -0
  88. package/dist/scanner/converge.d.ts +29 -0
  89. package/dist/scanner/converge.js +15 -0
  90. package/dist/scanner/decrypt.d.ts +10 -0
  91. package/dist/scanner/decrypt.js +80 -0
  92. package/dist/scanner/engine/scanEngine.d.ts +24 -0
  93. package/dist/scanner/engine/scanEngine.js +87 -0
  94. package/dist/scanner/engine/types.d.ts +90 -0
  95. package/dist/scanner/engine/types.js +1 -0
  96. package/dist/scanner/engine.d.ts +90 -0
  97. package/dist/scanner/engine.js +84 -0
  98. package/dist/scanner/file-worker.d.ts +33 -0
  99. package/dist/scanner/file-worker.js +28 -0
  100. package/dist/scanner/fileWorker.d.ts +33 -0
  101. package/dist/scanner/fileWorker.js +22 -0
  102. package/dist/scanner/hooks/types.d.ts +25 -0
  103. package/dist/scanner/hooks/types.js +1 -0
  104. package/dist/scanner/hooks.d.ts +23 -0
  105. package/dist/scanner/hooks.js +1 -0
  106. package/dist/scanner/parse.d.ts +10 -0
  107. package/dist/scanner/parse.js +47 -0
  108. package/dist/scanner/passes/index.d.ts +8 -0
  109. package/dist/scanner/passes/index.js +6 -0
  110. package/dist/scanner/passes/types.d.ts +22 -0
  111. package/dist/scanner/passes/types.js +1 -0
  112. package/dist/scanner/pdf/chunker.d.ts +7 -0
  113. package/dist/scanner/pdf/chunker.js +60 -0
  114. package/dist/scanner/pdf/password-store.d.ts +34 -0
  115. package/dist/scanner/pdf/password-store.js +83 -0
  116. package/dist/scanner/pdf/pdf-unlock.d.ts +17 -0
  117. package/dist/scanner/pdf/pdf-unlock.js +50 -0
  118. package/dist/scanner/pdf/pdf.d.ts +17 -0
  119. package/dist/scanner/pdf/pdf.js +36 -0
  120. package/dist/scanner/pdf/state-machine.d.ts +60 -0
  121. package/dist/scanner/pdf/state-machine.js +64 -0
  122. package/dist/scanner/pdf/unlock.d.ts +22 -0
  123. package/dist/scanner/pdf/unlock.js +121 -0
  124. package/dist/scanner/phase-decrypt.d.ts +10 -0
  125. package/dist/scanner/phase-decrypt.js +80 -0
  126. package/dist/scanner/phase-parse.d.ts +10 -0
  127. package/dist/scanner/phase-parse.js +46 -0
  128. package/dist/scanner/phases/chunk.d.ts +8 -0
  129. package/dist/scanner/phases/chunk.js +13 -0
  130. package/dist/scanner/phases/commit.d.ts +12 -0
  131. package/dist/scanner/phases/commit.js +140 -0
  132. package/dist/scanner/phases/decrypt.d.ts +10 -0
  133. package/dist/scanner/phases/decrypt.js +80 -0
  134. package/dist/scanner/phases/parse.d.ts +10 -0
  135. package/dist/scanner/phases/parse.js +46 -0
  136. package/dist/scanner/phases/resolve.d.ts +10 -0
  137. package/dist/scanner/phases/resolve.js +17 -0
  138. package/dist/scanner/phases/review.d.ts +10 -0
  139. package/dist/scanner/phases/review.js +12 -0
  140. package/dist/scanner/progress.d.ts +14 -0
  141. package/dist/scanner/progress.js +21 -0
  142. package/dist/scanner/resolver-memory.d.ts +8 -0
  143. package/dist/scanner/resolver-memory.js +24 -0
  144. package/dist/scanner/resolver.d.ts +39 -0
  145. package/dist/scanner/resolver.js +196 -0
  146. package/dist/scanner/result.d.ts +17 -0
  147. package/dist/scanner/result.js +19 -0
  148. package/dist/scanner/run-passes.d.ts +30 -0
  149. package/dist/scanner/run-passes.js +15 -0
  150. package/dist/scanner/unlock.js +1 -1
  151. package/dist/scanner/worker.d.ts +19 -0
  152. package/dist/scanner/worker.js +67 -0
  153. package/dist/scanner/workers/chunkWorker.d.ts +20 -0
  154. package/dist/scanner/workers/chunkWorker.js +65 -0
  155. package/dist/scanner/workers/fileWorker.d.ts +32 -0
  156. package/dist/scanner/workers/fileWorker.js +22 -0
  157. package/package.json +1 -1
@@ -0,0 +1,83 @@
1
+ import { randomUUID } from "crypto";
2
+ import { basename } from "path";
3
+ import { encryptSecret, decryptSecret } from "../../db/encryption.js";
4
const REGEX_META = /[.*+?^${}()|[\]\\]/g;
const SEPARATORS = /[_\-\s.]/;
const MIN_PREFIX_LEN = 3;
/**
 * Derive a regex source string from a filename.
 *
 * Strategy: lowercase the basename, take everything up to the first separator
 * (underscore, hyphen, whitespace, or dot) and wildcard the rest. This is
 * looser than a literal match, so `AcctSt_May26.pdf` and `AcctSt_Jun26.pdf`
 * share the same pattern.
 *
 * When the prefix is shorter than MIN_PREFIX_LEN or does not start with a
 * letter a-z, falls back to the digit-collapse strategy: the whole lowercased
 * basename is regex-escaped, every run of digits becomes `\d+`, and the result
 * is anchored at both ends. This avoids overly generic patterns such as
 * `^a.*` or `^\d+.*`.
 *
 * Note that REGEX_META does not include `-`, so hyphens are emitted verbatim
 * (they carry no special meaning outside a character class).
 *
 * Examples:
 *   `AcctSt_May26.pdf`          → `^acctst.*`
 *   `KBank-Savings-2026-01.pdf` → `^kbank.*`
 *   `statement.pdf`             → `^statement.*`
 *   `1234567890.pdf`            → `^\d+\.pdf$`          (fallback)
 *   `e-statement.pdf`           → `^e-statement\.pdf$`  (fallback, prefix too short)
 *
 * @param {string} filename File name or path; only the basename is used.
 * @returns {string} Regex source, lowercase.
 */
export function suggestPattern(filename) {
    const name = basename(filename).toLowerCase();
    // String.prototype.split always yields at least one element, so [0] is defined.
    const prefix = name.split(SEPARATORS)[0];
    if (prefix.length >= MIN_PREFIX_LEN && /^[a-z]/.test(prefix)) {
        return `^${prefix.replace(REGEX_META, "\\$&")}.*`;
    }
    // Escape first, then collapse digits: the escape step never introduces
    // digits, so the order cannot corrupt an escape sequence.
    const escaped = name.replace(REGEX_META, "\\$&");
    const collapsed = escaped.replace(/\d+/g, "\\d+");
    return `^${collapsed}$`;
}
34
/**
 * Return the stored passwords whose pattern matches the basename of `filePath`.
 *
 * Rows come back ordered by use_count (descending), then last_used_at
 * (descending, nulls last), then created_at (ascending). Only rows whose
 * pattern matches are decrypted with `dbKey`.
 */
export function findCandidates(db, filePath, dbKey) {
    const fileName = basename(filePath);
    const query = db.prepare(`SELECT id, pattern, password_encrypted, use_count, last_used_at
       FROM file_passwords
       ORDER BY use_count DESC, last_used_at DESC NULLS LAST, created_at ASC`);
    const matching = query.all().filter(row => safeTest(row.pattern, fileName));
    const candidates = [];
    for (const row of matching) {
        candidates.push({
            id: row.id,
            pattern: row.pattern,
            password: decryptSecret(row.password_encrypted, dbKey),
            useCount: row.use_count,
            lastUsedAt: row.last_used_at,
        });
    }
    return candidates;
}
52
/**
 * Case-insensitively test `target` against the regex source `pattern`.
 * Any error (for example an invalid pattern) is treated as "no match".
 */
function safeTest(pattern, target) {
    let matched = false;
    try {
        const matcher = new RegExp(pattern, "i");
        matched = matcher.test(target);
    }
    catch {
        matched = false;
    }
    return matched;
}
60
/**
 * Upsert a password keyed by `pattern` and return the row id.
 *
 * An existing row for the pattern gets the new ciphertext and has its usage
 * stats reset (use_count = 0, last_used_at = NULL); this is useful when the
 * bank rotates the password for a recurring statement series. Otherwise a
 * new row with a fresh `fp:<uuid>` id is inserted.
 */
export function savePassword(db, pattern, password, dbKey) {
    const ciphertext = encryptSecret(password, dbKey);
    const match = db
        .prepare(`SELECT id FROM file_passwords WHERE pattern = ?`)
        .get(pattern);
    if (!match) {
        const newId = `fp:${randomUUID()}`;
        db.prepare(`INSERT INTO file_passwords (id, pattern, password_encrypted) VALUES (?, ?, ?)`).run(newId, pattern, ciphertext);
        return newId;
    }
    db.prepare(`UPDATE file_passwords
         SET password_encrypted = ?, use_count = 0, last_used_at = NULL
         WHERE id = ?`).run(ciphertext, match.id);
    return match.id;
}
79
+ export function recordUse(db, id) {
80
+ db.prepare(`UPDATE file_passwords
81
+ SET use_count = use_count + 1, last_used_at = datetime('now')
82
+ WHERE id = ?`).run(id);
83
+ }
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Thin wrapper around the mupdf WASM library. Lazy-imported on first call so
3
+ * the WASM module isn't loaded for data dirs that contain only plaintext PDFs.
4
+ */
5
+ export declare function isEncrypted(bytes: Buffer): Promise<boolean>;
6
+ export interface UnlockResult {
7
+ ok: boolean;
8
+ /** Set when `ok === true`. Plaintext (decrypted) PDF bytes ready to forward. */
9
+ decrypted?: Buffer;
10
+ }
11
+ /**
12
+ * Attempt to unlock and re-save `bytes` as an unencrypted PDF using `password`.
13
+ * Returns `{ ok: false }` on wrong password or non-PDF input. Returns
14
+ * `{ ok: true, decrypted }` on success. If the input wasn't encrypted to begin
15
+ * with, returns `{ ok: true, decrypted: bytes }` unchanged.
16
+ */
17
+ export declare function unlock(bytes: Buffer, password: string): Promise<UnlockResult>;
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Thin wrapper around the mupdf WASM library. Lazy-imported on first call so
3
+ * the WASM module isn't loaded for data dirs that contain only plaintext PDFs.
4
+ */
5
/** Memoized promise for the dynamically imported mupdf module; null until first use. */
let mupdfPromise = null;
/** mupdf's authenticatePassword returns 0 on a wrong password, non-zero on success. */
const MUPDF_AUTH_FAILED = 0;
/**
 * Return the (shared) promise of the mupdf module, starting the dynamic
 * import on the first call only.
 */
function getMupdf() {
    if (mupdfPromise) {
        return mupdfPromise;
    }
    mupdfPromise = import("mupdf");
    return mupdfPromise;
}
14
/**
 * Open `bytes` as a PDF and report whether mupdf says it needs a password.
 * The document handle is destroyed before returning, even on error.
 */
export async function isEncrypted(bytes) {
    const { Document } = await getMupdf();
    const handle = Document.openDocument(bytes, "application/pdf");
    let needsPassword;
    try {
        needsPassword = handle.needsPassword();
    }
    finally {
        handle.destroy();
    }
    return needsPassword;
}
24
/**
 * Attempt to unlock `bytes` with `password` and re-save it as an unencrypted PDF.
 *
 * Resolves to `{ ok: false }` when the opened document is not a
 * mupdf.PDFDocument or when authentication fails; to
 * `{ ok: true, decrypted: bytes }` (input unchanged) when no password is
 * needed; and to `{ ok: true, decrypted }` with the re-saved bytes otherwise.
 * The document handle is always destroyed.
 */
export async function unlock(bytes, password) {
    const mupdf = await getMupdf();
    const handle = mupdf.Document.openDocument(bytes, "application/pdf");
    try {
        return decryptOpenedDocument(mupdf, handle, bytes, password);
    }
    finally {
        handle.destroy();
    }
}
/** Compute the unlock result for an already-opened document; never destroys `handle`. */
function decryptOpenedDocument(mupdf, handle, bytes, password) {
    const isPdf = handle instanceof mupdf.PDFDocument;
    if (!isPdf) {
        return { ok: false };
    }
    if (!handle.needsPassword()) {
        return { ok: true, decrypted: bytes };
    }
    const authResult = handle.authenticatePassword(password);
    if (authResult === MUPDF_AUTH_FAILED) {
        return { ok: false };
    }
    // Re-save with the "decrypt" option, then copy into a Node Buffer.
    const saved = handle.saveToBuffer("decrypt");
    const decrypted = Buffer.from(saved.asUint8Array());
    return { ok: true, decrypted };
}
@@ -0,0 +1,17 @@
1
+ import type { DocumentBlock } from "../../ai/provider.js";
2
+ export interface LoadedFile {
3
+ bytes: Buffer;
4
+ hash: string;
5
+ mime: string;
6
+ fileName: string;
7
+ }
8
+ /**
9
+ * Read a local PDF, hash its bytes, and return everything the scan pipeline
10
+ * needs to decide whether to skip / re-scan / unlock the file. The hash is
11
+ * sha256 of the original on-disk bytes (still encrypted if the PDF is
12
+ * password-protected) — that's what the dedup contract relies on, so we can
13
+ * recognize the same file across re-scans regardless of unlock state.
14
+ */
15
+ export declare function readPdf(path: string): LoadedFile;
16
+ /** Build an Anthropic-compatible document content block from PDF bytes. */
17
+ export declare function buildDocumentBlock(bytes: Buffer, fileName: string, mime?: string): DocumentBlock;
@@ -0,0 +1,36 @@
1
+ import { readFileSync, statSync } from "fs";
2
+ import { createHash } from "crypto";
3
+ import { basename, extname } from "path";
4
const MIME_BY_EXT = {
    ".pdf": "application/pdf",
};
const MAX_BYTES = 30 * 1024 * 1024;
/**
 * Load a local PDF and return its bytes, sha256 hex hash, mime type and
 * basename.
 *
 * The hash covers the bytes exactly as they sit on disk (still encrypted for a
 * password-protected PDF); the dedup contract relies on that so the same file
 * is recognized across re-scans regardless of unlock state.
 *
 * @throws {Error} When the (lowercased) extension is not in MIME_BY_EXT, or
 *   the file is larger than MAX_BYTES.
 */
export function readPdf(path) {
    const extension = extname(path).toLowerCase();
    const mime = MIME_BY_EXT[extension];
    if (!mime) {
        throw new Error(`Unsupported file extension: ${extension}. Plasalid v1 only ingests PDFs.`);
    }
    // Check the size before reading so oversized files are never loaded.
    const { size } = statSync(path);
    if (size > MAX_BYTES) {
        throw new Error(`File too large (${size} bytes). Limit is ${MAX_BYTES} bytes.`);
    }
    const bytes = readFileSync(path);
    const hasher = createHash("sha256");
    hasher.update(bytes);
    return {
        bytes,
        hash: hasher.digest("hex"),
        mime,
        fileName: basename(path),
    };
}
29
/**
 * Wrap PDF bytes in an Anthropic-compatible `document` content block: the
 * bytes are base64-encoded into `source.data`, `mime` becomes
 * `source.media_type`, and `fileName` becomes the block title.
 */
export function buildDocumentBlock(bytes, fileName, mime = "application/pdf") {
    const encoded = bytes.toString("base64");
    const source = { type: "base64", media_type: mime, data: encoded };
    return { type: "document", source, title: fileName };
}
@@ -0,0 +1,60 @@
1
+ import type { StoredPassword } from "./password-store.js";
2
+ /**
3
+ * Pure state machine for the unlock phase of a single file scan. Side effects
4
+ * (mupdf calls, prompts, DB reads) live in the orchestrator; this module only
5
+ * encodes the transition logic so it can be exhaustively unit-tested.
6
+ */
7
+ export declare const MAX_PASSWORD_ATTEMPTS = 10;
8
+ export type UnlockOutcome = {
9
+ kind: "plaintext";
10
+ } | {
11
+ kind: "from-store";
12
+ storedId: string;
13
+ } | {
14
+ kind: "from-user";
15
+ password: string;
16
+ };
17
+ export type UnlockState = {
18
+ kind: "init";
19
+ } | {
20
+ kind: "trying-stored";
21
+ candidates: StoredPassword[];
22
+ } | {
23
+ kind: "awaiting-user";
24
+ attempt: number;
25
+ } | {
26
+ kind: "done";
27
+ decrypted: Buffer;
28
+ outcome: UnlockOutcome;
29
+ } | {
30
+ kind: "failed";
31
+ reason: string;
32
+ };
33
+ export type UnlockEvent = {
34
+ kind: "INSPECTED_PLAINTEXT";
35
+ bytes: Buffer;
36
+ } | {
37
+ kind: "INSPECTED_ENCRYPTED";
38
+ candidates: StoredPassword[];
39
+ } | {
40
+ kind: "STORED_UNLOCK_OK";
41
+ decrypted: Buffer;
42
+ usedStoredId: string;
43
+ } | {
44
+ kind: "STORED_UNLOCK_EXHAUSTED";
45
+ } | {
46
+ kind: "USER_CANCELLED";
47
+ } | {
48
+ kind: "UNLOCK_OK";
49
+ decrypted: Buffer;
50
+ password: string;
51
+ } | {
52
+ kind: "UNLOCK_FAIL";
53
+ };
54
+ export declare function isTerminal(state: UnlockState): boolean;
55
+ /**
56
+ * Pure transition. Throws if the event doesn't make sense for the current state;
57
+ * the orchestrator never produces such combinations, so reaching the throw is a
58
+ * programmer error worth surfacing loudly.
59
+ */
60
+ export declare function transition(state: UnlockState, event: UnlockEvent): UnlockState;
@@ -0,0 +1,64 @@
1
/**
 * Pure state machine for the unlock phase of a single file scan. Side effects
 * (mupdf calls, prompts, DB reads) live in the orchestrator; this module only
 * encodes the transition logic so it can be exhaustively unit-tested.
 */
export const MAX_PASSWORD_ATTEMPTS = 10;
const TERMINAL_KINDS = new Set(["done", "failed"]);
/** True once the machine has reached `done` or `failed`. */
export function isTerminal(state) {
    return TERMINAL_KINDS.has(state.kind);
}
/**
 * Transition table: state kind → event kind → handler producing the next
 * state. Maps (rather than plain objects) keep lookups exact, so inherited
 * property names can never be mistaken for a handler. Terminal states have
 * no entries.
 */
const TRANSITIONS = new Map([
    ["init", new Map([
            ["INSPECTED_PLAINTEXT", (_state, event) => ({
                    kind: "done",
                    decrypted: event.bytes,
                    outcome: { kind: "plaintext" },
                })],
            ["INSPECTED_ENCRYPTED", (_state, event) => ({
                    kind: "trying-stored",
                    candidates: event.candidates,
                })],
        ])],
    ["trying-stored", new Map([
            ["STORED_UNLOCK_OK", (_state, event) => ({
                    kind: "done",
                    decrypted: event.decrypted,
                    outcome: { kind: "from-store", storedId: event.usedStoredId },
                })],
            // No stored password worked: hand over to the user, starting at attempt 1.
            ["STORED_UNLOCK_EXHAUSTED", () => ({ kind: "awaiting-user", attempt: 1 })],
        ])],
    ["awaiting-user", new Map([
            ["USER_CANCELLED", () => ({ kind: "failed", reason: "password required" })],
            ["UNLOCK_OK", (_state, event) => ({
                    kind: "done",
                    decrypted: event.decrypted,
                    outcome: { kind: "from-user", password: event.password },
                })],
            // A failure on the last allowed attempt ends the machine; otherwise retry.
            ["UNLOCK_FAIL", (state) => state.attempt >= MAX_PASSWORD_ATTEMPTS
                    ? {
                        kind: "failed",
                        reason: `incorrect password after ${MAX_PASSWORD_ATTEMPTS} attempts`,
                    }
                    : { kind: "awaiting-user", attempt: state.attempt + 1 }],
        ])],
]);
/**
 * Pure transition: returns the next state for `state` + `event`.
 *
 * @throws {Error} When the event is not valid for the current state
 *   (including any event delivered to a terminal state). The orchestrator
 *   never produces such combinations, so reaching the throw is a programmer
 *   error worth surfacing loudly.
 */
export function transition(state, event) {
    const handler = TRANSITIONS.get(state.kind)?.get(event.kind);
    if (!handler) {
        throw new Error(`Invalid unlock transition: ${state.kind} + ${event.kind}`);
    }
    return handler(state, event);
}
@@ -0,0 +1,22 @@
1
+ import type Database from "libsql";
2
+ import { type UnlockOutcome } from "./state-machine.js";
3
+ export interface UnlockCtx {
4
+ db: Database.Database;
5
+ filePath: string;
6
+ bytes: Buffer;
7
+ interactive: boolean;
8
+ }
9
+ /**
10
+ * Drive the pure unlock state machine to a terminal state, returning the
11
+ * decrypted bytes and the outcome (plaintext / from-store / from-user) so the
12
+ * caller can persist passwords or record stored-key usage.
13
+ */
14
+ export declare function unlockIfNeeded(ctx: UnlockCtx): Promise<{
15
+ decrypted: Buffer;
16
+ outcome: UnlockOutcome;
17
+ }>;
18
+ /**
19
+ * After a successful unlock: bump usage on a stored hit, save a fresh user
20
+ * password under a filename-pattern key, or no-op for plaintext.
21
+ */
22
+ export declare function persistUnlockOutcome(db: Database.Database, filePath: string, outcome: UnlockOutcome): void;
@@ -0,0 +1,121 @@
1
+ import inquirer from "inquirer";
2
+ import { basename } from "path";
3
+ import { config } from "../../config.js";
4
+ import { statusSpinner } from "../../cli/ux.js";
5
+ import { findCandidates, savePassword, recordUse, suggestPattern, } from "./password-store.js";
6
+ import { transition, isTerminal, MAX_PASSWORD_ATTEMPTS, } from "./state-machine.js";
7
+ import { isEncrypted, unlock } from "./pdf-unlock.js";
8
/**
 * Run the unlock state machine from `init` until it reaches a terminal state.
 *
 * Each iteration asks `stepUnlock` for the next event and feeds it to
 * `transition`. On `done`, resolves with the decrypted bytes and the outcome
 * (plaintext / from-store / from-user) so the caller can persist passwords or
 * record stored-key usage.
 *
 * @throws {Error} With the machine's `reason` when it ends in `failed`.
 */
export async function unlockIfNeeded(ctx) {
    let current = { kind: "init" };
    while (!isTerminal(current)) {
        const nextEvent = await stepUnlock(current, ctx);
        current = transition(current, nextEvent);
    }
    switch (current.kind) {
        case "failed":
            throw new Error(current.reason);
        case "done":
            return { decrypted: current.decrypted, outcome: current.outcome };
        default:
            // Defensive: isTerminal() said we were finished but the kind is unexpected.
            throw new Error(`unlock loop exited in non-terminal state ${current.kind}`);
    }
}
27
/**
 * Perform the side effect that belongs to the current (non-terminal) state and
 * return the event describing what happened.
 *
 * - `init`: inspects the bytes; yields INSPECTED_PLAINTEXT, or
 *   INSPECTED_ENCRYPTED together with the stored-password candidates for the file.
 * - `trying-stored`: delegates to tryStoredPasswords.
 * - `awaiting-user`: yields USER_CANCELLED when the run is non-interactive or
 *   the prompt returns an empty password; otherwise tries the entered password
 *   and yields UNLOCK_OK / UNLOCK_FAIL.
 *
 * Every spinner started here is settled before the function exits, including
 * when the underlying call throws; the error is then re-thrown.
 *
 * @throws {Error} When called with a terminal state, or when inspection or
 *   decryption throws.
 */
async function stepUnlock(state, ctx) {
    switch (state.kind) {
        case "init": {
            const spinner = statusSpinner(`Inspecting ${basename(ctx.filePath)}...`);
            try {
                const encrypted = await isEncrypted(ctx.bytes);
                if (!encrypted) {
                    spinner.succeed(`${basename(ctx.filePath)} is not encrypted.`);
                    return { kind: "INSPECTED_PLAINTEXT", bytes: ctx.bytes };
                }
                const candidates = findCandidates(ctx.db, ctx.filePath, config.dbEncryptionKey);
                spinner.info(`${basename(ctx.filePath)} is encrypted (${candidates.length} saved password${candidates.length === 1 ? "" : "s"} match).`);
                return { kind: "INSPECTED_ENCRYPTED", candidates };
            }
            catch (err) {
                spinner.fail("Inspection failed.");
                throw err;
            }
        }
        case "trying-stored":
            return await tryStoredPasswords(ctx.bytes, state.candidates);
        case "awaiting-user": {
            if (!ctx.interactive) {
                return { kind: "USER_CANCELLED" };
            }
            const password = await promptForPassword(basename(ctx.filePath), state.attempt);
            if (!password) {
                return { kind: "USER_CANCELLED" };
            }
            const spinner = statusSpinner("Decrypting...");
            let result;
            try {
                result = await unlock(ctx.bytes, password);
            }
            catch (err) {
                // Mirror the init branch: never leave a spinner running when the
                // underlying call throws.
                spinner.fail("Decryption failed.");
                throw err;
            }
            if (result.ok && result.decrypted) {
                spinner.succeed("Decrypted.");
                return { kind: "UNLOCK_OK", decrypted: result.decrypted, password };
            }
            spinner.fail(`Incorrect password (attempt ${state.attempt}/${MAX_PASSWORD_ATTEMPTS}).`);
            return { kind: "UNLOCK_FAIL" };
        }
        default:
            throw new Error(`stepUnlock called with terminal state ${state.kind}`);
    }
}
69
/**
 * Try each stored candidate password in order against `bytes`.
 *
 * Returns STORED_UNLOCK_OK (with the decrypted bytes and the id of the
 * candidate that worked) on the first success, or STORED_UNLOCK_EXHAUSTED when
 * there are no candidates or none of them unlocks the file.
 *
 * If `unlock` throws, the progress spinner is failed before the error is
 * re-thrown so it is never left running.
 */
async function tryStoredPasswords(bytes, candidates) {
    const total = candidates.length;
    if (total === 0) {
        return { kind: "STORED_UNLOCK_EXHAUSTED" };
    }
    const spinner = statusSpinner(`Trying saved password 1/${total}...`);
    try {
        for (const [index, cand] of candidates.entries()) {
            spinner.text = `Trying saved password ${index + 1}/${total} (pattern ${cand.pattern})`;
            const result = await unlock(bytes, cand.password);
            if (result.ok && result.decrypted) {
                spinner.succeed(`Unlocked with saved password (pattern ${cand.pattern}).`);
                return {
                    kind: "STORED_UNLOCK_OK",
                    decrypted: result.decrypted,
                    usedStoredId: cand.id,
                };
            }
        }
    }
    catch (err) {
        spinner.fail("Could not try saved passwords.");
        throw err;
    }
    spinner.info("No saved password matched. Asking the user.");
    return { kind: "STORED_UNLOCK_EXHAUSTED" };
}
90
/**
 * Ask the user for the password of `fileName` with a masked prompt.
 * The first attempt explains that the PDF is encrypted; later attempts show
 * the attempt counter. Resolves to the trimmed input ("" when nothing was entered).
 */
async function promptForPassword(fileName, attempt) {
    let message;
    if (attempt === 1) {
        message = `This PDF is encrypted. Password for ${fileName}:`;
    }
    else {
        message = `Password for ${fileName} (attempt ${attempt}/${MAX_PASSWORD_ATTEMPTS}):`;
    }
    const question = { type: "password", name: "password", mask: "*", message };
    const answers = await inquirer.prompt([question]);
    return String(answers.password ?? "").trim();
}
99
/** Plaintext files have nothing to persist. */
function persistNothing() { }
/** A stored password worked: bump its usage stats. */
function persistStoredHit(db, _filePath, outcome) {
    recordUse(db, outcome.storedId);
}
/** The user typed a working password: save it under a pattern derived from the file name. */
function persistUserPassword(db, filePath, outcome) {
    const pattern = suggestPattern(filePath);
    const spinner = statusSpinner(`Saving password for pattern ${pattern}...`);
    try {
        savePassword(db, pattern, outcome.password, config.dbEncryptionKey);
        spinner.succeed(`Saved password for pattern ${pattern} in secure vault.`);
    }
    catch (err) {
        const detail = err instanceof Error ? err.message : String(err);
        spinner.fail(`Could not save password: ${detail}`);
        throw err;
    }
}
/** Outcome kind → persistence handler. */
const PERSIST = {
    plaintext: persistNothing,
    "from-store": persistStoredHit,
    "from-user": persistUserPassword,
};
/**
 * After a successful unlock, dispatch on `outcome.kind`: bump usage on a stored
 * hit, save a fresh user password under a filename-pattern key, or do nothing
 * for plaintext. Errors from saving are re-thrown.
 */
export function persistUnlockOutcome(db, filePath, outcome) {
    const handler = PERSIST[outcome.kind];
    handler(db, filePath, outcome);
}
@@ -0,0 +1,10 @@
1
+ import type Database from "libsql";
2
+ import type { ScanState } from "./engine.js";
3
+ import type { ScanHooks } from "./hooks.js";
4
+ /**
5
+ * Phase 1 — walk the data dir, optionally filter by regex, decrypt each file
6
+ * sequentially (password prompts can't share a TTY). Output partitions into
7
+ * decrypted / skipped / failed via a kind-keyed dispatch map. Bootstrapped
8
+ * scanned_files rows are tagged onto each DecryptedFile.
9
+ */
10
+ export declare function decryptPhase(db: Database.Database, state: ScanState, hooks: ScanHooks): Promise<void>;
@@ -0,0 +1,80 @@
1
+ import { randomUUID } from "crypto";
2
+ import { readPdf } from "./pdf/pdf.js";
3
+ import { unlockIfNeeded, persistUnlockOutcome } from "./pdf/unlock.js";
4
+ import { scanDataDir } from "./walker.js";
5
+ import { tryExecute } from "./result.js";
6
+ function findScannedByHash(db, hash) {
7
+ return db
8
+ .prepare(`SELECT id FROM scanned_files WHERE file_hash = ?`)
9
+ .get(hash) ?? null;
10
+ }
11
/**
 * Read, dedup-check and unlock a single file, returning a kind-tagged outcome:
 *
 * - `failed`: reading or unlocking did not succeed (carries an error message).
 * - `skipped`: a scanned_files row with the same hash exists and `opts.force` is off.
 * - `decrypted`: carries the file descriptor with its decrypted bytes and,
 *   when a prior row exists, that row's id as `replacesPriorScannedFileId`.
 *
 * The unlock outcome is persisted before the `decrypted` result is returned.
 */
async function decryptOne(db, file, opts) {
    const loaded = await tryExecute(() => readPdf(file.path));
    if (!loaded.ok) {
        return { kind: "failed", error: `read failed: ${loaded.error}` };
    }
    const { bytes, hash, mime } = loaded.value;
    const prior = findScannedByHash(db, hash);
    const shouldSkip = prior && !opts.force;
    if (shouldSkip) {
        return { kind: "skipped", existingScannedFileId: prior.id };
    }
    const unlocked = await tryExecute(() => unlockIfNeeded({
        db,
        filePath: file.path,
        bytes,
        interactive: opts.interactive,
    }));
    if (!unlocked.ok) {
        return { kind: "failed", error: unlocked.error || "unlock failed" };
    }
    const { decrypted, outcome } = unlocked.value;
    persistUnlockOutcome(db, file.path, outcome);
    const decryptedFile = {
        path: file.path,
        fileName: file.name,
        relPath: file.relPath,
        hash,
        mime,
        decryptedBytes: decrypted,
        replacesPriorScannedFileId: prior?.id,
    };
    return { kind: "decrypted", file: decryptedFile };
}
42
+ const APPLY = {
43
+ decrypted: (state, _file, o) => { state.decrypted.push(o.file); },
44
+ skipped: (state, file, o) => { state.skipped.push({ file, existingScannedFileId: o.existingScannedFileId }); },
45
+ failed: (state, file, o) => { state.failed.push({ file, error: o.error }); },
46
+ };
47
+ /**
48
+ * Bootstrap one scanned_files row per decrypted file. Chunk workers later
49
+ * stamp transactions with source_file_id, so the row must exist before any
50
+ * tool writes hit the DB. Status flips to 'scanned' after parse completes.
51
+ */
52
+ function bootstrapScannedFiles(db, state) {
53
+ for (const file of state.decrypted) {
54
+ if (file.replacesPriorScannedFileId) {
55
+ db.prepare(`DELETE FROM scanned_files WHERE id = ?`).run(file.replacesPriorScannedFileId);
56
+ }
57
+ const sfId = `sf:${randomUUID()}`;
58
+ db.prepare(`INSERT INTO scanned_files (id, path, file_hash, mime, status) VALUES (?, ?, ?, ?, 'pending')`).run(sfId, file.path, file.hash, file.mime);
59
+ file.scannedFileId = sfId;
60
+ }
61
+ }
62
/**
 * Phase 1: walk the data dir, optionally keep only files whose relPath matches
 * `options.regex` (case-insensitive), then decrypt each file one at a time
 * (password prompts can't share a TTY).
 *
 * Each per-file outcome is filed into state.decrypted / state.skipped /
 * state.failed through the kind-keyed APPLY map. Finally a scanned_files row
 * is bootstrapped for every decrypted file. `beforeDecrypt` / `afterDecrypt`
 * hooks run around the whole phase when present.
 */
export async function decryptPhase(db, state, hooks) {
    await hooks.beforeDecrypt?.(state);
    const options = state.options;
    const pattern = options.regex ? new RegExp(options.regex, "i") : null;
    const keep = pattern ? (f) => pattern.test(f.relPath) : () => true;
    state.files = scanDataDir().filter(keep);
    // `interactive` defaults to true when unset; `force` is coerced to a boolean.
    const decryptOpts = {
        force: !!options.force,
        interactive: options.interactive ?? true,
    };
    for (const file of state.files) {
        const outcome = await decryptOne(db, file, decryptOpts);
        const apply = APPLY[outcome.kind];
        apply(state, file, outcome);
    }
    bootstrapScannedFiles(db, state);
    await hooks.afterDecrypt?.(state);
}
@@ -0,0 +1,10 @@
1
+ import type Database from "libsql";
2
+ import type { ScanState } from "./engine.js";
3
+ import type { ScanHooks } from "./hooks.js";
4
+ /**
5
+ * Phase 3 — fan out FileWorkers in parallel. Each FileWorker fans out its
6
+ * file's chunks in parallel internally. The scanId + progress sink are
7
+ * threaded through ScanState; chunk-worker tools write to the DB directly
8
+ * and tick the progress sink as they go.
9
+ */
10
+ export declare function parsePhase(db: Database.Database, state: ScanState, hooks: ScanHooks): Promise<void>;
@@ -0,0 +1,46 @@
1
+ import { runWithConcurrency } from "./concurrency.js";
2
+ import { runFileWorker } from "./file-worker.js";
3
+ import { errorMessage } from "./result.js";
4
const DEFAULT_MAX_FILE_WORKERS = 5;
const DEFAULT_MAX_CHUNK_WORKERS_PER_FILE = 5;
const HARD_CAP = 8;
/**
 * Resolve a requested worker count to an integer in [1, HARD_CAP].
 *
 * `null` / `undefined` select `fallback`. A value that coerces to NaN also
 * selects `fallback`: Math.max(1, NaN) is NaN, so without this guard the NaN
 * would flow through as the concurrency limit. Fractional requests are
 * truncated toward zero so the result is always a whole number of workers;
 * ±Infinity still clamps to the nearest bound.
 */
const clamp = (n, fallback) => {
    const requested = Number(n ?? fallback);
    const usable = Number.isNaN(requested) ? fallback : Math.trunc(requested);
    return Math.min(HARD_CAP, Math.max(1, usable));
};
8
/**
 * Phase 3: run one FileWorker per decrypted file that has at least one chunk,
 * with at most `maxFileWorkers` running concurrently; each worker receives
 * `maxChunkWorkersPerFile` as its own chunk-level limit. Both limits go
 * through clamp() with their defaults. The scanId and progress sink are
 * threaded through from ScanState.
 *
 * Failed workers are recorded in state.errors under phase "parse". Afterwards
 * every decrypted file that has a scannedFileId is marked 'scanned'.
 * `beforeParse` / `afterParse` hooks run around the phase when present.
 */
export async function parsePhase(db, state, hooks) {
    await hooks.beforeParse?.(state);
    const { maxFileWorkers, maxChunkWorkersPerFile } = state.options;
    const fileLimit = clamp(maxFileWorkers, DEFAULT_MAX_FILE_WORKERS);
    const chunkLimit = clamp(maxChunkWorkersPerFile, DEFAULT_MAX_CHUNK_WORKERS_PER_FILE);
    // Group chunks by owning file; chunk.fileId carries the file path.
    const groups = [];
    for (const file of state.decrypted) {
        const chunks = state.chunks.filter(chunk => chunk.fileId === file.path);
        if (chunks.length === 0) {
            continue;
        }
        groups.push({ fileId: file.path, scannedFileId: file.scannedFileId, chunks });
    }
    const tasks = groups.map(({ fileId, scannedFileId, chunks }) => () => runFileWorker({
        db,
        scanId: state.scanId,
        scannedFileId,
        progress: state.progress,
        fileId,
        chunks,
        maxChunkWorkers: chunkLimit,
    }, hooks));
    const settled = await runWithConcurrency(tasks, fileLimit);
    // Results are matched to their group by index.
    for (const [index, result] of settled.entries()) {
        if (result.ok) {
            continue;
        }
        state.errors.push({
            phase: "parse",
            target: groups[index].fileId,
            error: errorMessage(result.error),
        });
    }
    // NOTE(review): files whose worker failed (or that produced no chunks) are
    // marked 'scanned' as well; confirm that is intended.
    for (const { scannedFileId } of state.decrypted) {
        if (scannedFileId) {
            db.prepare(`UPDATE scanned_files SET status = 'scanned', scanned_at = datetime('now') WHERE id = ?`).run(scannedFileId);
        }
    }
    await hooks.afterParse?.(state);
}
@@ -0,0 +1,8 @@
1
+ import type Database from "libsql";
2
+ import type { ScanState } from "../engine.js";
3
+ import type { ScanHooks } from "../hooks.js";
4
+ /**
5
+ * Phase 2 — turn every decrypted file into a list of single-page Chunks.
6
+ * Sequential across files (cheap in-memory operation, no contention).
7
+ */
8
+ export declare function chunkPhase(_db: Database.Database, state: ScanState, hooks: ScanHooks): Promise<void>;
@@ -0,0 +1,13 @@
1
+ import { chunkPdf } from "../chunker.js";
2
/**
 * Phase 2: run chunkPdf over every decrypted file, one file at a time, and
 * append the resulting chunks to state.chunks in file order. `beforeChunk` /
 * `afterChunk` hooks run around the phase when present. The db handle is unused.
 */
export async function chunkPhase(_db, state, hooks) {
    await hooks.beforeChunk?.(state);
    for (const file of state.decrypted) {
        const fileChunks = await chunkPdf(file);
        for (const chunk of fileChunks) {
            state.chunks.push(chunk);
        }
    }
    await hooks.afterChunk?.(state);
}
@@ -0,0 +1,12 @@
1
+ import type Database from "libsql";
2
+ import { type TransactionInput } from "../../db/queries/transactions.js";
3
+ import type { ScanState } from "../engine.js";
4
+ import type { ScanHooks } from "../hooks.js";
5
+ /**
6
+ * Phase 5 — flush the shared buffer to the DB. Per-row transactions so one
7
+ * bad row drops only itself (lands as a scan_commit_failure unknown). Every
8
+ * successful mutation appends an action_log row keyed to scanId so the run
9
+ * can be reverted as a unit.
10
+ */
11
+ export declare function commitPhase(db: Database.Database, state: ScanState, hooks: ScanHooks): Promise<void>;
12
+ export type { TransactionInput };