plasalid 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/README.md +2 -2
  2. package/dist/ai/agent.d.ts +6 -7
  3. package/dist/ai/agent.js +27 -11
  4. package/dist/ai/personas.js +48 -46
  5. package/dist/ai/system-prompt.js +1 -1
  6. package/dist/ai/tools/account-mutex.d.ts +1 -0
  7. package/dist/ai/tools/account-mutex.js +16 -0
  8. package/dist/ai/tools/index.js +4 -12
  9. package/dist/ai/tools/ingest.d.ts +1 -1
  10. package/dist/ai/tools/ingest.js +282 -242
  11. package/dist/ai/tools/merchants.js +1 -28
  12. package/dist/ai/tools/read.js +8 -8
  13. package/dist/ai/tools/record.js +3 -36
  14. package/dist/ai/tools/resolve.js +25 -22
  15. package/dist/ai/tools/scan.js +0 -1
  16. package/dist/ai/tools/types.d.ts +14 -21
  17. package/dist/cli/commands/record.js +1 -82
  18. package/dist/cli/commands/resolve.d.ts +5 -2
  19. package/dist/cli/commands/resolve.js +36 -5
  20. package/dist/cli/commands/revert.js +4 -2
  21. package/dist/cli/commands/rules.js +2 -2
  22. package/dist/cli/commands/scan.js +199 -128
  23. package/dist/cli/commands/status.js +5 -5
  24. package/dist/cli/index.js +8 -29
  25. package/dist/cli/ink/ScanDashboard.d.ts +49 -0
  26. package/dist/cli/ink/ScanDashboard.js +214 -0
  27. package/dist/cli/ink/scan_dashboard.d.ts +40 -25
  28. package/dist/cli/ink/scan_dashboard.js +139 -44
  29. package/dist/db/queries/account-balance.d.ts +1 -1
  30. package/dist/db/queries/questions.d.ts +62 -0
  31. package/dist/db/queries/questions.js +110 -0
  32. package/dist/db/queries/transactions.d.ts +1 -1
  33. package/dist/db/queries/unknowns.d.ts +17 -15
  34. package/dist/db/queries/unknowns.js +35 -39
  35. package/dist/db/schema.js +6 -28
  36. package/dist/scanner/audit/auditor.d.ts +31 -0
  37. package/dist/scanner/audit/auditor.js +72 -0
  38. package/dist/scanner/audit/engine.d.ts +10 -0
  39. package/dist/scanner/audit/engine.js +98 -0
  40. package/dist/scanner/audit/eventBus.d.ts +60 -0
  41. package/dist/scanner/audit/eventBus.js +35 -0
  42. package/dist/scanner/audit/passes/index.d.ts +11 -0
  43. package/dist/scanner/audit/passes/index.js +9 -0
  44. package/dist/scanner/audit/passes/types.d.ts +23 -0
  45. package/dist/scanner/audit/passes/types.js +1 -0
  46. package/dist/scanner/audit/types.d.ts +27 -0
  47. package/dist/scanner/audit/types.js +1 -0
  48. package/dist/scanner/auditor.d.ts +51 -0
  49. package/dist/scanner/auditor.js +80 -0
  50. package/dist/scanner/buffer/engine.d.ts +9 -0
  51. package/dist/scanner/buffer/engine.js +110 -0
  52. package/dist/scanner/buffer/sharedBuffer.d.ts +78 -0
  53. package/dist/scanner/buffer/sharedBuffer.js +130 -0
  54. package/dist/scanner/buffer/types.d.ts +67 -0
  55. package/dist/scanner/buffer/types.js +1 -0
  56. package/dist/scanner/buffer.d.ts +45 -38
  57. package/dist/scanner/buffer.js +93 -61
  58. package/dist/scanner/bus/engine.d.ts +11 -0
  59. package/dist/scanner/bus/engine.js +42 -0
  60. package/dist/scanner/bus/types.d.ts +53 -0
  61. package/dist/scanner/bus/types.js +1 -0
  62. package/dist/scanner/bus.d.ts +38 -0
  63. package/dist/scanner/bus.js +37 -0
  64. package/dist/scanner/chunk-worker.d.ts +19 -0
  65. package/dist/scanner/chunk-worker.js +67 -0
  66. package/dist/scanner/chunkWorker.d.ts +20 -0
  67. package/dist/scanner/chunkWorker.js +59 -0
  68. package/dist/scanner/chunker/chunker.d.ts +7 -0
  69. package/dist/scanner/chunker/chunker.js +60 -0
  70. package/dist/scanner/chunker.d.ts +7 -0
  71. package/dist/scanner/chunker.js +60 -0
  72. package/dist/scanner/converge.d.ts +29 -0
  73. package/dist/scanner/converge.js +15 -0
  74. package/dist/scanner/decrypt.d.ts +10 -0
  75. package/dist/scanner/decrypt.js +80 -0
  76. package/dist/scanner/engine/scanEngine.d.ts +24 -0
  77. package/dist/scanner/engine/scanEngine.js +87 -0
  78. package/dist/scanner/engine/types.d.ts +90 -0
  79. package/dist/scanner/engine/types.js +1 -0
  80. package/dist/scanner/engine.d.ts +90 -0
  81. package/dist/scanner/engine.js +84 -0
  82. package/dist/scanner/file-worker.d.ts +33 -0
  83. package/dist/scanner/file-worker.js +28 -0
  84. package/dist/scanner/fileWorker.d.ts +33 -0
  85. package/dist/scanner/fileWorker.js +22 -0
  86. package/dist/scanner/hooks/types.d.ts +25 -0
  87. package/dist/scanner/hooks/types.js +1 -0
  88. package/dist/scanner/hooks.d.ts +23 -0
  89. package/dist/scanner/hooks.js +1 -0
  90. package/dist/scanner/parse.d.ts +10 -0
  91. package/dist/scanner/parse.js +47 -0
  92. package/dist/scanner/passes/index.d.ts +8 -0
  93. package/dist/scanner/passes/index.js +6 -0
  94. package/dist/scanner/passes/types.d.ts +22 -0
  95. package/dist/scanner/passes/types.js +1 -0
  96. package/dist/scanner/pdf/chunker.d.ts +7 -0
  97. package/dist/scanner/pdf/chunker.js +60 -0
  98. package/dist/scanner/pdf/password-store.d.ts +34 -0
  99. package/dist/scanner/pdf/password-store.js +83 -0
  100. package/dist/scanner/pdf/pdf-unlock.d.ts +17 -0
  101. package/dist/scanner/pdf/pdf-unlock.js +50 -0
  102. package/dist/scanner/pdf/pdf.d.ts +17 -0
  103. package/dist/scanner/pdf/pdf.js +36 -0
  104. package/dist/scanner/pdf/state-machine.d.ts +60 -0
  105. package/dist/scanner/pdf/state-machine.js +64 -0
  106. package/dist/scanner/pdf/unlock.d.ts +22 -0
  107. package/dist/scanner/pdf/unlock.js +121 -0
  108. package/dist/scanner/phase-decrypt.d.ts +10 -0
  109. package/dist/scanner/phase-decrypt.js +80 -0
  110. package/dist/scanner/phase-parse.d.ts +10 -0
  111. package/dist/scanner/phase-parse.js +46 -0
  112. package/dist/scanner/phases/chunk.d.ts +8 -0
  113. package/dist/scanner/phases/chunk.js +13 -0
  114. package/dist/scanner/phases/commit.d.ts +12 -0
  115. package/dist/scanner/phases/commit.js +140 -0
  116. package/dist/scanner/phases/decrypt.d.ts +10 -0
  117. package/dist/scanner/phases/decrypt.js +80 -0
  118. package/dist/scanner/phases/parse.d.ts +10 -0
  119. package/dist/scanner/phases/parse.js +46 -0
  120. package/dist/scanner/phases/resolve.d.ts +10 -0
  121. package/dist/scanner/phases/resolve.js +17 -0
  122. package/dist/scanner/phases/review.d.ts +10 -0
  123. package/dist/scanner/phases/review.js +12 -0
  124. package/dist/scanner/progress.d.ts +14 -0
  125. package/dist/scanner/progress.js +21 -0
  126. package/dist/scanner/resolver-memory.d.ts +8 -0
  127. package/dist/scanner/resolver-memory.js +24 -0
  128. package/dist/scanner/resolver.d.ts +39 -0
  129. package/dist/scanner/resolver.js +196 -0
  130. package/dist/scanner/result.d.ts +17 -0
  131. package/dist/scanner/result.js +19 -0
  132. package/dist/scanner/run-passes.d.ts +30 -0
  133. package/dist/scanner/run-passes.js +15 -0
  134. package/dist/scanner/unlock.js +1 -1
  135. package/dist/scanner/worker.d.ts +19 -0
  136. package/dist/scanner/worker.js +67 -0
  137. package/dist/scanner/workers/chunkWorker.d.ts +20 -0
  138. package/dist/scanner/workers/chunkWorker.js +65 -0
  139. package/dist/scanner/workers/fileWorker.d.ts +32 -0
  140. package/dist/scanner/workers/fileWorker.js +22 -0
  141. package/package.json +1 -1
@@ -1,51 +1,58 @@
1
- import type Database from "libsql";
2
- import { type TransactionInput } from "../db/queries/transactions.js";
1
+ import type { TransactionInput } from "../db/queries/transactions.js";
2
+ import type { Bus } from "./bus.js";
3
3
  /**
4
- * One scan agent's pending writes. Transactions and unknowns accumulate here
5
- * while the LLM works; nothing hits the DB until `commit()` runs inside a
6
- * single SQLite transaction. If `commit()` throws, the transaction rolls back
7
- * and the DB stays exactly as it was before this file's scan began.
8
- *
9
- * Account writes (`create_account`, `update_account_metadata`) and merchant
10
- * writes deliberately bypass the buffer — they go directly to the DB through
11
- * their own mutexes so concurrent agents see each other's creates and don't
12
- * duplicate.
4
+ * One in-flight transaction held by the buffer. The id is synthesized on
5
+ * append so the agent can refer to it in the same turn (e.g. to attach a
6
+ * note_unknown). `chunkId` records who created it — audit passes use this
7
+ * to dedup across chunks and merge boundary continuations.
13
8
  */
9
+ export interface BufferedTransaction {
10
+ readonly transaction_id: string;
11
+ readonly chunkId: string;
12
+ input: TransactionInput;
13
+ }
14
14
  export interface BufferedUnknown {
15
- /** Synthesized when the LLM called note_unknown with a buffered transaction_id. */
15
+ readonly unknown_id: string;
16
+ readonly chunkId: string | null;
16
17
  transaction_id: string | null;
17
18
  account_id: string | null;
18
- kind?: string | null;
19
+ kind: string | null;
19
20
  prompt: string;
20
21
  options?: string[];
22
+ /** Null until the auditor or review TUI closes it. */
23
+ answer: string | null;
21
24
  }
22
- export interface BufferedTransaction {
23
- /** Synthesized at queue-time so unknowns can reference this transaction. */
24
- transaction_id: string;
25
- input: TransactionInput;
25
+ export interface BufferSnapshot {
26
+ readonly transactions: readonly BufferedTransaction[];
27
+ readonly unknowns: readonly BufferedUnknown[];
28
+ readonly accountsCreated: readonly string[];
29
+ readonly merchantsCreated: readonly string[];
30
+ readonly fileIds: readonly string[];
26
31
  }
27
- export declare class BufferedWriteContext {
28
- readonly fileName: string;
29
- readonly transactions: BufferedTransaction[];
30
- readonly unknowns: BufferedUnknown[];
31
- doneSummary: string | null;
32
- constructor(fileName: string);
33
- /**
34
- * Queue a transaction. Returns the synthesized transaction id so the agent
35
- * can use it in subsequent note_unknown calls inside the same file.
36
- */
37
- appendTransaction(input: TransactionInput): string;
38
- appendUnknown(unknown: BufferedUnknown): void;
39
- markDone(summary: string): void;
40
- get isDone(): boolean;
41
- /**
42
- * Replay all buffered writes inside one DB transaction. `scannedFileId` is
43
- * stamped onto every transaction and unknown so they're attributable to this
44
- * file. Returns `{ transactions, unknowns }` counts so the caller can report
45
- * them.
46
- */
47
- commit(db: Database.Database, scannedFileId: string): {
32
+ /**
33
+ * The shared buffer every chunk worker writes to during a scan run. Mutations
34
+ * serialize through an internal mutex; reads are lock-free. Every mutation
35
+ * publishes a typed event on the bus so the auditor can react in flight.
36
+ */
37
+ export interface ScanBuffer {
38
+ readonly scanId: string;
39
+ appendTransaction(input: TransactionInput, chunkId: string): Promise<string>;
40
+ updateTransaction(id: string, mut: (current: BufferedTransaction) => BufferedTransaction): Promise<boolean>;
41
+ removeTransaction(id: string, reason: string): Promise<boolean>;
42
+ appendUnknown(input: Omit<BufferedUnknown, "unknown_id" | "answer">): Promise<string>;
43
+ closeUnknown(id: string, answer: string): Promise<boolean>;
44
+ /** Returns true if newly recorded; false if already present. */
45
+ recordFile(fileId: string): boolean;
46
+ recordAccountCreated(accountId: string): boolean;
47
+ recordMerchantCreated(merchantId: string): boolean;
48
+ snapshot(): BufferSnapshot;
49
+ size(): {
48
50
  transactions: number;
49
51
  unknowns: number;
52
+ openUnknowns: number;
50
53
  };
54
+ getTransaction(id: string): BufferedTransaction | undefined;
55
+ filterTransactions(predicate: (tx: BufferedTransaction) => boolean): IterableIterator<BufferedTransaction>;
56
+ openUnknowns(): BufferedUnknown[];
51
57
  }
58
+ export declare function createBuffer(scanId: string, bus: Bus): ScanBuffer;
@@ -1,63 +1,95 @@
1
1
  import { randomUUID } from "crypto";
2
- import { insertTransactionRows, validateTransaction, } from "../db/queries/transactions.js";
3
- import { recordUnknown } from "../db/queries/unknowns.js";
4
- export class BufferedWriteContext {
5
- fileName;
6
- transactions = [];
7
- unknowns = [];
8
- doneSummary = null;
9
- constructor(fileName) {
10
- this.fileName = fileName;
11
- }
12
- /**
13
- * Queue a transaction. Returns the synthesized transaction id so the agent
14
- * can use it in subsequent note_unknown calls inside the same file.
15
- */
16
- appendTransaction(input) {
17
- const transactionId = `tx:${randomUUID()}`;
18
- this.transactions.push({ transaction_id: transactionId, input });
19
- return transactionId;
20
- }
21
- appendUnknown(unknown) {
22
- this.unknowns.push(unknown);
23
- }
24
- markDone(summary) {
25
- this.doneSummary = summary;
26
- }
27
- get isDone() {
28
- return this.doneSummary !== null;
29
- }
30
- /**
31
- * Replay all buffered writes inside one DB transaction. `scannedFileId` is
32
- * stamped onto every transaction and unknown so they're attributable to this
33
- * file. Returns `{ transactions, unknowns }` counts so the caller can report
34
- * them.
35
- */
36
- commit(db, scannedFileId) {
37
- const validated = this.transactions.map(b => ({
38
- buffered: b,
39
- validated: validateTransaction({
40
- ...b.input,
41
- id: b.transaction_id,
42
- source_file_id: scannedFileId,
43
- }),
44
- }));
45
- const tx = db.transaction(() => {
46
- for (const { validated: v } of validated) {
47
- insertTransactionRows(db, v);
48
- }
49
- for (const u of this.unknowns) {
50
- recordUnknown(db, {
51
- file_id: scannedFileId,
52
- transaction_id: u.transaction_id,
53
- account_id: u.account_id,
54
- kind: u.kind ?? null,
55
- prompt: u.prompt,
56
- options: u.options,
57
- });
58
- }
59
- });
60
- tx();
61
- return { transactions: this.transactions.length, unknowns: this.unknowns.length };
62
- }
2
+ function createMutex() {
3
+ let chain = Promise.resolve();
4
+ return {
5
+ runExclusive(fn) {
6
+ const next = chain.then(() => fn());
7
+ chain = next.catch(() => undefined);
8
+ return next;
9
+ },
10
+ };
11
+ }
12
+ function emptyState(scanId) {
13
+ return {
14
+ scanId,
15
+ transactions: new Map(),
16
+ unknowns: new Map(),
17
+ accountsCreated: new Set(),
18
+ merchantsCreated: new Set(),
19
+ fileIds: new Set(),
20
+ };
21
+ }
22
+ function addIfNew(set, value) {
23
+ if (set.has(value))
24
+ return false;
25
+ set.add(value);
26
+ return true;
27
+ }
28
+ export function createBuffer(scanId, bus) {
29
+ const state = emptyState(scanId);
30
+ const mutex = createMutex();
31
+ return {
32
+ scanId,
33
+ appendTransaction: (input, chunkId) => mutex.runExclusive(() => {
34
+ const id = `tx:${randomUUID()}`;
35
+ const tx = { transaction_id: id, chunkId, input };
36
+ state.transactions.set(id, tx);
37
+ bus.publish({ kind: "transaction_appended", transaction: tx, chunkId });
38
+ return id;
39
+ }),
40
+ updateTransaction: (id, mut) => mutex.runExclusive(() => {
41
+ const before = state.transactions.get(id);
42
+ if (!before)
43
+ return false;
44
+ const after = mut(before);
45
+ state.transactions.set(id, after);
46
+ bus.publish({ kind: "transaction_updated", transactionId: id, before, after });
47
+ return true;
48
+ }),
49
+ removeTransaction: (id, reason) => mutex.runExclusive(() => {
50
+ if (!state.transactions.delete(id))
51
+ return false;
52
+ bus.publish({ kind: "transaction_removed", transactionId: id, reason });
53
+ return true;
54
+ }),
55
+ appendUnknown: (input) => mutex.runExclusive(() => {
56
+ const id = `bu:${randomUUID()}`;
57
+ const u = { ...input, unknown_id: id, answer: null };
58
+ state.unknowns.set(id, u);
59
+ bus.publish({ kind: "unknown_appended", unknown: u, chunkId: u.chunkId });
60
+ return id;
61
+ }),
62
+ closeUnknown: (id, answer) => mutex.runExclusive(() => {
63
+ const u = state.unknowns.get(id);
64
+ if (!u || u.answer !== null)
65
+ return false;
66
+ state.unknowns.set(id, { ...u, answer });
67
+ bus.publish({ kind: "unknown_closed", unknownId: id, answer, chunkId: u.chunkId });
68
+ return true;
69
+ }),
70
+ recordFile: (fileId) => addIfNew(state.fileIds, fileId),
71
+ recordAccountCreated: (accountId) => addIfNew(state.accountsCreated, accountId),
72
+ recordMerchantCreated: (merchantId) => addIfNew(state.merchantsCreated, merchantId),
73
+ snapshot: () => ({
74
+ transactions: Array.from(state.transactions.values()),
75
+ unknowns: Array.from(state.unknowns.values()),
76
+ accountsCreated: Array.from(state.accountsCreated),
77
+ merchantsCreated: Array.from(state.merchantsCreated),
78
+ fileIds: Array.from(state.fileIds),
79
+ }),
80
+ size: () => {
81
+ let open = 0;
82
+ for (const u of state.unknowns.values())
83
+ if (u.answer === null)
84
+ open++;
85
+ return { transactions: state.transactions.size, unknowns: state.unknowns.size, openUnknowns: open };
86
+ },
87
+ getTransaction: (id) => state.transactions.get(id),
88
+ *filterTransactions(predicate) {
89
+ for (const tx of state.transactions.values())
90
+ if (predicate(tx))
91
+ yield tx;
92
+ },
93
+ openUnknowns: () => Array.from(state.unknowns.values()).filter(u => u.answer === null),
94
+ };
63
95
  }
@@ -0,0 +1,11 @@
1
+ import type { Bus } from "./types.js";
2
+ /**
3
+ * Pub/sub bus factory. Closes over a Set of listeners + an event history. No
4
+ * class, no `this` — just a record of callbacks the scanner engine wires up
5
+ * once per scan run.
6
+ *
7
+ * Errors thrown synchronously by a listener are caught and logged. Rejected
8
+ * promises from async listeners are also caught. A misbehaving subscriber
9
+ * never silences the bus for the rest.
10
+ */
11
+ export declare function createBus(): Bus;
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Pub/sub bus factory. Closes over a Set of listeners + an event history. No
3
+ * class, no `this` — just a record of callbacks the scanner engine wires up
4
+ * once per scan run.
5
+ *
6
+ * Errors thrown synchronously by a listener are caught and logged. Rejected
7
+ * promises from async listeners are also caught. A misbehaving subscriber
8
+ * never silences the bus for the rest.
9
+ */
10
+ export function createBus() {
11
+ const listeners = new Set();
12
+ const history = [];
13
+ const safelyInvoke = (fn, event) => {
14
+ try {
15
+ const result = fn(event);
16
+ if (result && typeof result.catch === "function") {
17
+ result.catch(err => console.error(`[bus listener] ${err.message}`));
18
+ }
19
+ }
20
+ catch (err) {
21
+ console.error(`[bus listener] ${err.message}`);
22
+ }
23
+ };
24
+ return {
25
+ subscribe(fn) {
26
+ listeners.add(fn);
27
+ return () => { listeners.delete(fn); };
28
+ },
29
+ publish(event) {
30
+ history.push(event);
31
+ for (const fn of listeners)
32
+ safelyInvoke(fn, event);
33
+ },
34
+ history() {
35
+ return history;
36
+ },
37
+ reset() {
38
+ listeners.clear();
39
+ history.length = 0;
40
+ },
41
+ };
42
+ }
@@ -0,0 +1,53 @@
1
+ import type { BufferedTransaction, BufferedUnknown } from "../buffer/types.js";
2
+ /**
3
+ * The typed event stream every subdomain in the scanner communicates over.
4
+ * The Buffer publishes mutation events; the Auditor and the CLI dashboard
5
+ * subscribe. Producers and consumers never reference each other directly —
6
+ * everyone talks through the Bus.
7
+ */
8
+ export type BufferEvent = {
9
+ kind: "transaction_appended";
10
+ transaction: BufferedTransaction;
11
+ chunkId: string;
12
+ } | {
13
+ kind: "transaction_updated";
14
+ transactionId: string;
15
+ before: BufferedTransaction;
16
+ after: BufferedTransaction;
17
+ } | {
18
+ kind: "transaction_removed";
19
+ transactionId: string;
20
+ reason: string;
21
+ } | {
22
+ kind: "unknown_appended";
23
+ unknown: BufferedUnknown;
24
+ chunkId: string | null;
25
+ } | {
26
+ kind: "unknown_closed";
27
+ unknownId: string;
28
+ answer: string;
29
+ } | {
30
+ kind: "chunk_started";
31
+ chunkId: string;
32
+ fileId: string;
33
+ pageNumber: number;
34
+ } | {
35
+ kind: "chunk_completed";
36
+ chunkId: string;
37
+ fileId: string;
38
+ pageNumber: number;
39
+ } | {
40
+ kind: "worker_completed";
41
+ workerId: string;
42
+ chunkId: string;
43
+ transactionsAdded: number;
44
+ unknownsAdded: number;
45
+ };
46
+ export type EventKind = BufferEvent["kind"];
47
+ export type EventListener = (event: BufferEvent) => void | Promise<void>;
48
+ export interface Bus {
49
+ subscribe(fn: EventListener): () => void;
50
+ publish(event: BufferEvent): void;
51
+ history(): readonly BufferEvent[];
52
+ reset(): void;
53
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,38 @@
1
+ import type { BufferedTransaction, BufferedUnknown } from "./buffer.js";
2
+ /**
3
+ * Typed pub/sub bus every scanner subdomain talks over. The buffer publishes
4
+ * mutation events; the audit engine and CLI dashboard subscribe. Producers
5
+ * never reference consumers — everyone goes through the bus.
6
+ */
7
+ export type BufferEvent = {
8
+ kind: "transaction_appended";
9
+ transaction: BufferedTransaction;
10
+ chunkId: string;
11
+ } | {
12
+ kind: "transaction_updated";
13
+ transactionId: string;
14
+ before: BufferedTransaction;
15
+ after: BufferedTransaction;
16
+ } | {
17
+ kind: "transaction_removed";
18
+ transactionId: string;
19
+ reason: string;
20
+ } | {
21
+ kind: "unknown_appended";
22
+ unknown: BufferedUnknown;
23
+ chunkId: string | null;
24
+ } | {
25
+ kind: "unknown_closed";
26
+ unknownId: string;
27
+ answer: string;
28
+ chunkId: string | null;
29
+ };
30
+ export type EventKind = BufferEvent["kind"];
31
+ export type EventListener = (event: BufferEvent) => void | Promise<void>;
32
+ export interface Bus {
33
+ subscribe(fn: EventListener): () => void;
34
+ publish(event: BufferEvent): void;
35
+ history(): readonly BufferEvent[];
36
+ reset(): void;
37
+ }
38
+ export declare function createBus(): Bus;
@@ -0,0 +1,37 @@
1
+ function logListenerError(err) {
2
+ console.error(`[bus listener] ${err instanceof Error ? err.message : String(err)}`);
3
+ }
4
+ function safelyInvoke(fn, event) {
5
+ try {
6
+ const result = fn(event);
7
+ if (result instanceof Promise)
8
+ result.catch(logListenerError);
9
+ }
10
+ catch (err) {
11
+ logListenerError(err);
12
+ }
13
+ }
14
+ export function createBus() {
15
+ const listeners = new Set();
16
+ const history = [];
17
+ return {
18
+ subscribe(fn) {
19
+ listeners.add(fn);
20
+ return () => {
21
+ listeners.delete(fn);
22
+ };
23
+ },
24
+ publish(event) {
25
+ history.push(event);
26
+ for (const fn of listeners)
27
+ safelyInvoke(fn, event);
28
+ },
29
+ history() {
30
+ return history;
31
+ },
32
+ reset() {
33
+ listeners.clear();
34
+ history.length = 0;
35
+ },
36
+ };
37
+ }
@@ -0,0 +1,19 @@
1
+ import type Database from "libsql";
2
+ import type { Chunk } from "./engine.js";
3
+ import type { ScanHooks } from "./hooks.js";
4
+ import type { ScanProgress } from "./progress.js";
5
+ export interface ChunkWorkerDeps {
6
+ readonly db: Database.Database;
7
+ readonly scanId: string;
8
+ readonly scannedFileId: string | undefined;
9
+ readonly progress: ScanProgress;
10
+ readonly chunk: Chunk;
11
+ }
12
+ /**
13
+ * Process one chunk: run the LLM scan agent over a single-page PDF blob with
14
+ * scanId + progress sink + scanned_files row injected through the agent
15
+ * context. Agent's record_transactions / note_unknown calls write directly to
16
+ * the DB; per-row ticks fan out via `progress.emit`. Failures land in the DB
17
+ * as a `chunk_failed` unknown so the resolver can pick them up.
18
+ */
19
+ export declare function runChunkWorker(deps: ChunkWorkerDeps, hooks: ScanHooks): Promise<void>;
@@ -0,0 +1,67 @@
1
+ import { randomUUID } from "crypto";
2
+ import { runScanAgent } from "../ai/agent.js";
3
+ import { recordUnknown } from "../db/queries/unknowns.js";
4
+ import { buildDocumentBlock } from "./pdf/pdf.js";
5
+ import { tryExecute } from "./result.js";
6
+ /**
7
+ * Process one chunk: run the LLM scan agent over a single-page PDF blob with
8
+ * scanId + progress sink + scanned_files row injected through the agent
9
+ * context. Agent's record_transactions / note_unknown calls write directly to
10
+ * the DB; per-row ticks fan out via `progress.emit`. Failures land in the DB
11
+ * as a `chunk_failed` unknown so the resolver can pick them up.
12
+ */
13
+ export async function runChunkWorker(deps, hooks) {
14
+ const workerId = `cw:${randomUUID()}`;
15
+ hooks.onWorkerStart?.(workerId, deps.chunk);
16
+ const outcome = await tryExecute(() => runScanAgent({
17
+ db: deps.db,
18
+ initialMessages: [
19
+ {
20
+ role: "user",
21
+ content: [
22
+ buildDocumentBlock(deps.chunk.bytes, deps.chunk.fileName, deps.chunk.mime),
23
+ { type: "text", text: buildChunkPrompt(deps.chunk) },
24
+ ],
25
+ },
26
+ ],
27
+ prompt: { fileName: deps.chunk.fileName },
28
+ agentCtx: {
29
+ interactive: false,
30
+ scanId: deps.scanId,
31
+ fileId: deps.scannedFileId,
32
+ chunkId: deps.chunk.chunkId,
33
+ progress: deps.progress,
34
+ },
35
+ }));
36
+ hooks.onWorkerEnd?.(workerId, deps.chunk, outcome.ok);
37
+ if (!outcome.ok)
38
+ recordChunkFailure(deps, outcome.error);
39
+ }
40
+ function recordChunkFailure(deps, error) {
41
+ try {
42
+ recordUnknown(deps.db, {
43
+ file_id: deps.scannedFileId ?? null,
44
+ scan_id: deps.scanId,
45
+ transaction_id: null,
46
+ account_id: null,
47
+ kind: "chunk_failed",
48
+ prompt: `Chunk ${deps.chunk.fileName} p${deps.chunk.pageNumber} failed to parse: ${error}.`,
49
+ });
50
+ deps.progress.emit({ chunkId: deps.chunk.chunkId, kind: "unknown" });
51
+ }
52
+ catch {
53
+ // failure to record a failure shouldn't crash the file worker
54
+ }
55
+ }
56
+ function buildChunkPrompt(chunk) {
57
+ return [
58
+ `You are parsing page ${chunk.pageNumber} of ${chunk.totalPages} of ${chunk.fileName}.`,
59
+ ``,
60
+ `Steps:`,
61
+ `1. Call list_accounts to see what already exists.`,
62
+ `2. If this page reveals an account that isn't in the chart yet, call create_account once.`,
63
+ `3. For every transaction on this page, call record_transactions (plural) with all rows in one batch.`,
64
+ `4. If the first or last row looks incomplete (no date, or no amount column visible — the row likely continues onto an adjacent page), call note_unknown with kind="boundary_continuation" and the raw row text. Do NOT invent missing fields.`,
65
+ `5. When done with this page, call mark_file_scanned with a short summary.`,
66
+ ].join("\n");
67
+ }
@@ -0,0 +1,20 @@
1
+ import type Database from "libsql";
2
+ import type { ScanBuffer } from "./buffer.js";
3
+ import type { Chunk } from "./engine.js";
4
+ import type { ScanHooks } from "./hooks.js";
5
+ export interface ChunkWorkerDeps {
6
+ readonly db: Database.Database;
7
+ readonly buffer: ScanBuffer;
8
+ readonly chunk: Chunk;
9
+ }
10
+ export interface ChunkWorkerResult {
11
+ readonly workerId: string;
12
+ readonly ok: boolean;
13
+ readonly error?: string;
14
+ }
15
+ /**
16
+ * Process one chunk: run the LLM scan agent over a single-page PDF blob with
17
+ * the shared buffer + chunkId injected. Agent's `record_transactions` calls
18
+ * land in the shared buffer; events fan out to the audit engine + dashboard.
19
+ */
20
+ export declare function runChunkWorker(deps: ChunkWorkerDeps, hooks: ScanHooks): Promise<ChunkWorkerResult>;
@@ -0,0 +1,59 @@
1
+ import { randomUUID } from "crypto";
2
+ import { runScanAgent } from "../ai/agent.js";
3
+ import { buildDocumentBlock } from "./pdf.js";
4
+ import { tryExecute } from "./result.js";
5
+ /**
6
+ * Process one chunk: run the LLM scan agent over a single-page PDF blob with
7
+ * the shared buffer + chunkId injected. Agent's `record_transactions` calls
8
+ * land in the shared buffer; events fan out to the audit engine + dashboard.
9
+ */
10
+ export async function runChunkWorker(deps, hooks) {
11
+ const workerId = `cw:${randomUUID()}`;
12
+ hooks.onWorkerStart?.(workerId, deps.chunk);
13
+ const outcome = await tryExecute(() => runScanAgent({
14
+ db: deps.db,
15
+ initialMessages: [
16
+ {
17
+ role: "user",
18
+ content: [
19
+ buildDocumentBlock(deps.chunk.bytes, deps.chunk.fileName, deps.chunk.mime),
20
+ { type: "text", text: buildChunkPrompt(deps.chunk) },
21
+ ],
22
+ },
23
+ ],
24
+ prompt: { fileName: deps.chunk.fileName },
25
+ agentCtx: {
26
+ interactive: false,
27
+ buffer: deps.buffer,
28
+ chunkId: deps.chunk.chunkId,
29
+ },
30
+ onProgress: ev => hooks.onWorkerProgress?.(workerId, deps.chunk, { phase: ev.phase, toolName: ev.toolName }),
31
+ }));
32
+ hooks.onWorkerEnd?.(workerId, deps.chunk, outcome.ok);
33
+ if (!outcome.ok) {
34
+ await recordChunkFailure(deps.buffer, deps.chunk, outcome.error);
35
+ return { workerId, ok: false, error: outcome.error };
36
+ }
37
+ return { workerId, ok: true };
38
+ }
39
+ async function recordChunkFailure(buffer, chunk, error) {
40
+ await buffer.appendUnknown({
41
+ chunkId: chunk.chunkId,
42
+ transaction_id: null,
43
+ account_id: null,
44
+ kind: "chunk_failed",
45
+ prompt: `Chunk ${chunk.fileName} p${chunk.pageNumber} failed to parse: ${error}.`,
46
+ });
47
+ }
48
+ function buildChunkPrompt(chunk) {
49
+ return [
50
+ `You are parsing page ${chunk.pageNumber} of ${chunk.totalPages} of ${chunk.fileName}.`,
51
+ ``,
52
+ `Steps:`,
53
+ `1. Call list_accounts to see what already exists.`,
54
+ `2. If this page reveals an account that isn't in the chart yet, call create_account once.`,
55
+ `3. For every transaction on this page, call record_transactions (plural) with all rows in one batch.`,
56
+ `4. If the first or last row looks incomplete (no date, or no amount column visible — the row likely continues onto an adjacent page), call note_unknown with kind="boundary_continuation" and the raw row text. Do NOT invent missing fields.`,
57
+ `5. When done with this page, call mark_file_scanned with a short summary.`,
58
+ ].join("\n");
59
+ }
@@ -0,0 +1,7 @@
1
+ import type { Chunk, DecryptedFile } from "../engine/types.js";
2
+ /**
3
+ * Split one decrypted PDF into N single-page Chunks. Each chunk is a
4
+ * standalone, valid PDF so the per-chunk LLM agent gets a clean document
5
+ * without siblings.
6
+ */
7
+ export declare function chunkPdf(file: DecryptedFile): Promise<Chunk[]>;