plasalid 0.7.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -4
- package/dist/ai/agent.d.ts +6 -7
- package/dist/ai/agent.js +27 -11
- package/dist/ai/personas.js +48 -46
- package/dist/ai/system-prompt.js +1 -1
- package/dist/ai/tools/account-mutex.d.ts +1 -0
- package/dist/ai/tools/account-mutex.js +16 -0
- package/dist/ai/tools/index.js +4 -12
- package/dist/ai/tools/ingest.d.ts +1 -1
- package/dist/ai/tools/ingest.js +282 -242
- package/dist/ai/tools/merchants.js +1 -28
- package/dist/ai/tools/read.js +8 -8
- package/dist/ai/tools/record.js +3 -36
- package/dist/ai/tools/resolve.js +25 -22
- package/dist/ai/tools/scan.js +0 -1
- package/dist/ai/tools/types.d.ts +14 -21
- package/dist/cli/commands/record.js +1 -82
- package/dist/cli/commands/resolve.d.ts +5 -2
- package/dist/cli/commands/resolve.js +36 -5
- package/dist/cli/commands/revert.js +4 -2
- package/dist/cli/commands/rules.js +2 -2
- package/dist/cli/commands/scan.js +199 -128
- package/dist/cli/commands/status.js +5 -5
- package/dist/cli/index.js +8 -29
- package/dist/cli/ink/ScanDashboard.d.ts +49 -0
- package/dist/cli/ink/ScanDashboard.js +214 -0
- package/dist/cli/ink/scan_dashboard.d.ts +40 -25
- package/dist/cli/ink/scan_dashboard.js +139 -44
- package/dist/db/queries/account-balance.d.ts +1 -1
- package/dist/db/queries/questions.d.ts +62 -0
- package/dist/db/queries/questions.js +110 -0
- package/dist/db/queries/transactions.d.ts +1 -1
- package/dist/db/queries/unknowns.d.ts +17 -15
- package/dist/db/queries/unknowns.js +35 -39
- package/dist/db/schema.js +6 -28
- package/dist/scanner/audit/auditor.d.ts +31 -0
- package/dist/scanner/audit/auditor.js +72 -0
- package/dist/scanner/audit/engine.d.ts +10 -0
- package/dist/scanner/audit/engine.js +98 -0
- package/dist/scanner/audit/eventBus.d.ts +60 -0
- package/dist/scanner/audit/eventBus.js +35 -0
- package/dist/scanner/audit/passes/index.d.ts +11 -0
- package/dist/scanner/audit/passes/index.js +9 -0
- package/dist/scanner/audit/passes/types.d.ts +23 -0
- package/dist/scanner/audit/passes/types.js +1 -0
- package/dist/scanner/audit/types.d.ts +27 -0
- package/dist/scanner/audit/types.js +1 -0
- package/dist/scanner/auditor.d.ts +51 -0
- package/dist/scanner/auditor.js +80 -0
- package/dist/scanner/buffer/engine.d.ts +9 -0
- package/dist/scanner/buffer/engine.js +110 -0
- package/dist/scanner/buffer/sharedBuffer.d.ts +78 -0
- package/dist/scanner/buffer/sharedBuffer.js +130 -0
- package/dist/scanner/buffer/types.d.ts +67 -0
- package/dist/scanner/buffer/types.js +1 -0
- package/dist/scanner/buffer.d.ts +45 -38
- package/dist/scanner/buffer.js +93 -61
- package/dist/scanner/bus/engine.d.ts +11 -0
- package/dist/scanner/bus/engine.js +42 -0
- package/dist/scanner/bus/types.d.ts +53 -0
- package/dist/scanner/bus/types.js +1 -0
- package/dist/scanner/bus.d.ts +38 -0
- package/dist/scanner/bus.js +37 -0
- package/dist/scanner/chunk-worker.d.ts +19 -0
- package/dist/scanner/chunk-worker.js +67 -0
- package/dist/scanner/chunkWorker.d.ts +20 -0
- package/dist/scanner/chunkWorker.js +59 -0
- package/dist/scanner/chunker/chunker.d.ts +7 -0
- package/dist/scanner/chunker/chunker.js +60 -0
- package/dist/scanner/chunker.d.ts +7 -0
- package/dist/scanner/chunker.js +60 -0
- package/dist/scanner/converge.d.ts +29 -0
- package/dist/scanner/converge.js +15 -0
- package/dist/scanner/decrypt.d.ts +10 -0
- package/dist/scanner/decrypt.js +80 -0
- package/dist/scanner/engine/scanEngine.d.ts +24 -0
- package/dist/scanner/engine/scanEngine.js +87 -0
- package/dist/scanner/engine/types.d.ts +90 -0
- package/dist/scanner/engine/types.js +1 -0
- package/dist/scanner/engine.d.ts +90 -0
- package/dist/scanner/engine.js +84 -0
- package/dist/scanner/file-worker.d.ts +33 -0
- package/dist/scanner/file-worker.js +28 -0
- package/dist/scanner/fileWorker.d.ts +33 -0
- package/dist/scanner/fileWorker.js +22 -0
- package/dist/scanner/hooks/types.d.ts +25 -0
- package/dist/scanner/hooks/types.js +1 -0
- package/dist/scanner/hooks.d.ts +23 -0
- package/dist/scanner/hooks.js +1 -0
- package/dist/scanner/parse.d.ts +10 -0
- package/dist/scanner/parse.js +47 -0
- package/dist/scanner/passes/index.d.ts +8 -0
- package/dist/scanner/passes/index.js +6 -0
- package/dist/scanner/passes/types.d.ts +22 -0
- package/dist/scanner/passes/types.js +1 -0
- package/dist/scanner/pdf/chunker.d.ts +7 -0
- package/dist/scanner/pdf/chunker.js +60 -0
- package/dist/scanner/pdf/password-store.d.ts +34 -0
- package/dist/scanner/pdf/password-store.js +83 -0
- package/dist/scanner/pdf/pdf-unlock.d.ts +17 -0
- package/dist/scanner/pdf/pdf-unlock.js +50 -0
- package/dist/scanner/pdf/pdf.d.ts +17 -0
- package/dist/scanner/pdf/pdf.js +36 -0
- package/dist/scanner/pdf/state-machine.d.ts +60 -0
- package/dist/scanner/pdf/state-machine.js +64 -0
- package/dist/scanner/pdf/unlock.d.ts +22 -0
- package/dist/scanner/pdf/unlock.js +121 -0
- package/dist/scanner/phase-decrypt.d.ts +10 -0
- package/dist/scanner/phase-decrypt.js +80 -0
- package/dist/scanner/phase-parse.d.ts +10 -0
- package/dist/scanner/phase-parse.js +46 -0
- package/dist/scanner/phases/chunk.d.ts +8 -0
- package/dist/scanner/phases/chunk.js +13 -0
- package/dist/scanner/phases/commit.d.ts +12 -0
- package/dist/scanner/phases/commit.js +140 -0
- package/dist/scanner/phases/decrypt.d.ts +10 -0
- package/dist/scanner/phases/decrypt.js +80 -0
- package/dist/scanner/phases/parse.d.ts +10 -0
- package/dist/scanner/phases/parse.js +46 -0
- package/dist/scanner/phases/resolve.d.ts +10 -0
- package/dist/scanner/phases/resolve.js +17 -0
- package/dist/scanner/phases/review.d.ts +10 -0
- package/dist/scanner/phases/review.js +12 -0
- package/dist/scanner/progress.d.ts +14 -0
- package/dist/scanner/progress.js +21 -0
- package/dist/scanner/resolver-memory.d.ts +8 -0
- package/dist/scanner/resolver-memory.js +24 -0
- package/dist/scanner/resolver.d.ts +39 -0
- package/dist/scanner/resolver.js +196 -0
- package/dist/scanner/result.d.ts +17 -0
- package/dist/scanner/result.js +19 -0
- package/dist/scanner/run-passes.d.ts +30 -0
- package/dist/scanner/run-passes.js +15 -0
- package/dist/scanner/unlock.js +1 -1
- package/dist/scanner/worker.d.ts +19 -0
- package/dist/scanner/worker.js +67 -0
- package/dist/scanner/workers/chunkWorker.d.ts +20 -0
- package/dist/scanner/workers/chunkWorker.js +65 -0
- package/dist/scanner/workers/fileWorker.d.ts +32 -0
- package/dist/scanner/workers/fileWorker.js +22 -0
- package/package.json +1 -1
package/dist/scanner/buffer.d.ts
CHANGED
|
@@ -1,51 +1,58 @@
|
|
|
1
|
-
import type
|
|
2
|
-
import {
|
|
1
|
+
import type { TransactionInput } from "../db/queries/transactions.js";
|
|
2
|
+
import type { Bus } from "./bus.js";
|
|
3
3
|
/**
|
|
4
|
-
* One
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
* Account writes (`create_account`, `update_account_metadata`) and merchant
|
|
10
|
-
* writes deliberately bypass the buffer — they go directly to the DB through
|
|
11
|
-
* their own mutexes so concurrent agents see each other's creates and don't
|
|
12
|
-
* duplicate.
|
|
4
|
+
* One in-flight transaction held by the buffer. The id is synthesized on
|
|
5
|
+
* append so the agent can refer to it in the same turn (e.g. to attach a
|
|
6
|
+
* note_unknown). `chunkId` records who created it — audit passes use this
|
|
7
|
+
* to dedup across chunks and merge boundary continuations.
|
|
13
8
|
*/
|
|
9
|
+
export interface BufferedTransaction {
|
|
10
|
+
readonly transaction_id: string;
|
|
11
|
+
readonly chunkId: string;
|
|
12
|
+
input: TransactionInput;
|
|
13
|
+
}
|
|
14
14
|
export interface BufferedUnknown {
|
|
15
|
-
|
|
15
|
+
readonly unknown_id: string;
|
|
16
|
+
readonly chunkId: string | null;
|
|
16
17
|
transaction_id: string | null;
|
|
17
18
|
account_id: string | null;
|
|
18
|
-
kind
|
|
19
|
+
kind: string | null;
|
|
19
20
|
prompt: string;
|
|
20
21
|
options?: string[];
|
|
22
|
+
/** Null until the auditor or review TUI closes it. */
|
|
23
|
+
answer: string | null;
|
|
21
24
|
}
|
|
22
|
-
export interface
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
25
|
+
export interface BufferSnapshot {
|
|
26
|
+
readonly transactions: readonly BufferedTransaction[];
|
|
27
|
+
readonly unknowns: readonly BufferedUnknown[];
|
|
28
|
+
readonly accountsCreated: readonly string[];
|
|
29
|
+
readonly merchantsCreated: readonly string[];
|
|
30
|
+
readonly fileIds: readonly string[];
|
|
26
31
|
}
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
* them.
|
|
46
|
-
*/
|
|
47
|
-
commit(db: Database.Database, scannedFileId: string): {
|
|
32
|
+
/**
|
|
33
|
+
* The shared buffer every chunk worker writes to during a scan run. Mutations
|
|
34
|
+
* serialize through an internal mutex; reads are lock-free. Every mutation
|
|
35
|
+
* publishes a typed event on the bus so the auditor can react in flight.
|
|
36
|
+
*/
|
|
37
|
+
export interface ScanBuffer {
|
|
38
|
+
readonly scanId: string;
|
|
39
|
+
appendTransaction(input: TransactionInput, chunkId: string): Promise<string>;
|
|
40
|
+
updateTransaction(id: string, mut: (current: BufferedTransaction) => BufferedTransaction): Promise<boolean>;
|
|
41
|
+
removeTransaction(id: string, reason: string): Promise<boolean>;
|
|
42
|
+
appendUnknown(input: Omit<BufferedUnknown, "unknown_id" | "answer">): Promise<string>;
|
|
43
|
+
closeUnknown(id: string, answer: string): Promise<boolean>;
|
|
44
|
+
/** Returns true if newly recorded; false if already present. */
|
|
45
|
+
recordFile(fileId: string): boolean;
|
|
46
|
+
recordAccountCreated(accountId: string): boolean;
|
|
47
|
+
recordMerchantCreated(merchantId: string): boolean;
|
|
48
|
+
snapshot(): BufferSnapshot;
|
|
49
|
+
size(): {
|
|
48
50
|
transactions: number;
|
|
49
51
|
unknowns: number;
|
|
52
|
+
openUnknowns: number;
|
|
50
53
|
};
|
|
54
|
+
getTransaction(id: string): BufferedTransaction | undefined;
|
|
55
|
+
filterTransactions(predicate: (tx: BufferedTransaction) => boolean): IterableIterator<BufferedTransaction>;
|
|
56
|
+
openUnknowns(): BufferedUnknown[];
|
|
51
57
|
}
|
|
58
|
+
export declare function createBuffer(scanId: string, bus: Bus): ScanBuffer;
|
package/dist/scanner/buffer.js
CHANGED
|
@@ -1,63 +1,95 @@
|
|
|
1
1
|
import { randomUUID } from "crypto";
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
2
|
+
function createMutex() {
|
|
3
|
+
let chain = Promise.resolve();
|
|
4
|
+
return {
|
|
5
|
+
runExclusive(fn) {
|
|
6
|
+
const next = chain.then(() => fn());
|
|
7
|
+
chain = next.catch(() => undefined);
|
|
8
|
+
return next;
|
|
9
|
+
},
|
|
10
|
+
};
|
|
11
|
+
}
|
|
12
|
+
function emptyState(scanId) {
|
|
13
|
+
return {
|
|
14
|
+
scanId,
|
|
15
|
+
transactions: new Map(),
|
|
16
|
+
unknowns: new Map(),
|
|
17
|
+
accountsCreated: new Set(),
|
|
18
|
+
merchantsCreated: new Set(),
|
|
19
|
+
fileIds: new Set(),
|
|
20
|
+
};
|
|
21
|
+
}
|
|
22
|
+
function addIfNew(set, value) {
|
|
23
|
+
if (set.has(value))
|
|
24
|
+
return false;
|
|
25
|
+
set.add(value);
|
|
26
|
+
return true;
|
|
27
|
+
}
|
|
28
|
+
export function createBuffer(scanId, bus) {
|
|
29
|
+
const state = emptyState(scanId);
|
|
30
|
+
const mutex = createMutex();
|
|
31
|
+
return {
|
|
32
|
+
scanId,
|
|
33
|
+
appendTransaction: (input, chunkId) => mutex.runExclusive(() => {
|
|
34
|
+
const id = `tx:${randomUUID()}`;
|
|
35
|
+
const tx = { transaction_id: id, chunkId, input };
|
|
36
|
+
state.transactions.set(id, tx);
|
|
37
|
+
bus.publish({ kind: "transaction_appended", transaction: tx, chunkId });
|
|
38
|
+
return id;
|
|
39
|
+
}),
|
|
40
|
+
updateTransaction: (id, mut) => mutex.runExclusive(() => {
|
|
41
|
+
const before = state.transactions.get(id);
|
|
42
|
+
if (!before)
|
|
43
|
+
return false;
|
|
44
|
+
const after = mut(before);
|
|
45
|
+
state.transactions.set(id, after);
|
|
46
|
+
bus.publish({ kind: "transaction_updated", transactionId: id, before, after });
|
|
47
|
+
return true;
|
|
48
|
+
}),
|
|
49
|
+
removeTransaction: (id, reason) => mutex.runExclusive(() => {
|
|
50
|
+
if (!state.transactions.delete(id))
|
|
51
|
+
return false;
|
|
52
|
+
bus.publish({ kind: "transaction_removed", transactionId: id, reason });
|
|
53
|
+
return true;
|
|
54
|
+
}),
|
|
55
|
+
appendUnknown: (input) => mutex.runExclusive(() => {
|
|
56
|
+
const id = `bu:${randomUUID()}`;
|
|
57
|
+
const u = { ...input, unknown_id: id, answer: null };
|
|
58
|
+
state.unknowns.set(id, u);
|
|
59
|
+
bus.publish({ kind: "unknown_appended", unknown: u, chunkId: u.chunkId });
|
|
60
|
+
return id;
|
|
61
|
+
}),
|
|
62
|
+
closeUnknown: (id, answer) => mutex.runExclusive(() => {
|
|
63
|
+
const u = state.unknowns.get(id);
|
|
64
|
+
if (!u || u.answer !== null)
|
|
65
|
+
return false;
|
|
66
|
+
state.unknowns.set(id, { ...u, answer });
|
|
67
|
+
bus.publish({ kind: "unknown_closed", unknownId: id, answer, chunkId: u.chunkId });
|
|
68
|
+
return true;
|
|
69
|
+
}),
|
|
70
|
+
recordFile: (fileId) => addIfNew(state.fileIds, fileId),
|
|
71
|
+
recordAccountCreated: (accountId) => addIfNew(state.accountsCreated, accountId),
|
|
72
|
+
recordMerchantCreated: (merchantId) => addIfNew(state.merchantsCreated, merchantId),
|
|
73
|
+
snapshot: () => ({
|
|
74
|
+
transactions: Array.from(state.transactions.values()),
|
|
75
|
+
unknowns: Array.from(state.unknowns.values()),
|
|
76
|
+
accountsCreated: Array.from(state.accountsCreated),
|
|
77
|
+
merchantsCreated: Array.from(state.merchantsCreated),
|
|
78
|
+
fileIds: Array.from(state.fileIds),
|
|
79
|
+
}),
|
|
80
|
+
size: () => {
|
|
81
|
+
let open = 0;
|
|
82
|
+
for (const u of state.unknowns.values())
|
|
83
|
+
if (u.answer === null)
|
|
84
|
+
open++;
|
|
85
|
+
return { transactions: state.transactions.size, unknowns: state.unknowns.size, openUnknowns: open };
|
|
86
|
+
},
|
|
87
|
+
getTransaction: (id) => state.transactions.get(id),
|
|
88
|
+
*filterTransactions(predicate) {
|
|
89
|
+
for (const tx of state.transactions.values())
|
|
90
|
+
if (predicate(tx))
|
|
91
|
+
yield tx;
|
|
92
|
+
},
|
|
93
|
+
openUnknowns: () => Array.from(state.unknowns.values()).filter(u => u.answer === null),
|
|
94
|
+
};
|
|
63
95
|
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { Bus } from "./types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Pub/sub bus factory. Closes over a Set of listeners + an event history. No
|
|
4
|
+
* class, no `this` — just a record of callbacks the scanner engine wires up
|
|
5
|
+
* once per scan run.
|
|
6
|
+
*
|
|
7
|
+
* Errors thrown synchronously by a listener are caught and logged. Rejected
|
|
8
|
+
* promises from async listeners are also caught. A misbehaving subscriber
|
|
9
|
+
* never silences the bus for the rest.
|
|
10
|
+
*/
|
|
11
|
+
export declare function createBus(): Bus;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pub/sub bus factory. Closes over a Set of listeners + an event history. No
|
|
3
|
+
* class, no `this` — just a record of callbacks the scanner engine wires up
|
|
4
|
+
* once per scan run.
|
|
5
|
+
*
|
|
6
|
+
* Errors thrown synchronously by a listener are caught and logged. Rejected
|
|
7
|
+
* promises from async listeners are also caught. A misbehaving subscriber
|
|
8
|
+
* never silences the bus for the rest.
|
|
9
|
+
*/
|
|
10
|
+
export function createBus() {
|
|
11
|
+
const listeners = new Set();
|
|
12
|
+
const history = [];
|
|
13
|
+
const safelyInvoke = (fn, event) => {
|
|
14
|
+
try {
|
|
15
|
+
const result = fn(event);
|
|
16
|
+
if (result && typeof result.catch === "function") {
|
|
17
|
+
result.catch(err => console.error(`[bus listener] ${err.message}`));
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
catch (err) {
|
|
21
|
+
console.error(`[bus listener] ${err.message}`);
|
|
22
|
+
}
|
|
23
|
+
};
|
|
24
|
+
return {
|
|
25
|
+
subscribe(fn) {
|
|
26
|
+
listeners.add(fn);
|
|
27
|
+
return () => { listeners.delete(fn); };
|
|
28
|
+
},
|
|
29
|
+
publish(event) {
|
|
30
|
+
history.push(event);
|
|
31
|
+
for (const fn of listeners)
|
|
32
|
+
safelyInvoke(fn, event);
|
|
33
|
+
},
|
|
34
|
+
history() {
|
|
35
|
+
return history;
|
|
36
|
+
},
|
|
37
|
+
reset() {
|
|
38
|
+
listeners.clear();
|
|
39
|
+
history.length = 0;
|
|
40
|
+
},
|
|
41
|
+
};
|
|
42
|
+
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import type { BufferedTransaction, BufferedUnknown } from "../buffer/types.js";
|
|
2
|
+
/**
|
|
3
|
+
* The typed event stream every subdomain in the scanner communicates over.
|
|
4
|
+
* The Buffer publishes mutation events; the Auditor and the CLI dashboard
|
|
5
|
+
* subscribe. Producers and consumers never reference each other directly —
|
|
6
|
+
* everyone talks through the Bus.
|
|
7
|
+
*/
|
|
8
|
+
export type BufferEvent = {
|
|
9
|
+
kind: "transaction_appended";
|
|
10
|
+
transaction: BufferedTransaction;
|
|
11
|
+
chunkId: string;
|
|
12
|
+
} | {
|
|
13
|
+
kind: "transaction_updated";
|
|
14
|
+
transactionId: string;
|
|
15
|
+
before: BufferedTransaction;
|
|
16
|
+
after: BufferedTransaction;
|
|
17
|
+
} | {
|
|
18
|
+
kind: "transaction_removed";
|
|
19
|
+
transactionId: string;
|
|
20
|
+
reason: string;
|
|
21
|
+
} | {
|
|
22
|
+
kind: "unknown_appended";
|
|
23
|
+
unknown: BufferedUnknown;
|
|
24
|
+
chunkId: string | null;
|
|
25
|
+
} | {
|
|
26
|
+
kind: "unknown_closed";
|
|
27
|
+
unknownId: string;
|
|
28
|
+
answer: string;
|
|
29
|
+
} | {
|
|
30
|
+
kind: "chunk_started";
|
|
31
|
+
chunkId: string;
|
|
32
|
+
fileId: string;
|
|
33
|
+
pageNumber: number;
|
|
34
|
+
} | {
|
|
35
|
+
kind: "chunk_completed";
|
|
36
|
+
chunkId: string;
|
|
37
|
+
fileId: string;
|
|
38
|
+
pageNumber: number;
|
|
39
|
+
} | {
|
|
40
|
+
kind: "worker_completed";
|
|
41
|
+
workerId: string;
|
|
42
|
+
chunkId: string;
|
|
43
|
+
transactionsAdded: number;
|
|
44
|
+
unknownsAdded: number;
|
|
45
|
+
};
|
|
46
|
+
export type EventKind = BufferEvent["kind"];
|
|
47
|
+
export type EventListener = (event: BufferEvent) => void | Promise<void>;
|
|
48
|
+
export interface Bus {
|
|
49
|
+
subscribe(fn: EventListener): () => void;
|
|
50
|
+
publish(event: BufferEvent): void;
|
|
51
|
+
history(): readonly BufferEvent[];
|
|
52
|
+
reset(): void;
|
|
53
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import type { BufferedTransaction, BufferedUnknown } from "./buffer.js";
|
|
2
|
+
/**
|
|
3
|
+
* Typed pub/sub bus every scanner subdomain talks over. The buffer publishes
|
|
4
|
+
* mutation events; the audit engine and CLI dashboard subscribe. Producers
|
|
5
|
+
* never reference consumers — everyone goes through the bus.
|
|
6
|
+
*/
|
|
7
|
+
export type BufferEvent = {
|
|
8
|
+
kind: "transaction_appended";
|
|
9
|
+
transaction: BufferedTransaction;
|
|
10
|
+
chunkId: string;
|
|
11
|
+
} | {
|
|
12
|
+
kind: "transaction_updated";
|
|
13
|
+
transactionId: string;
|
|
14
|
+
before: BufferedTransaction;
|
|
15
|
+
after: BufferedTransaction;
|
|
16
|
+
} | {
|
|
17
|
+
kind: "transaction_removed";
|
|
18
|
+
transactionId: string;
|
|
19
|
+
reason: string;
|
|
20
|
+
} | {
|
|
21
|
+
kind: "unknown_appended";
|
|
22
|
+
unknown: BufferedUnknown;
|
|
23
|
+
chunkId: string | null;
|
|
24
|
+
} | {
|
|
25
|
+
kind: "unknown_closed";
|
|
26
|
+
unknownId: string;
|
|
27
|
+
answer: string;
|
|
28
|
+
chunkId: string | null;
|
|
29
|
+
};
|
|
30
|
+
export type EventKind = BufferEvent["kind"];
|
|
31
|
+
export type EventListener = (event: BufferEvent) => void | Promise<void>;
|
|
32
|
+
export interface Bus {
|
|
33
|
+
subscribe(fn: EventListener): () => void;
|
|
34
|
+
publish(event: BufferEvent): void;
|
|
35
|
+
history(): readonly BufferEvent[];
|
|
36
|
+
reset(): void;
|
|
37
|
+
}
|
|
38
|
+
export declare function createBus(): Bus;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
function logListenerError(err) {
|
|
2
|
+
console.error(`[bus listener] ${err instanceof Error ? err.message : String(err)}`);
|
|
3
|
+
}
|
|
4
|
+
function safelyInvoke(fn, event) {
|
|
5
|
+
try {
|
|
6
|
+
const result = fn(event);
|
|
7
|
+
if (result instanceof Promise)
|
|
8
|
+
result.catch(logListenerError);
|
|
9
|
+
}
|
|
10
|
+
catch (err) {
|
|
11
|
+
logListenerError(err);
|
|
12
|
+
}
|
|
13
|
+
}
|
|
14
|
+
export function createBus() {
|
|
15
|
+
const listeners = new Set();
|
|
16
|
+
const history = [];
|
|
17
|
+
return {
|
|
18
|
+
subscribe(fn) {
|
|
19
|
+
listeners.add(fn);
|
|
20
|
+
return () => {
|
|
21
|
+
listeners.delete(fn);
|
|
22
|
+
};
|
|
23
|
+
},
|
|
24
|
+
publish(event) {
|
|
25
|
+
history.push(event);
|
|
26
|
+
for (const fn of listeners)
|
|
27
|
+
safelyInvoke(fn, event);
|
|
28
|
+
},
|
|
29
|
+
history() {
|
|
30
|
+
return history;
|
|
31
|
+
},
|
|
32
|
+
reset() {
|
|
33
|
+
listeners.clear();
|
|
34
|
+
history.length = 0;
|
|
35
|
+
},
|
|
36
|
+
};
|
|
37
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import type { Chunk } from "./engine.js";
|
|
3
|
+
import type { ScanHooks } from "./hooks.js";
|
|
4
|
+
import type { ScanProgress } from "./progress.js";
|
|
5
|
+
export interface ChunkWorkerDeps {
|
|
6
|
+
readonly db: Database.Database;
|
|
7
|
+
readonly scanId: string;
|
|
8
|
+
readonly scannedFileId: string | undefined;
|
|
9
|
+
readonly progress: ScanProgress;
|
|
10
|
+
readonly chunk: Chunk;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Process one chunk: run the LLM scan agent over a single-page PDF blob with
|
|
14
|
+
* scanId + progress sink + scanned_files row injected through the agent
|
|
15
|
+
* context. Agent's record_transactions / note_unknown calls write directly to
|
|
16
|
+
* the DB; per-row ticks fan out via `progress.emit`. Failures land in the DB
|
|
17
|
+
* as a `chunk_failed` unknown so the resolver can pick them up.
|
|
18
|
+
*/
|
|
19
|
+
export declare function runChunkWorker(deps: ChunkWorkerDeps, hooks: ScanHooks): Promise<void>;
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { randomUUID } from "crypto";
|
|
2
|
+
import { runScanAgent } from "../ai/agent.js";
|
|
3
|
+
import { recordUnknown } from "../db/queries/unknowns.js";
|
|
4
|
+
import { buildDocumentBlock } from "./pdf/pdf.js";
|
|
5
|
+
import { tryExecute } from "./result.js";
|
|
6
|
+
/**
|
|
7
|
+
* Process one chunk: run the LLM scan agent over a single-page PDF blob with
|
|
8
|
+
* scanId + progress sink + scanned_files row injected through the agent
|
|
9
|
+
* context. Agent's record_transactions / note_unknown calls write directly to
|
|
10
|
+
* the DB; per-row ticks fan out via `progress.emit`. Failures land in the DB
|
|
11
|
+
* as a `chunk_failed` unknown so the resolver can pick them up.
|
|
12
|
+
*/
|
|
13
|
+
export async function runChunkWorker(deps, hooks) {
|
|
14
|
+
const workerId = `cw:${randomUUID()}`;
|
|
15
|
+
hooks.onWorkerStart?.(workerId, deps.chunk);
|
|
16
|
+
const outcome = await tryExecute(() => runScanAgent({
|
|
17
|
+
db: deps.db,
|
|
18
|
+
initialMessages: [
|
|
19
|
+
{
|
|
20
|
+
role: "user",
|
|
21
|
+
content: [
|
|
22
|
+
buildDocumentBlock(deps.chunk.bytes, deps.chunk.fileName, deps.chunk.mime),
|
|
23
|
+
{ type: "text", text: buildChunkPrompt(deps.chunk) },
|
|
24
|
+
],
|
|
25
|
+
},
|
|
26
|
+
],
|
|
27
|
+
prompt: { fileName: deps.chunk.fileName },
|
|
28
|
+
agentCtx: {
|
|
29
|
+
interactive: false,
|
|
30
|
+
scanId: deps.scanId,
|
|
31
|
+
fileId: deps.scannedFileId,
|
|
32
|
+
chunkId: deps.chunk.chunkId,
|
|
33
|
+
progress: deps.progress,
|
|
34
|
+
},
|
|
35
|
+
}));
|
|
36
|
+
hooks.onWorkerEnd?.(workerId, deps.chunk, outcome.ok);
|
|
37
|
+
if (!outcome.ok)
|
|
38
|
+
recordChunkFailure(deps, outcome.error);
|
|
39
|
+
}
|
|
40
|
+
function recordChunkFailure(deps, error) {
|
|
41
|
+
try {
|
|
42
|
+
recordUnknown(deps.db, {
|
|
43
|
+
file_id: deps.scannedFileId ?? null,
|
|
44
|
+
scan_id: deps.scanId,
|
|
45
|
+
transaction_id: null,
|
|
46
|
+
account_id: null,
|
|
47
|
+
kind: "chunk_failed",
|
|
48
|
+
prompt: `Chunk ${deps.chunk.fileName} p${deps.chunk.pageNumber} failed to parse: ${error}.`,
|
|
49
|
+
});
|
|
50
|
+
deps.progress.emit({ chunkId: deps.chunk.chunkId, kind: "unknown" });
|
|
51
|
+
}
|
|
52
|
+
catch {
|
|
53
|
+
// failure to record a failure shouldn't crash the file worker
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
function buildChunkPrompt(chunk) {
|
|
57
|
+
return [
|
|
58
|
+
`You are parsing page ${chunk.pageNumber} of ${chunk.totalPages} of ${chunk.fileName}.`,
|
|
59
|
+
``,
|
|
60
|
+
`Steps:`,
|
|
61
|
+
`1. Call list_accounts to see what already exists.`,
|
|
62
|
+
`2. If this page reveals an account that isn't in the chart yet, call create_account once.`,
|
|
63
|
+
`3. For every transaction on this page, call record_transactions (plural) with all rows in one batch.`,
|
|
64
|
+
`4. If the first or last row looks incomplete (no date, or no amount column visible — the row likely continues onto an adjacent page), call note_unknown with kind="boundary_continuation" and the raw row text. Do NOT invent missing fields.`,
|
|
65
|
+
`5. When done with this page, call mark_file_scanned with a short summary.`,
|
|
66
|
+
].join("\n");
|
|
67
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import type { ScanBuffer } from "./buffer.js";
|
|
3
|
+
import type { Chunk } from "./engine.js";
|
|
4
|
+
import type { ScanHooks } from "./hooks.js";
|
|
5
|
+
export interface ChunkWorkerDeps {
|
|
6
|
+
readonly db: Database.Database;
|
|
7
|
+
readonly buffer: ScanBuffer;
|
|
8
|
+
readonly chunk: Chunk;
|
|
9
|
+
}
|
|
10
|
+
export interface ChunkWorkerResult {
|
|
11
|
+
readonly workerId: string;
|
|
12
|
+
readonly ok: boolean;
|
|
13
|
+
readonly error?: string;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Process one chunk: run the LLM scan agent over a single-page PDF blob with
|
|
17
|
+
* the shared buffer + chunkId injected. Agent's `record_transactions` calls
|
|
18
|
+
* land in the shared buffer; events fan out to the audit engine + dashboard.
|
|
19
|
+
*/
|
|
20
|
+
export declare function runChunkWorker(deps: ChunkWorkerDeps, hooks: ScanHooks): Promise<ChunkWorkerResult>;
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { randomUUID } from "crypto";
|
|
2
|
+
import { runScanAgent } from "../ai/agent.js";
|
|
3
|
+
import { buildDocumentBlock } from "./pdf.js";
|
|
4
|
+
import { tryExecute } from "./result.js";
|
|
5
|
+
/**
|
|
6
|
+
* Process one chunk: run the LLM scan agent over a single-page PDF blob with
|
|
7
|
+
* the shared buffer + chunkId injected. Agent's `record_transactions` calls
|
|
8
|
+
* land in the shared buffer; events fan out to the audit engine + dashboard.
|
|
9
|
+
*/
|
|
10
|
+
export async function runChunkWorker(deps, hooks) {
|
|
11
|
+
const workerId = `cw:${randomUUID()}`;
|
|
12
|
+
hooks.onWorkerStart?.(workerId, deps.chunk);
|
|
13
|
+
const outcome = await tryExecute(() => runScanAgent({
|
|
14
|
+
db: deps.db,
|
|
15
|
+
initialMessages: [
|
|
16
|
+
{
|
|
17
|
+
role: "user",
|
|
18
|
+
content: [
|
|
19
|
+
buildDocumentBlock(deps.chunk.bytes, deps.chunk.fileName, deps.chunk.mime),
|
|
20
|
+
{ type: "text", text: buildChunkPrompt(deps.chunk) },
|
|
21
|
+
],
|
|
22
|
+
},
|
|
23
|
+
],
|
|
24
|
+
prompt: { fileName: deps.chunk.fileName },
|
|
25
|
+
agentCtx: {
|
|
26
|
+
interactive: false,
|
|
27
|
+
buffer: deps.buffer,
|
|
28
|
+
chunkId: deps.chunk.chunkId,
|
|
29
|
+
},
|
|
30
|
+
onProgress: ev => hooks.onWorkerProgress?.(workerId, deps.chunk, { phase: ev.phase, toolName: ev.toolName }),
|
|
31
|
+
}));
|
|
32
|
+
hooks.onWorkerEnd?.(workerId, deps.chunk, outcome.ok);
|
|
33
|
+
if (!outcome.ok) {
|
|
34
|
+
await recordChunkFailure(deps.buffer, deps.chunk, outcome.error);
|
|
35
|
+
return { workerId, ok: false, error: outcome.error };
|
|
36
|
+
}
|
|
37
|
+
return { workerId, ok: true };
|
|
38
|
+
}
|
|
39
|
+
async function recordChunkFailure(buffer, chunk, error) {
|
|
40
|
+
await buffer.appendUnknown({
|
|
41
|
+
chunkId: chunk.chunkId,
|
|
42
|
+
transaction_id: null,
|
|
43
|
+
account_id: null,
|
|
44
|
+
kind: "chunk_failed",
|
|
45
|
+
prompt: `Chunk ${chunk.fileName} p${chunk.pageNumber} failed to parse: ${error}.`,
|
|
46
|
+
});
|
|
47
|
+
}
|
|
48
|
+
function buildChunkPrompt(chunk) {
|
|
49
|
+
return [
|
|
50
|
+
`You are parsing page ${chunk.pageNumber} of ${chunk.totalPages} of ${chunk.fileName}.`,
|
|
51
|
+
``,
|
|
52
|
+
`Steps:`,
|
|
53
|
+
`1. Call list_accounts to see what already exists.`,
|
|
54
|
+
`2. If this page reveals an account that isn't in the chart yet, call create_account once.`,
|
|
55
|
+
`3. For every transaction on this page, call record_transactions (plural) with all rows in one batch.`,
|
|
56
|
+
`4. If the first or last row looks incomplete (no date, or no amount column visible — the row likely continues onto an adjacent page), call note_unknown with kind="boundary_continuation" and the raw row text. Do NOT invent missing fields.`,
|
|
57
|
+
`5. When done with this page, call mark_file_scanned with a short summary.`,
|
|
58
|
+
].join("\n");
|
|
59
|
+
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { Chunk, DecryptedFile } from "../engine/types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Split one decrypted PDF into N single-page Chunks. Each chunk is a
|
|
4
|
+
* standalone, valid PDF so the per-chunk LLM agent gets a clean document
|
|
5
|
+
* without siblings.
|
|
6
|
+
*/
|
|
7
|
+
export declare function chunkPdf(file: DecryptedFile): Promise<Chunk[]>;
|