plasalid 0.5.8 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/dist/accounts/taxonomy.d.ts +1 -1
- package/dist/accounts/taxonomy.js +2 -2
- package/dist/ai/agent.d.ts +7 -6
- package/dist/ai/agent.js +9 -8
- package/dist/ai/personas.d.ts +1 -1
- package/dist/ai/personas.js +69 -66
- package/dist/ai/prompt-sections.d.ts +4 -5
- package/dist/ai/prompt-sections.js +11 -11
- package/dist/ai/system-prompt.d.ts +2 -3
- package/dist/ai/system-prompt.js +5 -5
- package/dist/ai/tools/common.js +13 -5
- package/dist/ai/tools/index.js +15 -15
- package/dist/ai/tools/ingest.d.ts +2 -2
- package/dist/ai/tools/ingest.js +210 -87
- package/dist/ai/tools/merchants.js +27 -12
- package/dist/ai/tools/read.js +36 -20
- package/dist/ai/tools/record.js +79 -19
- package/dist/ai/tools/resolve.d.ts +2 -0
- package/dist/ai/tools/resolve.js +195 -0
- package/dist/ai/tools/types.d.ts +5 -7
- package/dist/cli/commands/accounts.js +2 -2
- package/dist/cli/commands/record.js +4 -2
- package/dist/cli/commands/resolve.d.ts +2 -0
- package/dist/cli/commands/resolve.js +13 -0
- package/dist/cli/commands/scan.js +18 -22
- package/dist/cli/commands/status.js +4 -2
- package/dist/cli/index.js +9 -9
- package/dist/cli/ink/hooks/useFooterText.js +1 -1
- package/dist/cli/ink/hooks/useTextInput.js +0 -3
- package/dist/cli/ink/scan_dashboard.d.ts +2 -2
- package/dist/cli/ink/scan_dashboard.js +3 -3
- package/dist/cli/setup.js +6 -3
- package/dist/cli/ux.js +1 -1
- package/dist/db/queries/account-balance.d.ts +140 -0
- package/dist/db/queries/account-balance.js +355 -0
- package/dist/db/queries/account_balance.d.ts +0 -1
- package/dist/db/queries/account_balance.js +0 -10
- package/dist/db/queries/action-log.d.ts +29 -0
- package/dist/db/queries/action-log.js +27 -0
- package/dist/db/queries/action_log.d.ts +1 -1
- package/dist/db/queries/concerns.d.ts +10 -0
- package/dist/db/queries/concerns.js +21 -0
- package/dist/db/queries/transactions.d.ts +3 -22
- package/dist/db/queries/transactions.js +4 -5
- package/dist/db/queries/unknowns.d.ts +62 -0
- package/dist/db/queries/unknowns.js +114 -0
- package/dist/db/schema.js +3 -3
- package/dist/resolver/pipeline.d.ts +16 -0
- package/dist/resolver/pipeline.js +38 -0
- package/dist/resolver/prompts.d.ts +8 -0
- package/dist/resolver/prompts.js +26 -0
- package/dist/scanner/account-mutex.d.ts +1 -0
- package/dist/scanner/account-mutex.js +16 -0
- package/dist/scanner/buffer.d.ts +10 -10
- package/dist/scanner/buffer.js +15 -15
- package/dist/scanner/decrypt-queue.d.ts +57 -0
- package/dist/scanner/decrypt-queue.js +114 -0
- package/dist/scanner/detectors/correlations.d.ts +2 -0
- package/dist/scanner/detectors/correlations.js +51 -0
- package/dist/scanner/detectors/duplicates.d.ts +2 -0
- package/dist/scanner/detectors/duplicates.js +75 -0
- package/dist/scanner/detectors/index.d.ts +18 -0
- package/dist/scanner/detectors/index.js +39 -0
- package/dist/scanner/detectors/recurrences.d.ts +2 -0
- package/dist/scanner/detectors/recurrences.js +49 -0
- package/dist/scanner/detectors/similar_accounts.d.ts +2 -0
- package/dist/scanner/detectors/similar_accounts.js +64 -0
- package/dist/scanner/detectors/similarities.d.ts +2 -0
- package/dist/scanner/detectors/similarities.js +73 -0
- package/dist/scanner/detectors/types.d.ts +16 -0
- package/dist/scanner/detectors/types.js +1 -0
- package/dist/scanner/inspectors/correlations.d.ts +2 -0
- package/dist/scanner/inspectors/correlations.js +47 -0
- package/dist/scanner/inspectors/duplicates.d.ts +2 -0
- package/dist/scanner/inspectors/duplicates.js +75 -0
- package/dist/scanner/inspectors/index.d.ts +19 -0
- package/dist/scanner/inspectors/index.js +39 -0
- package/dist/scanner/inspectors/recurrences.d.ts +2 -0
- package/dist/scanner/inspectors/recurrences.js +49 -0
- package/dist/scanner/inspectors/similarities.d.ts +2 -0
- package/dist/scanner/inspectors/similarities.js +73 -0
- package/dist/scanner/inspectors/types.d.ts +16 -0
- package/dist/scanner/inspectors/types.js +1 -0
- package/dist/scanner/pipeline.d.ts +6 -4
- package/dist/scanner/pipeline.js +51 -88
- package/dist/scanner/prompts.js +2 -2
- package/package.json +2 -1
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { randomUUID } from "crypto";
|
|
2
|
+
/**
 * Persist one open question ("unknown") and raise the `has_unknown` marker on
 * every target (transaction and/or account) the row names.
 *
 * The generated id still carries the legacy `cn:` prefix. It is an opaque
 * token that nothing else interprets, so the prefix has no functional meaning.
 *
 * @param db    Database handle exposing `prepare(sql).run(...params)`.
 * @param input Row fields: `file_id`, `transaction_id`, `account_id`,
 *              optional `kind`, `prompt`, and optional `options` (stored as a
 *              JSON string in `options_json`).
 * @returns The id of the newly inserted unknowns row.
 */
export function recordUnknown(db, input) {
    const unknownId = `cn:${randomUUID()}`;
    const insertRow = db.prepare(`INSERT INTO unknowns (id, file_id, transaction_id, account_id, kind, prompt, options_json) VALUES (?, ?, ?, ?, ?, ?, ?)`);
    const kind = input.kind ?? null;
    const optionsJson = input.options ? JSON.stringify(input.options) : null;
    insertRow.run(unknownId, input.file_id, input.transaction_id, input.account_id, kind, input.prompt, optionsJson);
    // Transaction first, then account — each flag is only touched when the
    // corresponding id is truthy.
    const flagUpdates = [
        [input.transaction_id, `UPDATE transactions SET has_unknown = 1 WHERE id = ?`],
        [input.account_id, `UPDATE accounts SET has_unknown = 1 WHERE id = ?`],
    ];
    for (const [targetId, sql] of flagUpdates) {
        if (targetId) {
            db.prepare(sql).run(targetId);
        }
    }
    return unknownId;
}
|
|
19
|
+
/**
 * Store the user's answer on an unknown and stamp it as resolved. Afterwards
 * the target's `has_unknown` flag is cleared if no other open unknown still
 * points at it.
 *
 * @param db     Database handle exposing `prepare(sql).run(...params)`.
 * @param id     Id of the unknown to resolve.
 * @param answer Answer text written to the row.
 * @returns The unknown's `{ transaction_id, account_id }` target, or `null`
 *          (without writing anything) when the id does not exist.
 */
export function resolveUnknown(db, id, answer) {
    const attachedTo = getUnknownTarget(db, id);
    if (attachedTo) {
        const markResolved = db.prepare(`UPDATE unknowns SET answer = ?, resolved_at = datetime('now') WHERE id = ?`);
        markResolved.run(answer, id);
        maybeClearHasUnknownFlags(db, attachedTo);
        return attachedTo;
    }
    return null;
}
|
|
32
|
+
/**
 * Fetch the transaction/account columns of a single unknown.
 *
 * @param db Database handle exposing `prepare(sql).get(...params)`.
 * @param id Id of the unknown to look up.
 * @returns The `{ transaction_id, account_id }` row, or `null` when the query
 *          yields no row.
 */
export function getUnknownTarget(db, id) {
    const lookup = db.prepare(`SELECT transaction_id, account_id FROM unknowns WHERE id = ?`);
    const found = lookup.get(id);
    if (found === undefined || found === null) {
        return null;
    }
    return found;
}
|
|
42
|
+
/**
 * Drop the `has_unknown` flag on the given transaction and/or account once no
 * unresolved unknown references it any more. A target whose id is falsy is
 * skipped; a target that still has an open unknown is left untouched.
 *
 * @param db     Database handle exposing `prepare(sql).get/run`.
 * @param target `{ transaction_id, account_id }` as returned by the lookup helpers.
 */
export function maybeClearHasUnknownFlags(db, target) {
    // Same probe-then-clear routine for both target kinds; the transaction is
    // handled before the account.
    const checks = [
        {
            id: target.transaction_id,
            probe: `SELECT 1 FROM unknowns WHERE transaction_id = ? AND resolved_at IS NULL LIMIT 1`,
            clear: `UPDATE transactions SET has_unknown = 0 WHERE id = ?`,
        },
        {
            id: target.account_id,
            probe: `SELECT 1 FROM unknowns WHERE account_id = ? AND resolved_at IS NULL LIMIT 1`,
            clear: `UPDATE accounts SET has_unknown = 0 WHERE id = ?`,
        },
    ];
    for (const { id, probe, clear } of checks) {
        if (!id) {
            continue;
        }
        const stillOpen = db.prepare(probe).get(id);
        if (stillOpen) {
            continue;
        }
        db.prepare(clear).run(id);
    }
}
|
|
62
|
+
/**
 * Count unresolved unknowns, optionally narrowed by scope.
 *
 * @param db    Database handle exposing `prepare(sql).get(...params)`.
 * @param scope Optional filters: `file_id`, `transaction_id`, `account_id`,
 *              `kind`. Only truthy values are applied; they are AND-ed
 *              together on top of `resolved_at IS NULL`.
 * @returns The `n` column of the COUNT(*) row.
 */
export function countOpenUnknowns(db, scope = {}) {
    // Column order here fixes both the clause order and the parameter order.
    const filterColumns = ["file_id", "transaction_id", "account_id", "kind"];
    const active = filterColumns.filter((column) => scope[column]);
    const where = ["resolved_at IS NULL", ...active.map((column) => `${column} = ?`)].join(" AND ");
    const params = active.map((column) => scope[column]);
    const { n } = db.prepare(`SELECT COUNT(*) AS n FROM unknowns WHERE ${where}`).get(...params);
    return n;
}
|
|
86
|
+
/**
 * List unresolved unknowns, oldest first.
 *
 * @param db    Database handle exposing `prepare(sql).all(...params)`.
 * @param limit Maximum number of rows. Normalised to an integer in 1..200;
 *              a value that is not a number falls back to the default of 50.
 * @returns Rows with id, file_id, transaction_id, account_id, kind, prompt,
 *          options_json and created_at.
 */
export function listOpenUnknowns(db, limit = 50) {
    // SQLite rejects a LIMIT that is not (losslessly convertible to) an
    // integer, so a fractional or NaN limit must never reach the bind.
    const requested = Number(limit);
    const capped = Math.trunc(Math.min(Math.max(Number.isNaN(requested) ? 50 : requested, 1), 200));
    return db.prepare(`SELECT id, file_id, transaction_id, account_id, kind, prompt, options_json, created_at
       FROM unknowns
       WHERE resolved_at IS NULL
       ORDER BY created_at ASC
       LIMIT ?`).all(capped);
}
|
|
94
|
+
/**
 * Open unknowns filtered by `kind`, ordered by the position of the kind in the
 * input array (priority) then by created_at. Pass `["uncategorized","duplicate"]`
 * to drain uncategorized rows before duplicates.
 *
 * `kind` is free-text TEXT in the schema; canonical values used by built-ins:
 *   uncategorized, duplicate, correlation, recurrence_candidate,
 *   similar_accounts, file_password
 *
 * @param db    Database handle exposing `prepare(sql).all(...params)`.
 * @param kinds Kinds to include, highest priority first. An empty array
 *              returns `[]` without querying.
 * @param limit Maximum number of rows. Normalised to an integer in 1..200;
 *              a value that is not a number falls back to the default of 50.
 * @returns Rows with id, file_id, transaction_id, account_id, kind, prompt,
 *          options_json and created_at.
 */
export function listOpenUnknownsByKind(db, kinds, limit = 50) {
    if (kinds.length === 0) {
        return [];
    }
    // SQLite rejects a LIMIT that is not (losslessly convertible to) an
    // integer, so a fractional or NaN limit must never reach the bind.
    const requested = Number(limit);
    const capped = Math.trunc(Math.min(Math.max(Number.isNaN(requested) ? 50 : requested, 1), 200));
    const placeholders = kinds.map(() => "?").join(",");
    const cases = kinds.map((_, i) => `WHEN ? THEN ${i}`).join(" ");
    // Parameters follow placeholder order in the statement: the IN list, then
    // the CASE arms, then LIMIT — hence `kinds` is spread twice.
    return db.prepare(`SELECT id, file_id, transaction_id, account_id, kind, prompt, options_json, created_at
       FROM unknowns
       WHERE resolved_at IS NULL AND kind IN (${placeholders})
       ORDER BY CASE kind ${cases} ELSE ${kinds.length} END, created_at ASC
       LIMIT ?`).all(...kinds, ...kinds, capped);
}
|
package/dist/db/schema.js
CHANGED
|
@@ -14,7 +14,7 @@ export function migrate(db) {
|
|
|
14
14
|
points_balance REAL,
|
|
15
15
|
metadata_json TEXT,
|
|
16
16
|
pii_flag INTEGER NOT NULL DEFAULT 0,
|
|
17
|
-
|
|
17
|
+
has_unknown INTEGER NOT NULL DEFAULT 0,
|
|
18
18
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
19
19
|
);
|
|
20
20
|
|
|
@@ -74,7 +74,7 @@ export function migrate(db) {
|
|
|
74
74
|
source_file_id TEXT REFERENCES scanned_files(id) ON DELETE CASCADE,
|
|
75
75
|
source_page INTEGER,
|
|
76
76
|
recurrence_id TEXT REFERENCES recurrences(id) ON DELETE SET NULL,
|
|
77
|
-
|
|
77
|
+
has_unknown INTEGER NOT NULL DEFAULT 0,
|
|
78
78
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
79
79
|
);
|
|
80
80
|
|
|
@@ -98,7 +98,7 @@ export function migrate(db) {
|
|
|
98
98
|
CREATE INDEX IF NOT EXISTS postings_transaction_idx ON postings(transaction_id);
|
|
99
99
|
CREATE INDEX IF NOT EXISTS postings_account_idx ON postings(account_id);
|
|
100
100
|
|
|
101
|
-
CREATE TABLE IF NOT EXISTS
|
|
101
|
+
CREATE TABLE IF NOT EXISTS unknowns (
|
|
102
102
|
id TEXT PRIMARY KEY,
|
|
103
103
|
file_id TEXT REFERENCES scanned_files(id) ON DELETE CASCADE,
|
|
104
104
|
transaction_id TEXT REFERENCES transactions(id) ON DELETE CASCADE,
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/** Options accepted by `runResolve`. Every field is optional. */
export interface ResolveOptions {
    /** Forwarded to the resolve agent as part of its `prompt` scope. */
    accountId?: string;
    /** Forwarded to the resolve agent's `prompt` scope (presumably the start of a date range — confirm). */
    from?: string;
    /** Forwarded to the resolve agent's `prompt` scope (presumably the end of a date range — confirm). */
    to?: string;
    /** When set, only open unknowns of this kind are handed to the agent. */
    kind?: string;
    /** Whether the agent may prompt the user; treated as `true` when omitted. */
    interactive?: boolean;
    /** Hard cap on unknowns handed to the agent in one run. Default 200. */
    limit?: number;
}
|
|
10
|
+
/**
 * Hand every open unknown to the resolve agent in a single invocation. The
 * agent surveys, applies memory-driven and heuristic resolutions silently,
 * groups what remains, asks the user once per group, and reports back via
 * mark_resolve_done. The pipeline just sets up plumbing and prints the report.
 *
 * @param opts Optional filters and run settings; see `ResolveOptions`.
 * @returns The summary reported by the agent, or "No open unknowns." when
 *          there is nothing to resolve. Rejects with the agent's error if the
 *          run fails.
 */
export declare function runResolve(opts?: ResolveOptions): Promise<string>;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { getDb } from "../db/connection.js";
|
|
2
|
+
import { runResolveAgent } from "../ai/agent.js";
|
|
3
|
+
import { listOpenUnknowns, listOpenUnknownsByKind } from "../db/queries/unknowns.js";
|
|
4
|
+
import { statusSpinner, makePromptUser, makeAgentOnProgress, } from "../cli/ux.js";
|
|
5
|
+
import { buildResolveUserMessage } from "./prompts.js";
|
|
6
|
+
/**
 * Hand every open unknown to the resolve agent in a single invocation. The
 * agent surveys, applies memory-driven and heuristic resolutions silently,
 * groups what remains, asks the user once per group, and reports back via
 * mark_resolve_done. The pipeline just sets up plumbing and prints the report.
 *
 * @param opts Optional `{ accountId, from, to, kind, interactive, limit }`.
 *             `kind` narrows the set to one kind, `limit` defaults to 200 and
 *             `interactive` defaults to true.
 * @returns The summary passed to `onComplete` by the agent (empty string if it
 *          never reports one), or "No open unknowns." when nothing is open.
 * @throws Re-throws whatever the agent run throws, after failing the spinner.
 */
export async function runResolve(opts = {}) {
    const db = getDb();
    const limit = opts.limit ?? 200;
    const unknowns = opts.kind
        ? listOpenUnknownsByKind(db, [opts.kind], limit)
        : listOpenUnknowns(db, limit);
    if (unknowns.length === 0) {
        return "No open unknowns.";
    }
    const interactive = opts.interactive ?? true;
    const spinner = statusSpinner(`Resolving ${unknowns.length} unknown(s)...`);
    // Without a prompt callback the agent has no way to ask the user.
    const promptUser = interactive ? makePromptUser(spinner) : undefined;
    let summary = "";
    try {
        await runResolveAgent({
            db,
            prompt: { accountId: opts.accountId, from: opts.from, to: opts.to },
            initialMessages: [{ role: "user", content: buildResolveUserMessage(unknowns) }],
            agentCtx: { interactive, promptUser, onComplete: (s) => { summary = s; } },
            onProgress: makeAgentOnProgress(spinner),
        });
        spinner.succeed("Resolve done.");
    }
    catch (err) {
        // A thrown value is not guaranteed to be an Error (or even an object):
        // fall back to its string form instead of printing "undefined" or
        // raising a second error that would mask the original one.
        const reason = err?.message ?? String(err);
        spinner.fail(`Resolve failed: ${reason}`);
        throw err;
    }
    return summary;
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { OpenUnknownRow } from "../db/queries/unknowns.js";
|
|
2
|
+
/**
 * Kickoff message handed to the resolve agent. Data only — one line per
 * unknown, with all the fields the persona's six-step workflow needs (id,
 * kind, transaction/account/file ids, prompt, options). Instructions live in
 * RESOLVE_PERSONA; the system prompt already carries memory rules.
 *
 * @param unknowns Open unknown rows to list in the message.
 * @returns The newline-joined message text.
 */
export declare function buildResolveUserMessage(unknowns: OpenUnknownRow[]): string;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * Compose the opening user message for the resolve agent. The message carries
 * data only: a count header followed by, for each unknown, a summary line
 * (id, kind, transaction/account/file ids, options) and a prompt line with
 * newlines flattened to spaces. Behavioural instructions live in
 * RESOLVE_PERSONA, and the system prompt already carries the memory rules.
 *
 * @param unknowns Open unknown rows (`id`, `kind`, `transaction_id`,
 *                 `account_id`, `file_id`, `prompt`, `options_json`).
 * @returns The newline-joined message text.
 */
export function buildResolveUserMessage(unknowns) {
    const orNone = (value) => value ?? "(none)";
    const entries = unknowns.flatMap((unknown) => {
        const choices = parseOptions(unknown.options_json);
        const optionsSuffix = choices.length > 0 ? ` | options=[${choices.join(" / ")}]` : "";
        const fields = [
            `- ${unknown.id}`,
            `kind=${orNone(unknown.kind)}`,
            `tx=${orNone(unknown.transaction_id)}`,
            `acct=${orNone(unknown.account_id)}`,
            `file=${orNone(unknown.file_id)}`,
        ];
        return [
            `${fields.join(" | ")}${optionsSuffix}`,
            ` prompt: ${unknown.prompt.replace(/\n/g, " ")}`,
        ];
    });
    return [`${unknowns.length} open unknown(s) to resolve.`, ``, `Unknowns:`, ...entries].join("\n");
}
|
|
16
|
+
/**
 * Decode an `options_json` column into a list of option labels.
 *
 * @param json JSON text, or a falsy value when no options were stored.
 * @returns The string entries of the decoded array; `[]` when the input is
 *          falsy, is not valid JSON, or does not decode to an array.
 */
function parseOptions(json) {
    if (!json) {
        return [];
    }
    let decoded;
    try {
        decoded = JSON.parse(json);
    }
    catch {
        // Malformed JSON is treated the same as "no options".
        return [];
    }
    if (!Array.isArray(decoded)) {
        return [];
    }
    return decoded.filter((entry) => typeof entry === "string");
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Run `fn` after every previously queued callback has settled, and return a
 * promise for its result (or rejection).
 */
export declare function runExclusive<T>(fn: () => Promise<T> | T): Promise<T>;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * Process-wide serialization for write operations that race when multiple
 * scan agents run in parallel. Each in-flight `create_account` /
 * `update_account_metadata` runs inside `runExclusive`, so the SQLite write
 * and another agent's subsequent `list_accounts` read-back stay consistent.
 *
 * Implemented as a single tail-promise queue: cheap, deterministic, and free
 * of extra dependencies.
 */
let tail = Promise.resolve();
export function runExclusive(fn) {
    const ignoreFailure = () => undefined;
    const turn = tail.then(() => fn());
    // The queue continues from a copy of `turn` whose rejection is absorbed,
    // so one failing callback cannot block later callers. The caller still
    // observes the failure through the returned `turn`.
    tail = turn.then(undefined, ignoreFailure);
    return turn;
}
|
package/dist/scanner/buffer.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type Database from "libsql";
|
|
2
2
|
import { type TransactionInput } from "../db/queries/transactions.js";
|
|
3
3
|
/**
|
|
4
|
-
* One scan agent's pending writes. Transactions and
|
|
4
|
+
* One scan agent's pending writes. Transactions and unknowns accumulate here
|
|
5
5
|
* while the LLM works; nothing hits the DB until `commit()` runs inside a
|
|
6
6
|
* single SQLite transaction. If `commit()` throws, the transaction rolls back
|
|
7
7
|
* and the DB stays exactly as it was before this file's scan began.
|
|
@@ -11,8 +11,8 @@ import { type TransactionInput } from "../db/queries/transactions.js";
|
|
|
11
11
|
* their own mutexes so concurrent agents see each other's creates and don't
|
|
12
12
|
* duplicate.
|
|
13
13
|
*/
|
|
14
|
-
export interface
|
|
15
|
-
/** Synthesized when the LLM called
|
|
14
|
+
export interface BufferedUnknown {
|
|
15
|
+
/** Synthesized when the LLM called note_unknown with a buffered transaction_id. */
|
|
16
16
|
transaction_id: string | null;
|
|
17
17
|
account_id: string | null;
|
|
18
18
|
kind?: string | null;
|
|
@@ -20,32 +20,32 @@ export interface BufferedConcern {
|
|
|
20
20
|
options?: string[];
|
|
21
21
|
}
|
|
22
22
|
export interface BufferedTransaction {
|
|
23
|
-
/** Synthesized at queue-time so
|
|
23
|
+
/** Synthesized at queue-time so unknowns can reference this transaction. */
|
|
24
24
|
transaction_id: string;
|
|
25
25
|
input: TransactionInput;
|
|
26
26
|
}
|
|
27
27
|
export declare class BufferedWriteContext {
|
|
28
28
|
readonly fileName: string;
|
|
29
29
|
readonly transactions: BufferedTransaction[];
|
|
30
|
-
readonly
|
|
30
|
+
readonly unknowns: BufferedUnknown[];
|
|
31
31
|
doneSummary: string | null;
|
|
32
32
|
constructor(fileName: string);
|
|
33
33
|
/**
|
|
34
34
|
* Queue a transaction. Returns the synthesized transaction id so the agent
|
|
35
|
-
* can use it in subsequent
|
|
35
|
+
* can use it in subsequent note_unknown calls inside the same file.
|
|
36
36
|
*/
|
|
37
37
|
appendTransaction(input: TransactionInput): string;
|
|
38
|
-
|
|
38
|
+
appendUnknown(unknown: BufferedUnknown): void;
|
|
39
39
|
markDone(summary: string): void;
|
|
40
40
|
get isDone(): boolean;
|
|
41
41
|
/**
|
|
42
42
|
* Replay all buffered writes inside one DB transaction. `scannedFileId` is
|
|
43
|
-
* stamped onto every transaction and
|
|
44
|
-
* file. Returns `{ transactions,
|
|
43
|
+
* stamped onto every transaction and unknown so they're attributable to this
|
|
44
|
+
* file. Returns `{ transactions, unknowns }` counts so the caller can report
|
|
45
45
|
* them.
|
|
46
46
|
*/
|
|
47
47
|
commit(db: Database.Database, scannedFileId: string): {
|
|
48
48
|
transactions: number;
|
|
49
|
-
|
|
49
|
+
unknowns: number;
|
|
50
50
|
};
|
|
51
51
|
}
|
package/dist/scanner/buffer.js
CHANGED
|
@@ -1,25 +1,25 @@
|
|
|
1
1
|
import { randomUUID } from "crypto";
|
|
2
2
|
import { insertTransactionRows, validateTransaction, } from "../db/queries/transactions.js";
|
|
3
|
-
import {
|
|
3
|
+
import { recordUnknown } from "../db/queries/unknowns.js";
|
|
4
4
|
export class BufferedWriteContext {
|
|
5
5
|
fileName;
|
|
6
6
|
transactions = [];
|
|
7
|
-
|
|
7
|
+
unknowns = [];
|
|
8
8
|
doneSummary = null;
|
|
9
9
|
constructor(fileName) {
|
|
10
10
|
this.fileName = fileName;
|
|
11
11
|
}
|
|
12
12
|
/**
|
|
13
13
|
* Queue a transaction. Returns the synthesized transaction id so the agent
|
|
14
|
-
* can use it in subsequent
|
|
14
|
+
* can use it in subsequent note_unknown calls inside the same file.
|
|
15
15
|
*/
|
|
16
16
|
appendTransaction(input) {
|
|
17
17
|
const transactionId = `tx:${randomUUID()}`;
|
|
18
18
|
this.transactions.push({ transaction_id: transactionId, input });
|
|
19
19
|
return transactionId;
|
|
20
20
|
}
|
|
21
|
-
|
|
22
|
-
this.
|
|
21
|
+
appendUnknown(unknown) {
|
|
22
|
+
this.unknowns.push(unknown);
|
|
23
23
|
}
|
|
24
24
|
markDone(summary) {
|
|
25
25
|
this.doneSummary = summary;
|
|
@@ -29,8 +29,8 @@ export class BufferedWriteContext {
|
|
|
29
29
|
}
|
|
30
30
|
/**
|
|
31
31
|
* Replay all buffered writes inside one DB transaction. `scannedFileId` is
|
|
32
|
-
* stamped onto every transaction and
|
|
33
|
-
* file. Returns `{ transactions,
|
|
32
|
+
* stamped onto every transaction and unknown so they're attributable to this
|
|
33
|
+
* file. Returns `{ transactions, unknowns }` counts so the caller can report
|
|
34
34
|
* them.
|
|
35
35
|
*/
|
|
36
36
|
commit(db, scannedFileId) {
|
|
@@ -46,18 +46,18 @@ export class BufferedWriteContext {
|
|
|
46
46
|
for (const { validated: v } of validated) {
|
|
47
47
|
insertTransactionRows(db, v);
|
|
48
48
|
}
|
|
49
|
-
for (const
|
|
50
|
-
|
|
49
|
+
for (const u of this.unknowns) {
|
|
50
|
+
recordUnknown(db, {
|
|
51
51
|
file_id: scannedFileId,
|
|
52
|
-
transaction_id:
|
|
53
|
-
account_id:
|
|
54
|
-
kind:
|
|
55
|
-
prompt:
|
|
56
|
-
options:
|
|
52
|
+
transaction_id: u.transaction_id,
|
|
53
|
+
account_id: u.account_id,
|
|
54
|
+
kind: u.kind ?? null,
|
|
55
|
+
prompt: u.prompt,
|
|
56
|
+
options: u.options,
|
|
57
57
|
});
|
|
58
58
|
}
|
|
59
59
|
});
|
|
60
60
|
tx();
|
|
61
|
-
return { transactions: this.transactions.length,
|
|
61
|
+
return { transactions: this.transactions.length, unknowns: this.unknowns.length };
|
|
62
62
|
}
|
|
63
63
|
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import type { ScannedFile } from "./walker.js";
|
|
3
|
+
/** A file that was read and unlocked and is ready for the scan phase. */
export interface DecryptedFile {
    path: string;
    fileName: string;
    relPath: string;
    hash: string;
    mime: string;
    decryptedBytes: Buffer;
    /**
     * Id of the scanned_files row that already holds this hash. Only set when
     * a previously scanned file was queued again because `force` was on.
     */
    replacesPriorScannedFileId?: string;
}
|
|
13
|
+
/** A file left out of the run because its hash is already recorded and `force` was off. */
export interface SkippedFile {
    file: ScannedFile;
    /** id of the scanned_files row that already has this hash. */
    existingScannedFileId: string;
}
|
|
18
|
+
/** A file that could not be read or unlocked. */
export interface FailedFile {
    file: ScannedFile;
    /** Human-readable failure message. */
    error: string;
}
|
|
22
|
+
/** Outcome of `decryptQueue`: the input files partitioned by what happened to each. */
export interface DecryptQueueResult {
    /** Files ready to be scanned. */
    decrypted: DecryptedFile[];
    /** Files whose hash was already recorded by a prior scan. */
    skipped: SkippedFile[];
    /** Files that could not be read or unlocked. */
    failed: FailedFile[];
}
|
|
27
|
+
/** Settings for `decryptQueue`. */
export interface DecryptQueueOptions {
    /** Re-decrypt and queue files that match a prior hash. */
    force: boolean;
    /** If false, never prompt for a password; treat unlock failure as failed. */
    interactive: boolean;
    /**
     * Called as each file finishes (any outcome) so a spinner can update its
     * label. `index` is the zero-based position of the file in the queue and
     * `total` is the queue length.
     */
    onProgress?: (event: {
        index: number;
        total: number;
        fileName: string;
        outcome: "decrypted" | "skipped" | "failed";
    }) => void;
}
|
|
40
|
+
/**
 * Phase 1 of scan: walk every file in the queue, decrypt any that need it,
 * and return a partition (decrypted / skipped / failed). The actual agent
 * work in Phase 2 only sees `decrypted` — no password prompts during the
 * parallel scan loop.
 *
 * Failures don't abort; the caller (CLI) confirms whether to proceed.
 *
 * @param db    Database used to look up prior scans by hash and to persist unlock outcomes.
 * @param files Files to process, handled one at a time in the given order.
 * @param opts  Force / interactive switches and an optional progress callback.
 * @returns The three result lists.
 */
export declare function decryptQueue(db: Database.Database, files: ScannedFile[], opts: DecryptQueueOptions): Promise<DecryptQueueResult>;
|
|
49
|
+
/**
 * Go/no-go gate when some files failed to decrypt. Returns true if the caller
 * should proceed with the decrypted set, false to abort the whole scan run.
 *
 * - No failures: returns true without printing anything.
 * - Failures and nothing decrypted: lists the failures and returns false,
 *   regardless of `interactive`.
 * - Failures, something decrypted, `interactive` false (CI / non-TTY runs):
 *   lists the failures and returns true; the caller is expected to inspect
 *   `result.failed` and report.
 * - Otherwise: lists the failures and asks the user to confirm.
 */
export declare function confirmProceedAfterFailures(result: DecryptQueueResult, interactive: boolean): Promise<boolean>;
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import chalk from "chalk";
|
|
2
|
+
import inquirer from "inquirer";
|
|
3
|
+
import { readPdf } from "./pdf.js";
|
|
4
|
+
import { unlockIfNeeded, persistUnlockOutcome } from "./unlock.js";
|
|
5
|
+
/**
 * Read, de-duplicate and unlock a single queued file.
 *
 * @param db   Database used for the hash lookup and for persisting the unlock outcome.
 * @param file Queue entry with `path`, `name` and `relPath`.
 * @param opts `{ force, interactive }` switches.
 * @returns One of
 *   `{ kind: "failed", error }` — reading or unlocking threw;
 *   `{ kind: "skipped", existingScannedFileId }` — hash already recorded and `force` is off;
 *   `{ kind: "decrypted", file }` — the file is ready for scanning.
 */
async function decryptOne(db, file, opts) {
    const failure = (error) => ({ kind: "failed", error });
    let pdf;
    try {
        pdf = readPdf(file.path);
    }
    catch (readErr) {
        return failure(`read failed: ${errorMessage(readErr)}`);
    }
    const prior = findScannedByHash(db, pdf.hash);
    const alreadyScanned = Boolean(prior);
    if (alreadyScanned && !opts.force) {
        return { kind: "skipped", existingScannedFileId: prior.id };
    }
    try {
        const unlocked = await unlockIfNeeded({
            db,
            filePath: file.path,
            bytes: pdf.bytes,
            interactive: opts.interactive,
        });
        persistUnlockOutcome(db, file.path, unlocked.outcome);
        const ready = {
            path: file.path,
            fileName: file.name,
            relPath: file.relPath,
            hash: pdf.hash,
            mime: pdf.mime,
            decryptedBytes: unlocked.decrypted,
            // Only defined when a previously scanned file is being re-queued.
            replacesPriorScannedFileId: prior?.id,
        };
        return { kind: "decrypted", file: ready };
    }
    catch (unlockErr) {
        return failure(errorMessage(unlockErr) || "unlock failed");
    }
}
|
|
42
|
+
/**
 * Turn any thrown value into display text: the `message` of an Error,
 * otherwise the value's string form.
 */
function errorMessage(err) {
    if (err instanceof Error) {
        return err.message;
    }
    return String(err);
}
|
|
45
|
+
/**
 * Scan phase 1: go through the queue one file at a time, unlock whatever
 * needs unlocking, and sort every file into one of three lists — decrypted,
 * skipped or failed. Phase 2 (the agents) only receives `decrypted`, so no
 * password prompt can appear during the parallel scan loop.
 *
 * A failure never aborts the walk; the caller (CLI) decides afterwards
 * whether to continue.
 *
 * @param db    Database passed through to the per-file step.
 * @param files Queue entries, processed sequentially in order.
 * @param opts  `{ force, interactive, onProgress? }`; `onProgress` is invoked
 *              after each file has been filed into its list.
 * @returns `{ decrypted, skipped, failed }`.
 */
export async function decryptQueue(db, files, opts) {
    const buckets = { decrypted: [], skipped: [], failed: [] };
    for (const [index, file] of files.entries()) {
        const outcome = await decryptOne(db, file, opts);
        let entry;
        if (outcome.kind === "decrypted") {
            entry = outcome.file;
        }
        else if (outcome.kind === "skipped") {
            entry = { file, existingScannedFileId: outcome.existingScannedFileId };
        }
        else if (outcome.kind === "failed") {
            entry = { file, error: outcome.error };
        }
        else {
            // Unrecognised outcome kinds are neither recorded nor reported.
            continue;
        }
        buckets[outcome.kind].push(entry);
        opts.onProgress?.({ index, total: files.length, fileName: file.name, outcome: outcome.kind });
    }
    return buckets;
}
|
|
78
|
+
/**
 * Go/no-go gate used when some files failed to decrypt. Resolves to true when
 * the caller should carry on with the decrypted set and to false when the
 * whole scan run should be abandoned.
 *
 * With no failures it resolves to true silently. Otherwise the failures are
 * listed; if nothing decrypted at all the answer is false, and when
 * `interactive` is false (CI / non-TTY runs) the answer is true without
 * asking — the caller is expected to inspect `result.failed` and report.
 * Only the remaining case prompts the user for confirmation.
 */
export async function confirmProceedAfterFailures(result, interactive) {
    const { failed, decrypted } = result;
    if (failed.length === 0) {
        return true;
    }
    console.log("");
    console.log(chalk.yellow(`${failed.length} file(s) could not be decrypted:`));
    failed.forEach((failure) => {
        console.log(` ${chalk.red("✗")} ${failure.file.relPath} — ${chalk.dim(failure.error)}`);
    });
    if (decrypted.length === 0) {
        console.log(chalk.red("Nothing to scan."));
        return false;
    }
    if (!interactive) {
        return true;
    }
    const question = {
        type: "confirm",
        name: "proceed",
        message: `Proceed scanning the ${decrypted.length} file(s) that decrypted successfully?`,
        default: true,
    };
    const answers = await inquirer.prompt([question]);
    return answers.proceed;
}
|
|
110
|
+
/**
 * Look up a scanned_files row by content hash.
 *
 * @returns The `{ id }` row for that hash, or `null` when the query yields none.
 */
function findScannedByHash(db, hash) {
    const lookup = db.prepare(`SELECT id FROM scanned_files WHERE file_hash = ?`);
    const match = lookup.get(hash);
    return match ?? null;
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { findCorrelatedTransactions } from "../../db/queries/transactions.js";
|
|
2
|
+
import { formatAmount } from "../../currency.js";
|
|
3
|
+
/**
 * Cross-account correlation detector: flags a single money movement that was
 * recorded on two different accounts (for example a bank-to-card transfer
 * that shows up once per statement). Each correlated pair yields one entry,
 * attached to the newer transaction of the pair (on equal dates, to side `b`),
 * and only pairs with at least one side coming from a file in
 * `scope.fileIds` are reported.
 *
 * This supersedes the pre-commit `applyCrossFileCorrelations`, which worked
 * on buffered (uncommitted) transactions; running after commit allows reuse
 * of the DB-backed `findCorrelatedTransactions` with no buffer-shape adapter.
 *
 * @param db    Database handle.
 * @param scope Object with `fileIds`, the scanned-file ids to consider.
 * @returns Entries of the form `{ file_id, transaction_id, account_id, kind,
 *          prompt, options }`; empty when there are no file ids or no pairs.
 */
function detect(db, scope) {
    if (scope.fileIds.length === 0) {
        return [];
    }
    const pairs = findCorrelatedTransactions(db);
    if (pairs.length === 0) {
        return [];
    }
    const scoped = transactionsInScope(db, scope.fileIds);
    return pairs
        .filter(({ a, b }) => scoped.has(a.id) || scoped.has(b.id))
        .map((pair) => {
            const aIsOlder = pair.a.date <= pair.b.date;
            const older = aIsOlder ? pair.a : pair.b;
            const newer = aIsOlder ? pair.b : pair.a;
            return {
                file_id: null,
                transaction_id: newer.id,
                account_id: null,
                kind: "correlation",
                prompt: buildPrompt(pair, older, newer),
                options: ["Merge into one transaction", "Keep separate (these are two real events)", "Skip"],
            };
        });
}
|
|
36
|
+
/**
 * Build the three-line question text for a correlated pair: a headline with
 * the formatted amount and day gap, then the newer transaction, then the
 * older one (date — description — account names).
 */
function buildPrompt(pair, older, newer) {
    const describe = (tx) => ` ${tx.date} — ${tx.description} — ${tx.account_names.join(", ")}`;
    const headline = `Possible cross-account correlation (${formatAmount(pair.amount, pair.currency)}, ${pair.day_gap} day(s) apart).`;
    return `${headline}\n${describe(newer)}\n${describe(older)}`;
}
|
|
44
|
+
/**
 * Collect the ids of all transactions whose `source_file_id` is one of the
 * given scanned-file ids.
 *
 * @returns A Set of transaction ids.
 */
function transactionsInScope(db, fileIds) {
    const marks = Array.from(fileIds, () => "?").join(",");
    const query = db.prepare(`SELECT id FROM transactions WHERE source_file_id IN (${marks})`);
    const ids = new Set();
    for (const row of query.all(...fileIds)) {
        ids.add(row.id);
    }
    return ids;
}
|
|
51
|
+
export const correlationsDetector = { name: "correlations", detect };
|