plasalid 0.8.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/dist/ai/personas.js +29 -6
- package/dist/ai/prompt-sections.d.ts +10 -0
- package/dist/ai/prompt-sections.js +29 -0
- package/dist/ai/system-prompt.js +10 -6
- package/dist/ai/tools/clarify.js +35 -0
- package/dist/ai/tools/common.js +3 -2
- package/dist/ai/tools/index.js +6 -3
- package/dist/ai/tools/ingest.js +47 -35
- package/dist/ai/tools/mutate.d.ts +2 -0
- package/dist/ai/tools/mutate.js +81 -0
- package/dist/cli/commands/accounts.d.ts +1 -4
- package/dist/cli/commands/accounts.js +12 -101
- package/dist/cli/commands/files.d.ts +7 -0
- package/dist/cli/commands/files.js +24 -0
- package/dist/cli/commands/rules.d.ts +4 -12
- package/dist/cli/commands/rules.js +33 -67
- package/dist/cli/commands/scan.js +14 -12
- package/dist/cli/commands/status.js +5 -3
- package/dist/cli/commands/transactions.d.ts +0 -2
- package/dist/cli/commands/transactions.js +10 -63
- package/dist/cli/format.js +22 -32
- package/dist/cli/helper.d.ts +9 -1
- package/dist/cli/helper.js +17 -2
- package/dist/cli/index.js +37 -32
- package/dist/cli/ink/FilesBrowser.d.ts +7 -0
- package/dist/cli/ink/FilesBrowser.js +103 -0
- package/dist/cli/ink/ListBrowser.d.ts +16 -1
- package/dist/cli/ink/ListBrowser.js +36 -49
- package/dist/cli/ink/PromptFrame.js +1 -1
- package/dist/cli/ink/RulesBrowser.d.ts +7 -0
- package/dist/cli/ink/RulesBrowser.js +67 -0
- package/dist/cli/ink/ScanDashboard.js +90 -68
- package/dist/cli/ink/hooks/useFooterText.js +14 -22
- package/dist/cli/ink/keys.d.ts +2 -0
- package/dist/cli/ink/keys.js +19 -0
- package/dist/db/queries/files.d.ts +29 -0
- package/dist/db/queries/files.js +34 -0
- package/dist/db/queries/questions.d.ts +17 -0
- package/dist/db/queries/questions.js +47 -9
- package/dist/db/queries/rules.d.ts +31 -0
- package/dist/db/queries/rules.js +55 -0
- package/dist/db/queries/transactions.d.ts +34 -0
- package/dist/db/queries/transactions.js +86 -0
- package/dist/db/schema.js +17 -0
- package/dist/scanner/clarifier-memory.d.ts +15 -3
- package/dist/scanner/clarifier-memory.js +38 -17
- package/dist/scanner/clarifier.d.ts +2 -1
- package/dist/scanner/clarifier.js +40 -26
- package/dist/scanner/commit-pipeline.d.ts +56 -0
- package/dist/scanner/commit-pipeline.js +204 -0
- package/dist/scanner/committer.d.ts +56 -0
- package/dist/scanner/committer.js +204 -0
- package/dist/scanner/parse.js +27 -7
- package/dist/scanner/recurrence-pipeline.d.ts +28 -0
- package/dist/scanner/recurrence-pipeline.js +126 -0
- package/dist/scanner/recurrence.d.ts +28 -0
- package/dist/scanner/recurrence.js +155 -0
- package/dist/scanner/rule-keys.d.ts +13 -0
- package/dist/scanner/rule-keys.js +28 -0
- package/dist/scanner/rules.d.ts +13 -0
- package/dist/scanner/rules.js +28 -0
- package/dist/scanner/worker.js +4 -2
- package/package.json +1 -1
|
@@ -129,6 +129,87 @@ export function updatePosting(db, postingId, fields) {
|
|
|
129
129
|
/**
 * Delete a single transaction row by id.
 *
 * @param db - database handle exposing `prepare`.
 * @param transactionId - id of the transaction to remove.
 * @returns the `changes` count reported by the statement.
 */
export function deleteTransaction(db, transactionId) {
    const statement = db.prepare(`DELETE FROM transactions WHERE id = ?`);
    const { changes } = statement.run(transactionId);
    return changes;
}
|
|
132
|
+
/**
 * Backfill primitive. Updates every posting matching `filter` inside one
 * database transaction and returns the affected count plus a sample of ids so
 * the caller (often an AI tool) can quote evidence back to the user.
 *
 * Refuses to run without at least one usable filter field (no "update
 * everything" escape hatch) and without at least one usable set field. Also
 * refuses a no-op recategorization where `set.account_id` equals
 * `filter.account_id` — agents shouldn't waste tool calls on identity
 * transforms.
 *
 * Safe-field policy mirrors `updatePosting`: account_id + memo only.
 * Amount/currency corrections must go through delete + re-record to keep
 * the transaction's debit=credit invariant intact.
 *
 * @param db - database handle exposing `prepare` and `transaction`.
 * @param filter - any of `account_id`, `currency`, `merchant_id`, `from`,
 *   `to`, `description_contains`; empty/null values are ignored.
 * @param set - `account_id` and/or `memo`; other keys are ignored.
 * @returns `{ affected, sample_posting_ids }`, where the sample holds at most
 *   ten of the matched posting ids.
 * @throws Error when no recognized filter produced a condition, when no
 *   recognized set field was supplied, or when the update is a no-op.
 */
export function bulkUpdatePostings(db, filter, set) {
    // Validate on the clauses actually built, not on which keys are present:
    // a filter like `{ account_id: null }` or an unknown key would otherwise
    // pass the guard and yield a malformed `WHERE ` with nothing after it.
    const whereClauses = [];
    const whereParams = [];
    const addFilter = (clause, value) => {
        whereClauses.push(clause);
        whereParams.push(value);
    };
    if (filter.account_id)
        addFilter("p.account_id = ?", filter.account_id);
    if (filter.currency)
        addFilter("p.currency = ?", filter.currency);
    if (filter.merchant_id)
        addFilter("t.merchant_id = ?", filter.merchant_id);
    if (filter.from)
        addFilter("t.date >= ?", filter.from);
    if (filter.to)
        addFilter("t.date <= ?", filter.to);
    if (filter.description_contains) {
        // Escape LIKE metacharacters so "50%" matches the literal text rather
        // than acting as a wildcard and widening a bulk update.
        const needle = String(filter.description_contains)
            .toLowerCase()
            .replace(/[\\%_]/g, "\\$&");
        addFilter("LOWER(t.description) LIKE ? ESCAPE '\\'", `%${needle}%`);
    }
    if (whereClauses.length === 0) {
        throw new Error("bulkUpdatePostings: at least one filter field is required.");
    }
    const sets = [];
    const setParams = [];
    if (set.account_id !== undefined) {
        sets.push("account_id = ?");
        setParams.push(set.account_id);
    }
    if (set.memo !== undefined) {
        sets.push("memo = ?");
        setParams.push(set.memo);
    }
    if (sets.length === 0) {
        throw new Error("bulkUpdatePostings: at least one set field is required.");
    }
    if (set.account_id !== undefined && set.account_id === filter.account_id) {
        throw new Error("bulkUpdatePostings: set.account_id equals filter.account_id (no-op).");
    }
    const matchIdsSql = `SELECT p.id
       FROM postings p
       JOIN transactions t ON t.id = p.transaction_id
      WHERE ${whereClauses.join(" AND ")}`;
    // Keep each UPDATE well under SQLite's bound-variable limit; every batch
    // still runs inside the same transaction, so the backfill stays atomic.
    const UPDATE_BATCH_SIZE = 500;
    let affected = 0;
    let sample = [];
    const tx = db.transaction(() => {
        const ids = db.prepare(matchIdsSql).all(...whereParams).map((row) => row.id);
        if (ids.length === 0)
            return;
        sample = ids.slice(0, 10);
        for (let start = 0; start < ids.length; start += UPDATE_BATCH_SIZE) {
            const batch = ids.slice(start, start + UPDATE_BATCH_SIZE);
            const placeholders = batch.map(() => "?").join(",");
            affected += db
                .prepare(`UPDATE postings SET ${sets.join(", ")} WHERE id IN (${placeholders})`)
                .run(...setParams, ...batch).changes;
        }
    });
    tx();
    return { affected, sample_posting_ids: sample };
}
|
|
132
213
|
/**
|
|
133
214
|
* Heuristic duplicate finder: group transactions by (rounded total debit) and check
|
|
134
215
|
* pairs whose date difference is ≤ toleranceDays. Returns groups with ≥2 members.
|
|
@@ -370,3 +451,8 @@ export function countTransactions(db) {
|
|
|
370
451
|
.get();
|
|
371
452
|
return row;
|
|
372
453
|
}
|
|
454
|
+
/**
 * Count the transactions whose `source_file_id` equals `fileId`.
 *
 * @param db - database handle exposing `prepare`.
 * @param fileId - id of the source file to count for.
 * @returns the value of the `n` column from the COUNT(*) row.
 */
export function countTransactionsBySourceFile(db, fileId) {
    const statement = db.prepare(`SELECT COUNT(*) AS n FROM transactions WHERE source_file_id = ?`);
    const row = statement.get(fileId);
    return row.n;
}
|
package/dist/db/schema.js
CHANGED
|
@@ -45,6 +45,8 @@ export function migrate(db) {
|
|
|
45
45
|
status TEXT NOT NULL CHECK(status IN ('pending','scanned','failed')),
|
|
46
46
|
raw_text TEXT,
|
|
47
47
|
scanned_at TEXT,
|
|
48
|
+
provider TEXT,
|
|
49
|
+
model TEXT,
|
|
48
50
|
error TEXT,
|
|
49
51
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
50
52
|
);
|
|
@@ -110,10 +112,12 @@ export function migrate(db) {
|
|
|
110
112
|
context_json TEXT,
|
|
111
113
|
answer TEXT,
|
|
112
114
|
resolved_at TEXT,
|
|
115
|
+
deferred_until TEXT,
|
|
113
116
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
114
117
|
);
|
|
115
118
|
|
|
116
119
|
CREATE INDEX IF NOT EXISTS questions_scan_idx ON questions(scan_id);
|
|
120
|
+
CREATE INDEX IF NOT EXISTS questions_deferred_idx ON questions(deferred_until);
|
|
117
121
|
|
|
118
122
|
CREATE TABLE IF NOT EXISTS conversation_history (
|
|
119
123
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
@@ -129,6 +133,19 @@ export function migrate(db) {
|
|
|
129
133
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
130
134
|
);
|
|
131
135
|
|
|
136
|
+
CREATE TABLE IF NOT EXISTS rules (
|
|
137
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
138
|
+
kind TEXT NOT NULL,
|
|
139
|
+
key TEXT NOT NULL,
|
|
140
|
+
target TEXT NOT NULL,
|
|
141
|
+
evidence_count INTEGER NOT NULL DEFAULT 1,
|
|
142
|
+
last_seen_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
143
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
144
|
+
UNIQUE(kind, key)
|
|
145
|
+
);
|
|
146
|
+
|
|
147
|
+
CREATE INDEX IF NOT EXISTS rules_kind_idx ON rules(kind);
|
|
148
|
+
|
|
132
149
|
CREATE TABLE IF NOT EXISTS settings (
|
|
133
150
|
key TEXT PRIMARY KEY,
|
|
134
151
|
value TEXT NOT NULL
|
|
@@ -1,8 +1,20 @@
|
|
|
1
1
|
import type Database from "libsql";
|
|
2
2
|
import type { ClosedQuestion } from "../db/queries/questions.js";
|
|
3
3
|
/**
 * Turn closed questions into `rules` rows, one UPSERT per learnable closure.
 * Rules are keyed by `(kind, key)`, so repeated evidence increments
 * `evidence_count` and refreshes `last_seen_at` instead of adding a
 * near-duplicate row.
 *
 * A closure produces no rule when any of the following holds:
 *   1. its `kind` is in `RULE_KIND_DENYLIST` (failure-class kinds carry no
 *      generalizable signal);
 *   2. its `answer` starts with `Skip` (a one-time recovery decision, not a
 *      pattern to auto-apply on the next scan);
 *   3. its `rule_key` is null (without a structural key the rule could only
 *      match its own prose, which embeds dates/amounts and never re-fires).
 *
 * @param db - database handle the rules are written to.
 * @param closures - closed questions to learn from.
 * @returns the number of rules upserted (new or repeat-evidence).
 */
export declare function synthesizeMemoryRules(db: Database.Database, closures: readonly ClosedQuestion[]): number;
|
|
@@ -1,24 +1,45 @@
|
|
|
1
|
+
import { upsertRule } from "../db/queries/rules.js";
|
|
1
2
|
/**
 * Turn closed questions into `rules` rows, one UPSERT per learnable closure.
 * Rules are keyed by `(kind, key)`, so repeated evidence increments
 * `evidence_count` and refreshes `last_seen_at` instead of adding a
 * near-duplicate row.
 *
 * A closure produces no rule when any of the following holds:
 *   1. its `kind` is in `RULE_KIND_DENYLIST`;
 *   2. its `answer` starts with `Skip`;
 *   3. its `rule_key` is null.
 *
 * @param db - database handle passed through to `upsertRule`.
 * @param closures - closed questions to learn from.
 * @returns the number of rules upserted (new or repeat-evidence).
 */
export function synthesizeMemoryRules(db, closures) {
    let learned = 0;
    for (const question of closures) {
        if (isRuleSource(question)) {
            const { kind, rule_key: key, answer } = question;
            // The trimmed answer is what a later rule lookup hands back as the resolution.
            upsertRule(db, { kind, key, target: answer.trim() });
            learned += 1;
        }
    }
    return learned;
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
29
|
+
/** Question kinds that never yield a rule: failure-class closures with no reusable signal. */
const RULE_KIND_DENYLIST = new Set([
    "dirty_input",
    "scan_truncated",
    "boundary_continuation",
]);
/**
 * Decide whether a closed question is worth learning a rule from.
 *
 * Rejects closures that lack a `kind` or `rule_key`, whose kind is
 * deny-listed, whose answer is missing or blank, or whose answer is a skip.
 * A blank answer is rejected because it would be stored as an empty rule
 * target and later handed back as if it were a real resolution.
 *
 * @param c - closed question (`kind`, `rule_key`, `answer`).
 * @returns true when a rule should be upserted for this closure.
 */
function isRuleSource(c) {
    if (!c.kind || !c.rule_key)
        return false;
    if (RULE_KIND_DENYLIST.has(c.kind))
        return false;
    if (typeof c.answer !== "string" || c.answer.trim() === "")
        return false;
    return !isSkipAnswer(c.answer);
}
/**
 * True when the answer, ignoring leading whitespace and letter case, starts
 * with "skip". Non-string answers are never treated as skips.
 */
function isSkipAnswer(answer) {
    return typeof answer === "string" && answer.trim().toLowerCase().startsWith("skip");
}
|
|
@@ -36,6 +36,7 @@ export declare const CLARIFIER_PASSES: readonly ClarifierPass[];
|
|
|
36
36
|
* Single entry point shared by the in-scan resolve phase and the standalone
|
|
37
37
|
* `plasalid clarify` command. Runs deterministic passes first, then (when
|
|
38
38
|
* interactive) hands the leftovers to the LLM clarifier agent. Closed
|
|
39
|
-
* questions get
|
|
39
|
+
* questions get upserted into the rules table (keyed on the question's
|
|
40
|
+
* structural signature, not its prose).
|
|
40
41
|
*/
|
|
41
42
|
export declare function runClarify(opts: RunClarifyOpts): Promise<ClarifySummary>;
|
|
@@ -1,29 +1,38 @@
|
|
|
1
1
|
import { closeQuestion, listQuestions, countQuestions, } from "../db/queries/questions.js";
|
|
2
2
|
import { updatePosting } from "../db/queries/transactions.js";
|
|
3
|
+
import { findRule } from "../db/queries/rules.js";
|
|
3
4
|
import { runClarifyAgent } from "../ai/agent.js";
|
|
4
5
|
import { synthesizeMemoryRules } from "./clarifier-memory.js";
|
|
6
|
+
import { applyRecurrenceRules, generateRecurrenceCandidateQuestions, } from "./recurrence.js";
|
|
5
7
|
import { converge } from "./converge.js";
|
|
6
8
|
const MAX_AGENT_PASSES = 3;
|
|
7
9
|
/**
 * Deterministic clarifier pass: resolves a question through an indexed
 * `(kind, key)` lookup in the rules table. The key is read from the
 * question's `context_json.rule_key`, which is computed when the question is
 * created (see `src/scanner/committer.ts`) from a stable structural
 * signature — merchant id, normalized descriptor, account pair — so the same
 * pattern matches across scans regardless of date, amount, or prompt prose.
 *
 * NOTE(review): `boundary_continuation` and `scan_truncated` are listed in
 * `kinds`, yet `RULE_KIND_DENYLIST` in clarifier-memory never synthesizes
 * rules for them — confirm whether rules of those kinds can come from
 * elsewhere.
 */
const memoryRulePass = {
    name: "memory_rule",
    kinds: [
        "uncategorized",
        "uncategorized_expense",
        "duplicate",
        "correlation",
        "similar_accounts",
        "boundary_continuation",
        "scan_truncated",
        "unknown_merchant",
    ],
    /** Resolve to the stored rule target, or null when there is no kind, key, or matching rule. */
    async tryResolve(u, ctx) {
        const key = u.kind ? extractRuleKey(u.context_json) : null;
        if (!key) {
            return null;
        }
        const rule = findRule(ctx.db, u.kind, key);
        if (rule == null) {
            return null;
        }
        return rule.target ?? null;
    },
};
|
|
29
38
|
/**
|
|
@@ -69,12 +78,19 @@ export const CLARIFIER_PASSES = [
|
|
|
69
78
|
* Single entry point shared by the in-scan resolve phase and the standalone
|
|
70
79
|
* `plasalid clarify` command. Runs deterministic passes first, then (when
|
|
71
80
|
* interactive) hands the leftovers to the LLM clarifier agent. Closed
|
|
72
|
-
* questions get
|
|
81
|
+
* questions get upserted into the rules table (keyed on the question's
|
|
82
|
+
* structural signature, not its prose).
|
|
73
83
|
*/
|
|
74
84
|
export async function runClarify(opts) {
|
|
75
85
|
const { db } = opts;
|
|
76
86
|
const tally = {};
|
|
77
87
|
const closures = [];
|
|
88
|
+
const autoLinked = applyRecurrenceRules(db).linked;
|
|
89
|
+
if (autoLinked > 0)
|
|
90
|
+
tally["recurrence_auto_link"] = autoLinked;
|
|
91
|
+
const generated = generateRecurrenceCandidateQuestions(db, opts.scanId ?? null);
|
|
92
|
+
if (generated > 0)
|
|
93
|
+
tally["recurrence_generation"] = generated;
|
|
78
94
|
const initial = listQuestions(db, { scanId: opts.scanId, limit: 1000 });
|
|
79
95
|
const total = initial.length;
|
|
80
96
|
if (total === 0) {
|
|
@@ -182,16 +198,14 @@ function parseOptions(json) {
|
|
|
182
198
|
return [];
|
|
183
199
|
}
|
|
184
200
|
}
|
|
185
|
-
function
|
|
186
|
-
|
|
187
|
-
}
|
|
188
|
-
function parseRule(body) {
|
|
189
|
-
const idx = body.lastIndexOf(" -> ");
|
|
190
|
-
if (idx < 0)
|
|
201
|
+
/**
 * Pull the `rule_key` string out of a question's serialized context.
 *
 * @param contextJson - JSON text, or a falsy value when there is no context.
 * @returns the `rule_key` when it is a string; null for missing context,
 *   unparseable JSON, or a non-string / absent `rule_key`.
 */
function extractRuleKey(contextJson) {
    if (contextJson) {
        let decoded;
        try {
            decoded = JSON.parse(contextJson);
        }
        catch {
            // Malformed context simply means "no key".
            return null;
        }
        const candidate = decoded?.rule_key;
        return typeof candidate === "string" ? candidate : null;
    }
    return null;
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import { type TransactionInput } from "../db/queries/transactions.js";
|
|
3
|
+
/**
 * Staged best-effort transaction commit.
 *
 * Every stage reports its result as a tagged union. Side effects — raising
 * questions, emitting progress, creating placeholder accounts — are routed
 * through the `CommitHooks` interface so the pipeline stays pure-ish and
 * testable.
 *
 * A `dirty_input` validation failure (no date, malformed amount, etc.) is the
 * only legitimate drop path. Every other resolution problem — unknown
 * merchant, unknown account — is rescued in place: the merchant is NULLed,
 * the account is fuzzy-matched or created, and a typed question is raised for
 * the clarifier to review later.
 */
/** Ambient identifiers describing where a commit is happening; each may be null. */
export interface CommitContext {
    /** Scan the commit belongs to, or null outside a scan. */
    readonly scanId: string | null;
    /** Source file being committed from, or null when there is none. */
    readonly fileId: string | null;
    /** Chunk being processed, or null when there is none. */
    readonly chunkId: string | null;
    /** Progress sink, or null when nobody is listening. */
    readonly progress: ProgressEmitter | null;
}
|
|
22
|
+
/** Sink for per-chunk progress events. */
export interface ProgressEmitter {
    /**
     * Report one unit of progress for a chunk.
     *
     * @param event - the chunk the event belongs to and whether it is a
     *   transaction (`"tx"`) or a question (`"question"`).
     */
    emit(event: {
        chunkId: string;
        kind: "tx" | "question";
    }): void;
}
|
|
28
|
+
/**
 * Result of one commit attempt, discriminated on `ok`.
 *
 * - `ok: true` — carries the committed `transactionId`.
 * - `ok: false` — carries the drop `reason` and a human-readable `message`.
 *
 * Both variants report `raisedQuestions`.
 */
export type CommitOutcome =
    | {
        ok: true;
        transactionId: string;
        raisedQuestions: number;
    }
    | {
        ok: false;
        reason: DropReason;
        message: string;
        raisedQuestions: number;
    };
|
|
38
|
+
/** Why an input was dropped instead of committed; validation failure is the only member. */
export type DropReason = "dirty_input";
|
|
39
|
+
/** Callbacks through which the commit pipeline reports its side effects. */
export interface CommitHooks {
    /** A transaction was committed under `transactionId`. */
    onCommitted(transactionId: string): void;
    /** `input` failed validation; `reason` is the validation message. */
    onDirtyInput(input: TransactionInput, reason: string): void;
    /** `input` referenced merchant `attemptedId`, which could not be resolved. */
    onUnknownMerchant(input: TransactionInput, transactionId: string, attemptedId: string): void;
    /** A placeholder account `accountId` was created for a posting of `transactionId`. */
    onPlaceholderAccount(accountId: string, transactionId: string): void;
    /** Account `originalId` was substituted by the similar existing account `matchedId`. */
    onSimilarAccount(originalId: string, matchedId: string, transactionId: string): void;
}
|
|
46
|
+
/**
 * Build the standard hook set: typed questions are written to the database
 * and the progress emitter is ticked. Tests can pass their own hooks to
 * observe events without touching the question table.
 *
 * Question writes only happen when `ctx.scanId` is set. Outside a scan there
 * is no audit trail to attach them to, so best-effort resolution still runs
 * but the typed question is suppressed.
 *
 * @param db - database handle questions are recorded in.
 * @param ctx - scan/file/chunk identifiers and the optional progress emitter.
 * @returns hooks bound to `db` and `ctx`.
 */
export declare function defaultCommitHooks(db: Database.Database, ctx: CommitContext): CommitHooks;
|
|
56
|
+
/**
 * Validate, resolve and commit one transaction input.
 *
 * @param db - database handle to commit into.
 * @param ctx - scan/file/chunk identifiers and the optional progress emitter.
 * @param input - transaction to commit.
 * @param hooks - side-effect callbacks; optional.
 * @returns a `CommitOutcome` describing the commit or the drop.
 */
export declare function runCommitPipeline(db: Database.Database, ctx: CommitContext, input: TransactionInput, hooks?: CommitHooks): CommitOutcome;
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import { validateTransaction, insertTransactionRows, } from "../db/queries/transactions.js";
|
|
2
|
+
import { createAccount, findAccountById, findAccountsByFuzzyName, ensureStructuralAccount, ensureTopLevelRoot, TOP_LEVEL_TYPES, } from "../db/queries/account-balance.js";
|
|
3
|
+
import { recordQuestion } from "../db/queries/questions.js";
|
|
4
|
+
import { accountIdKey, accountPairKey, descriptorKey, } from "./rule-keys.js";
|
|
5
|
+
/**
 * Default hook wiring: raises typed questions into the DB and ticks the
 * progress emitter. Tests substitute their own hooks to inspect events
 * without touching the question table.
 *
 * Question writes are gated on `ctx.scanId` — outside a scan there is no
 * audit trail to attach to, so best-effort resolution still happens but
 * the typed question is suppressed. Progress ticks are emitted only when both
 * `ctx.progress` and `ctx.chunkId` are set.
 *
 * @param db - database handle passed to `recordQuestion`.
 * @param ctx - scan/file/chunk identifiers and the optional progress emitter.
 * @returns the hook set used by the commit pipeline.
 */
export function defaultCommitHooks(db, ctx) {
    const tick = (kind) => {
        if (ctx.progress && ctx.chunkId)
            ctx.progress.emit({ chunkId: ctx.chunkId, kind });
    };
    // Persist a question for the current scan, then count it as progress.
    const raise = (question) => {
        if (!ctx.scanId)
            return;
        recordQuestion(db, { ...question, file_id: ctx.fileId, scan_id: ctx.scanId });
        tick("question");
    };
    return {
        onCommitted: () => tick("tx"),
        onDirtyInput: (input, reason) => {
            // Dirty rows are often dirty because a field is missing; spell
            // that out instead of interpolating "undefined" into the prompt.
            const description = input?.description ? `"${input.description}"` : "(none)";
            const date = input?.date ? `${input.date}` : "(no date)";
            raise({
                transaction_id: null,
                account_id: null,
                kind: "dirty_input",
                prompt: `The scanner returned a row that couldn't be validated: ${reason}. ` +
                    `Raw description: ${description} on ${date}.`,
                context: { description: input?.description, date: input?.date, reason },
            });
        },
        onUnknownMerchant: (input, transactionId, attemptedId) => {
            // Prefer the raw statement descriptor; it is what the rule key is derived from.
            const descriptor = input.raw_descriptor || input.description;
            raise({
                transaction_id: transactionId,
                account_id: null,
                kind: "unknown_merchant",
                prompt: `The scanner referenced merchant id "${attemptedId}" but no such merchant exists. ` +
                    `Link "${descriptor}" to an existing merchant or leave it unlinked.`,
                context: { rule_key: descriptorKey(descriptor), descriptor, attempted_id: attemptedId },
            });
        },
        onPlaceholderAccount: (accountId, transactionId) => raise({
            transaction_id: transactionId,
            account_id: accountId,
            kind: "uncategorized",
            prompt: `A placeholder account was created for posting "${accountId}". ` +
                `Confirm the category, merge into an existing account, or rename.`,
            context: { rule_key: accountIdKey(accountId), placeholder_id: accountId },
        }),
        onSimilarAccount: (originalId, matchedId, transactionId) => raise({
            transaction_id: transactionId,
            account_id: matchedId,
            kind: "similar_accounts",
            prompt: `The scanner referenced "${originalId}" — the closest existing account is "${matchedId}". ` +
                `Confirm they are the same, or split them apart.`,
            context: {
                rule_key: accountPairKey(originalId, matchedId),
                original_id: originalId,
                matched_id: matchedId,
            },
        }),
    };
}
|
|
68
|
+
/**
 * Validate, resolve and commit one transaction input.
 *
 * Invalid input is reported through `hooks.onDirtyInput` and dropped.
 * Otherwise the merchant and posting accounts are resolved, the rows are
 * inserted inside a database transaction, and the hooks are notified.
 *
 * `raisedQuestions` counts hook notifications, not rows written: the default
 * hooks skip the question write when `ctx.scanId` is not set.
 *
 * @param db - database handle.
 * @param ctx - scan/file/chunk identifiers and the optional progress emitter.
 * @param input - transaction to commit.
 * @param hooks - side-effect callbacks; defaults to `defaultCommitHooks(db, ctx)`.
 * @returns `{ ok: true, transactionId, raisedQuestions }` on commit, or
 *   `{ ok: false, reason: "dirty_input", message, raisedQuestions }` on drop.
 */
export function runCommitPipeline(db, ctx, input, hooks = defaultCommitHooks(db, ctx)) {
    const checked = stageValidate(input);
    if (!checked.ok) {
        const { reason } = checked;
        hooks.onDirtyInput(input, reason);
        return { ok: false, reason: "dirty_input", message: reason, raisedQuestions: 1 };
    }
    const { validated } = checked;
    const merchant = stageResolveMerchant(db, validated);
    // NOTE(review): account resolution (which may create placeholder accounts)
    // runs before, and outside of, the insert transaction below.
    const accounts = stageResolveAccounts(db, validated);
    const committed = {
        ...validated,
        merchant_id: merchant.merchantId,
        postings: accounts.postings,
    };
    db.transaction(() => insertTransactionRows(db, committed))();
    const transactionId = committed.id;
    hooks.onCommitted(transactionId);
    const raisedQuestions = applyHints({ hooks, transactionId, merchant, accounts, input });
    return { ok: true, transactionId, raisedQuestions };
}
|
|
87
|
+
/**
 * Validation stage: run `validateTransaction` and convert a thrown error
 * into a tagged failure.
 *
 * @returns `{ ok: true, validated }`, or `{ ok: false, reason }` where
 *   `reason` is the error's message (or its string form when it has none).
 */
function stageValidate(input) {
    let validated;
    try {
        validated = validateTransaction(input);
    }
    catch (err) {
        return { ok: false, reason: err?.message ?? String(err) };
    }
    return { ok: true, validated };
}
|
|
95
|
+
/**
 * Merchant stage: keep the input's merchant id only when a merchant row with
 * that id exists.
 *
 * @returns `{ merchantId, attemptedUnknownId }` — `merchantId` is the verified
 *   id or null; `attemptedUnknownId` is the id that was supplied but not
 *   found, else null.
 */
function stageResolveMerchant(db, input) {
    const requestedId = input.merchant_id;
    if (requestedId) {
        const row = db.prepare(`SELECT 1 FROM merchants WHERE id = ?`).get(requestedId);
        return row
            ? { merchantId: requestedId, attemptedUnknownId: null }
            : { merchantId: null, attemptedUnknownId: requestedId };
    }
    return { merchantId: null, attemptedUnknownId: null };
}
|
|
103
|
+
/**
 * Account stage: resolve every posting's account in input order.
 *
 * @returns `{ postings, hints }` — the postings with resolved account ids,
 *   plus one hint for each posting whose account had to be matched or created.
 */
function stageResolveAccounts(db, input) {
    const outcomes = Array.from(input.postings, (entry) => resolveOnePosting(db, entry));
    return {
        postings: outcomes.map((outcome) => outcome.posting),
        hints: outcomes.map((outcome) => outcome.hint).filter(Boolean),
    };
}
|
|
114
|
+
/**
 * Resolve one posting's account id: use it as-is when the account exists,
 * otherwise substitute the best fuzzy match, otherwise create a placeholder.
 *
 * @returns `{ posting, hint }` — `hint` is null for an exact hit,
 *   `similar_matched` for a fuzzy substitution, or `placeholder_created`.
 */
function resolveOnePosting(db, posting) {
    const requestedId = posting.account_id;
    if (findAccountById(db, requestedId)) {
        return { posting, hint: null };
    }
    const matchedId = bestFuzzyMatch(db, requestedId);
    if (!matchedId) {
        const accountId = ensurePlaceholderAccount(db, requestedId);
        return {
            posting: { ...posting, account_id: accountId },
            hint: { type: "placeholder_created", accountId },
        };
    }
    return {
        posting: { ...posting, account_id: matchedId },
        hint: { type: "similar_matched", originalId: requestedId, matchedId },
    };
}
|
|
131
|
+
/** Threshold handed to `findAccountsByFuzzyName`. */
const FUZZY_THRESHOLD = 0.7;
/**
 * Look up the closest existing account for an unknown account id, searching
 * by the id's last segment with dashes/underscores turned into spaces.
 *
 * @returns the id of the first match returned by the lookup, or null when the
 *   leaf is empty or nothing matched.
 */
function bestFuzzyMatch(db, accountId) {
    const searchName = leafSegment(accountId).replace(/[-_]+/g, " ");
    if (searchName === "") {
        return null;
    }
    const candidates = findAccountsByFuzzyName(db, searchName, FUZZY_THRESHOLD);
    const top = candidates[0];
    return top?.account.id ?? null;
}
|
|
139
|
+
/** Return the text after the last ":" in `id`, or all of `id` when it has no ":". */
function leafSegment(id) {
    const lastColon = id.lastIndexOf(":");
    return id.slice(lastColon + 1);
}
|
|
143
|
+
/**
 * Create the agent-supplied account id (and any missing intermediate parents)
 * as placeholders so the transaction can land. If the id is empty or its
 * top-level segment isn't a known account type, or an account cannot be
 * created, fall back to `expense:uncategorized`.
 *
 * Empty segments are dropped before anything is created, so the returned id
 * is the normalized one (`expense::food` → `expense:food`) — the id of an
 * account that exists, not the raw input.
 *
 * @param db - database handle.
 * @param accountId - colon-separated account id supplied by the agent.
 * @returns the id the posting should reference.
 */
function ensurePlaceholderAccount(db, accountId) {
    const segments = accountId.split(":").filter(Boolean);
    const type = segments[0];
    if (segments.length === 0 || !TOP_LEVEL_TYPES.includes(type))
        return ensureUncategorizedFallback(db);
    ensureTopLevelRoot(db, type);
    // Depth 1 is the root ensured above; walk down creating what is missing.
    for (let depth = 2; depth <= segments.length; depth++) {
        const id = segments.slice(0, depth).join(":");
        if (findAccountById(db, id))
            continue;
        const parentId = segments.slice(0, depth - 1).join(":");
        const name = humanizeSegment(segments[depth - 1]);
        try {
            createAccount(db, { id, name, type, parent_id: parentId });
        }
        catch (err) {
            // Already present is fine; any other failure falls back.
            if (err?.code === "ACCOUNT_EXISTS")
                continue;
            return ensureUncategorizedFallback(db);
        }
    }
    return segments.join(":");
}
|
|
173
|
+
/** Ensure the `expense:uncategorized` structural account and return its id. */
function ensureUncategorizedFallback(db) {
    const fallbackId = "expense:uncategorized";
    ensureStructuralAccount(db, fallbackId);
    return fallbackId;
}
|
|
177
|
+
/**
 * Turn an id segment into a display name: dashes/underscores become spaces
 * and each word's first character is upper-cased. A segment with nothing left
 * after trimming yields "Placeholder".
 */
function humanizeSegment(segment) {
    const words = segment.replace(/[-_]+/g, " ").trim();
    return words === ""
        ? "Placeholder"
        : words.replace(/\b\w/g, (ch) => ch.toUpperCase());
}
|
|
183
|
+
/**
 * Notify the hooks about everything that was rescued during resolution: the
 * unknown merchant (if any) first, then each account hint in order.
 *
 * @param args - `{ hooks, transactionId, merchant, accounts, input }`.
 * @returns the number of hook notifications made.
 */
function applyHints(args) {
    const { hooks, transactionId, merchant, accounts, input } = args;
    let notified = 0;
    const unknownMerchantId = merchant.attemptedUnknownId;
    if (unknownMerchantId) {
        hooks.onUnknownMerchant(input, transactionId, unknownMerchantId);
        notified += 1;
    }
    for (const hint of accounts.hints) {
        dispatchHint(hint, hooks, transactionId);
        notified += 1;
    }
    return notified;
}
|
|
195
|
+
/**
 * Route one account hint to its hook: `placeholder_created` goes to
 * `onPlaceholderAccount`, `similar_matched` goes to `onSimilarAccount`. Any
 * other hint type is ignored.
 */
function dispatchHint(hint, hooks, transactionId) {
    const { type } = hint;
    if (type === "placeholder_created") {
        hooks.onPlaceholderAccount(hint.accountId, transactionId);
    }
    else if (type === "similar_matched") {
        hooks.onSimilarAccount(hint.originalId, hint.matchedId, transactionId);
    }
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import { type TransactionInput } from "../db/queries/transactions.js";
|
|
3
|
+
/**
 * Staged best-effort transaction commit.
 *
 * Every stage reports its result as a tagged union. Side effects — raising
 * questions, emitting progress, creating placeholder accounts — are routed
 * through the `CommitHooks` interface so the pipeline stays pure-ish and
 * testable.
 *
 * A `dirty_input` validation failure (no date, malformed amount, etc.) is the
 * only legitimate drop path. Every other resolution problem — unknown
 * merchant, unknown account — is rescued in place: the merchant is NULLed,
 * the account is fuzzy-matched or created, and a typed question is raised for
 * the clarifier to review later.
 */
/** Ambient identifiers describing where a commit is happening; each may be null. */
export interface CommitContext {
    /** Scan the commit belongs to, or null outside a scan. */
    readonly scanId: string | null;
    /** Source file being committed from, or null when there is none. */
    readonly fileId: string | null;
    /** Chunk being processed, or null when there is none. */
    readonly chunkId: string | null;
    /** Progress sink, or null when nobody is listening. */
    readonly progress: ProgressEmitter | null;
}
|
|
22
|
+
/** Sink for per-chunk progress events. */
export interface ProgressEmitter {
    /**
     * Report one unit of progress for a chunk.
     *
     * @param event - the chunk the event belongs to and whether it is a
     *   transaction (`"tx"`) or a question (`"question"`).
     */
    emit(event: {
        chunkId: string;
        kind: "tx" | "question";
    }): void;
}
|
|
28
|
+
/**
 * Result of one commit attempt, discriminated on `ok`.
 *
 * - `ok: true` — carries the committed `transactionId`.
 * - `ok: false` — carries the drop `reason` and a human-readable `message`.
 *
 * Both variants report `raisedQuestions`.
 */
export type CommitOutcome =
    | {
        ok: true;
        transactionId: string;
        raisedQuestions: number;
    }
    | {
        ok: false;
        reason: DropReason;
        message: string;
        raisedQuestions: number;
    };
|
|
38
|
+
/** Why an input was dropped instead of committed; validation failure is the only member. */
export type DropReason = "dirty_input";
|
|
39
|
+
/** Callbacks through which the commit pipeline reports its side effects. */
export interface CommitHooks {
    /** A transaction was committed under `transactionId`. */
    onCommitted(transactionId: string): void;
    /** `input` failed validation; `reason` is the validation message. */
    onDirtyInput(input: TransactionInput, reason: string): void;
    /** `input` referenced merchant `attemptedId`, which could not be resolved. */
    onUnknownMerchant(input: TransactionInput, transactionId: string, attemptedId: string): void;
    /** A placeholder account `accountId` was created for a posting of `transactionId`. */
    onPlaceholderAccount(accountId: string, transactionId: string): void;
    /** Account `originalId` was substituted by the similar existing account `matchedId`. */
    onSimilarAccount(originalId: string, matchedId: string, transactionId: string): void;
}
|
|
46
|
+
/**
 * Build the standard hook set: typed questions are written to the database
 * and the progress emitter is ticked. Tests can pass their own hooks to
 * observe events without touching the question table.
 *
 * Question writes only happen when `ctx.scanId` is set. Outside a scan there
 * is no audit trail to attach them to, so best-effort resolution still runs
 * but the typed question is suppressed.
 *
 * @param db - database handle questions are recorded in.
 * @param ctx - scan/file/chunk identifiers and the optional progress emitter.
 * @returns hooks bound to `db` and `ctx`.
 */
export declare function defaultCommitHooks(db: Database.Database, ctx: CommitContext): CommitHooks;
|
|
56
|
+
/**
 * Validate, resolve and commit one transaction input.
 *
 * @param db - database handle to commit into.
 * @param ctx - scan/file/chunk identifiers and the optional progress emitter.
 * @param input - transaction to commit.
 * @param hooks - side-effect callbacks; optional.
 * @returns a `CommitOutcome` describing the commit or the drop.
 */
export declare function commitTransaction(db: Database.Database, ctx: CommitContext, input: TransactionInput, hooks?: CommitHooks): CommitOutcome;
|