plasalid 0.8.3 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/dist/ai/personas.js +29 -6
- package/dist/ai/prompt-sections.d.ts +10 -0
- package/dist/ai/prompt-sections.js +29 -0
- package/dist/ai/system-prompt.js +10 -6
- package/dist/ai/tools/clarify.js +35 -0
- package/dist/ai/tools/common.js +3 -2
- package/dist/ai/tools/index.js +6 -3
- package/dist/ai/tools/ingest.js +47 -35
- package/dist/ai/tools/mutate.d.ts +2 -0
- package/dist/ai/tools/mutate.js +81 -0
- package/dist/cli/commands/files.d.ts +7 -0
- package/dist/cli/commands/files.js +24 -0
- package/dist/cli/commands/rules.js +23 -20
- package/dist/cli/commands/scan.js +8 -3
- package/dist/cli/helper.d.ts +9 -1
- package/dist/cli/helper.js +17 -2
- package/dist/cli/index.js +12 -0
- package/dist/cli/ink/ChatApp.js +1 -1
- package/dist/cli/ink/FilesBrowser.d.ts +7 -0
- package/dist/cli/ink/FilesBrowser.js +103 -0
- package/dist/cli/ink/ListBrowser.d.ts +9 -1
- package/dist/cli/ink/ListBrowser.js +2 -2
- package/dist/cli/ink/PromptFrame.js +1 -1
- package/dist/cli/ink/ScanDashboard.js +90 -65
- package/dist/cli/ink/hooks/useFooterText.d.ts +1 -2
- package/dist/cli/ink/hooks/useFooterText.js +11 -24
- package/dist/db/queries/files.d.ts +29 -0
- package/dist/db/queries/files.js +34 -0
- package/dist/db/queries/questions.d.ts +17 -0
- package/dist/db/queries/questions.js +47 -9
- package/dist/db/queries/rules.d.ts +31 -0
- package/dist/db/queries/rules.js +55 -0
- package/dist/db/queries/transactions.d.ts +34 -0
- package/dist/db/queries/transactions.js +86 -0
- package/dist/db/schema.js +17 -0
- package/dist/scanner/clarifier-memory.d.ts +15 -3
- package/dist/scanner/clarifier-memory.js +38 -17
- package/dist/scanner/clarifier.d.ts +2 -1
- package/dist/scanner/clarifier.js +40 -26
- package/dist/scanner/commit-pipeline.d.ts +56 -0
- package/dist/scanner/commit-pipeline.js +204 -0
- package/dist/scanner/committer.d.ts +56 -0
- package/dist/scanner/committer.js +204 -0
- package/dist/scanner/parse.js +25 -7
- package/dist/scanner/recurrence-pipeline.d.ts +28 -0
- package/dist/scanner/recurrence-pipeline.js +126 -0
- package/dist/scanner/recurrence.d.ts +28 -0
- package/dist/scanner/recurrence.js +155 -0
- package/dist/scanner/rule-keys.d.ts +13 -0
- package/dist/scanner/rule-keys.js +28 -0
- package/dist/scanner/rules.d.ts +13 -0
- package/dist/scanner/rules.js +28 -0
- package/package.json +1 -1
|
@@ -22,12 +22,17 @@ export interface QuestionRow {
|
|
|
22
22
|
prompt: string;
|
|
23
23
|
options_json: string | null;
|
|
24
24
|
context_json: string | null;
|
|
25
|
+
deferred_until: string | null;
|
|
25
26
|
created_at: string;
|
|
26
27
|
}
|
|
27
28
|
export interface ClosedQuestion {
|
|
28
29
|
prompt: string;
|
|
29
30
|
kind: string | null;
|
|
30
31
|
answer: string;
|
|
32
|
+
/** Stable signature pulled from the question's context_json. When set, the
|
|
33
|
+
* rule synthesizer keys the learned rule on this (so future questions with
|
|
34
|
+
* different prose but the same key match). When null, no rule is learned. */
|
|
35
|
+
rule_key: string | null;
|
|
31
36
|
}
|
|
32
37
|
/**
|
|
33
38
|
* Insert a new questions row and flip the `has_question` boolean on whichever
|
|
@@ -53,10 +58,22 @@ export interface CountQuestionsScope {
|
|
|
53
58
|
account_id?: string;
|
|
54
59
|
kind?: string;
|
|
55
60
|
scan_id?: string;
|
|
61
|
+
/** When true, count deferred rows too (default false — defer hides). */
|
|
62
|
+
includeDeferred?: boolean;
|
|
56
63
|
}
|
|
57
64
|
export declare function countQuestions(db: Database.Database, scope?: CountQuestionsScope): number;
|
|
58
65
|
export interface ListQuestionsOptions {
|
|
59
66
|
limit?: number;
|
|
60
67
|
scanId?: string;
|
|
68
|
+
/** When true, include deferred rows in the result (default false). */
|
|
69
|
+
includeDeferred?: boolean;
|
|
61
70
|
}
|
|
62
71
|
export declare function listQuestions(db: Database.Database, opts?: ListQuestionsOptions): QuestionRow[];
|
|
72
|
+
/**
|
|
73
|
+
* Mark a question as deferred for `days` days from now. The default
|
|
74
|
+
* `listQuestions` / `countQuestions` filter hides deferred rows until the
|
|
75
|
+
* timestamp passes, so the clarifier won't re-encounter the question on the
|
|
76
|
+
* next run. Pass `includeDeferred: true` to those functions for an
|
|
77
|
+
* unfiltered view (e.g. for the rules / files browsers).
|
|
78
|
+
*/
|
|
79
|
+
export declare function deferQuestion(db: Database.Database, id: string, days: number): boolean;
|
|
@@ -24,7 +24,7 @@ export function recordQuestion(db, input) {
|
|
|
24
24
|
*/
|
|
25
25
|
export function closeQuestion(db, id, answer) {
|
|
26
26
|
const row = db
|
|
27
|
-
.prepare(`SELECT prompt, kind, transaction_id, account_id FROM questions WHERE id = ?`)
|
|
27
|
+
.prepare(`SELECT prompt, kind, transaction_id, account_id, context_json FROM questions WHERE id = ?`)
|
|
28
28
|
.get(id);
|
|
29
29
|
if (!row)
|
|
30
30
|
return null;
|
|
@@ -33,7 +33,23 @@ export function closeQuestion(db, id, answer) {
|
|
|
33
33
|
transaction_id: row.transaction_id,
|
|
34
34
|
account_id: row.account_id,
|
|
35
35
|
});
|
|
36
|
-
return {
|
|
36
|
+
return {
|
|
37
|
+
prompt: row.prompt,
|
|
38
|
+
kind: row.kind,
|
|
39
|
+
answer,
|
|
40
|
+
rule_key: extractRuleKey(row.context_json),
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
function extractRuleKey(contextJson) {
|
|
44
|
+
if (!contextJson)
|
|
45
|
+
return null;
|
|
46
|
+
try {
|
|
47
|
+
const parsed = JSON.parse(contextJson);
|
|
48
|
+
return typeof parsed?.rule_key === "string" ? parsed.rule_key : null;
|
|
49
|
+
}
|
|
50
|
+
catch {
|
|
51
|
+
return null;
|
|
52
|
+
}
|
|
37
53
|
}
|
|
38
54
|
/**
|
|
39
55
|
* Look up the transaction/account a question is attached to. Returns null when
|
|
@@ -65,6 +81,7 @@ function maybeClearHasQuestionFlags(db, target) {
|
|
|
65
81
|
db.prepare(`UPDATE accounts SET has_question = 0 WHERE id = ?`).run(target.account_id);
|
|
66
82
|
}
|
|
67
83
|
}
|
|
84
|
+
const ACTIVE_DEFERRED_CLAUSE = "(deferred_until IS NULL OR deferred_until <= datetime('now'))";
|
|
68
85
|
export function countQuestions(db, scope = {}) {
|
|
69
86
|
const conditions = [];
|
|
70
87
|
const params = [];
|
|
@@ -88,23 +105,44 @@ export function countQuestions(db, scope = {}) {
|
|
|
88
105
|
conditions.push("scan_id = ?");
|
|
89
106
|
params.push(scope.scan_id);
|
|
90
107
|
}
|
|
108
|
+
if (!scope.includeDeferred)
|
|
109
|
+
conditions.push(ACTIVE_DEFERRED_CLAUSE);
|
|
91
110
|
const where = conditions.length ? `WHERE ${conditions.join(" AND ")}` : "";
|
|
92
111
|
const row = db
|
|
93
112
|
.prepare(`SELECT COUNT(*) AS n FROM questions ${where}`)
|
|
94
113
|
.get(...params);
|
|
95
114
|
return row.n;
|
|
96
115
|
}
|
|
116
|
+
const ROW_COLUMNS = "id, scan_id, file_id, transaction_id, account_id, kind, prompt, options_json, context_json, deferred_until, created_at";
|
|
97
117
|
export function listQuestions(db, opts = {}) {
|
|
98
118
|
const capped = Math.min(Math.max(opts.limit ?? 200, 1), 1000);
|
|
119
|
+
const conditions = [];
|
|
120
|
+
const params = [];
|
|
99
121
|
if (opts.scanId) {
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
WHERE scan_id = ?
|
|
103
|
-
ORDER BY created_at ASC
|
|
104
|
-
LIMIT ?`).all(opts.scanId, capped);
|
|
122
|
+
conditions.push("scan_id = ?");
|
|
123
|
+
params.push(opts.scanId);
|
|
105
124
|
}
|
|
106
|
-
|
|
125
|
+
if (!opts.includeDeferred)
|
|
126
|
+
conditions.push(ACTIVE_DEFERRED_CLAUSE);
|
|
127
|
+
const where = conditions.length ? `WHERE ${conditions.join(" AND ")}` : "";
|
|
128
|
+
params.push(capped);
|
|
129
|
+
return db.prepare(`SELECT ${ROW_COLUMNS}
|
|
107
130
|
FROM questions
|
|
131
|
+
${where}
|
|
108
132
|
ORDER BY created_at ASC
|
|
109
|
-
LIMIT ?`).all(
|
|
133
|
+
LIMIT ?`).all(...params);
|
|
134
|
+
}
|
|
135
|
+
/**
|
|
136
|
+
* Mark a question as deferred for `days` days from now. The default
|
|
137
|
+
* `listQuestions` / `countQuestions` filter hides deferred rows until the
|
|
138
|
+
* timestamp passes, so the clarifier won't re-encounter the question on the
|
|
139
|
+
* next run. Pass `includeDeferred: true` to those functions for an
|
|
140
|
+
* unfiltered view (e.g. for the rules / files browsers).
|
|
141
|
+
*/
|
|
142
|
+
export function deferQuestion(db, id, days) {
|
|
143
|
+
const safeDays = Math.max(1, Math.floor(days));
|
|
144
|
+
const result = db
|
|
145
|
+
.prepare(`UPDATE questions SET deferred_until = datetime('now', ?) WHERE id = ?`)
|
|
146
|
+
.run(`+${safeDays} days`, id);
|
|
147
|
+
return result.changes > 0;
|
|
110
148
|
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
export interface Rule {
|
|
3
|
+
id: number;
|
|
4
|
+
kind: string;
|
|
5
|
+
key: string;
|
|
6
|
+
target: string;
|
|
7
|
+
evidence_count: number;
|
|
8
|
+
last_seen_at: string;
|
|
9
|
+
created_at: string;
|
|
10
|
+
}
|
|
11
|
+
export interface UpsertRuleInput {
|
|
12
|
+
kind: string;
|
|
13
|
+
key: string;
|
|
14
|
+
target: string;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Insert a rule keyed on (kind, key), or — if one already exists — bump
|
|
18
|
+
* `evidence_count`, refresh `last_seen_at`, and overwrite `target` with the
|
|
19
|
+
* latest answer. The deterministic clarifier pass looks rules up via the
|
|
20
|
+
* UNIQUE(kind, key) index, so this is the only write path that keeps the
|
|
21
|
+
* rule store sparse and indexed.
|
|
22
|
+
*/
|
|
23
|
+
export declare function upsertRule(db: Database.Database, input: UpsertRuleInput): Rule;
|
|
24
|
+
export declare function findRule(db: Database.Database, kind: string, key: string): Rule | null;
|
|
25
|
+
export interface ListRulesOptions {
|
|
26
|
+
kind?: string;
|
|
27
|
+
limit?: number;
|
|
28
|
+
}
|
|
29
|
+
export declare function listRules(db: Database.Database, opts?: ListRulesOptions): Rule[];
|
|
30
|
+
export declare function countRules(db: Database.Database): number;
|
|
31
|
+
export declare function deleteRule(db: Database.Database, id: number): Rule | null;
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Insert a rule keyed on (kind, key), or — if one already exists — bump
|
|
3
|
+
* `evidence_count`, refresh `last_seen_at`, and overwrite `target` with the
|
|
4
|
+
* latest answer. The deterministic clarifier pass looks rules up via the
|
|
5
|
+
* UNIQUE(kind, key) index, so this is the only write path that keeps the
|
|
6
|
+
* rule store sparse and indexed.
|
|
7
|
+
*/
|
|
8
|
+
export function upsertRule(db, input) {
|
|
9
|
+
db.prepare(`INSERT INTO rules (kind, key, target)
|
|
10
|
+
VALUES (?, ?, ?)
|
|
11
|
+
ON CONFLICT(kind, key) DO UPDATE SET
|
|
12
|
+
target = excluded.target,
|
|
13
|
+
evidence_count = evidence_count + 1,
|
|
14
|
+
last_seen_at = datetime('now')`).run(input.kind, input.key, input.target);
|
|
15
|
+
const row = findRule(db, input.kind, input.key);
|
|
16
|
+
if (!row)
|
|
17
|
+
throw new Error(`upsertRule: row vanished after upsert (${input.kind}, ${input.key})`);
|
|
18
|
+
return row;
|
|
19
|
+
}
|
|
20
|
+
export function findRule(db, kind, key) {
|
|
21
|
+
const row = db
|
|
22
|
+
.prepare(`SELECT id, kind, key, target, evidence_count, last_seen_at, created_at
|
|
23
|
+
FROM rules WHERE kind = ? AND key = ?`)
|
|
24
|
+
.get(kind, key);
|
|
25
|
+
return row ?? null;
|
|
26
|
+
}
|
|
27
|
+
export function listRules(db, opts = {}) {
|
|
28
|
+
const limit = Math.min(Math.max(opts.limit ?? 500, 1), 5000);
|
|
29
|
+
if (opts.kind) {
|
|
30
|
+
return db
|
|
31
|
+
.prepare(`SELECT id, kind, key, target, evidence_count, last_seen_at, created_at
|
|
32
|
+
FROM rules WHERE kind = ?
|
|
33
|
+
ORDER BY last_seen_at DESC LIMIT ?`)
|
|
34
|
+
.all(opts.kind, limit);
|
|
35
|
+
}
|
|
36
|
+
return db
|
|
37
|
+
.prepare(`SELECT id, kind, key, target, evidence_count, last_seen_at, created_at
|
|
38
|
+
FROM rules
|
|
39
|
+
ORDER BY last_seen_at DESC LIMIT ?`)
|
|
40
|
+
.all(limit);
|
|
41
|
+
}
|
|
42
|
+
export function countRules(db) {
|
|
43
|
+
const row = db.prepare(`SELECT COUNT(*) AS n FROM rules`).get();
|
|
44
|
+
return row.n;
|
|
45
|
+
}
|
|
46
|
+
export function deleteRule(db, id) {
|
|
47
|
+
const row = db
|
|
48
|
+
.prepare(`SELECT id, kind, key, target, evidence_count, last_seen_at, created_at
|
|
49
|
+
FROM rules WHERE id = ?`)
|
|
50
|
+
.get(id);
|
|
51
|
+
if (!row)
|
|
52
|
+
return null;
|
|
53
|
+
db.prepare(`DELETE FROM rules WHERE id = ?`).run(id);
|
|
54
|
+
return row;
|
|
55
|
+
}
|
|
@@ -89,6 +89,39 @@ export declare function updatePosting(db: Database.Database, postingId: string,
|
|
|
89
89
|
* the postings automatically.
|
|
90
90
|
*/
|
|
91
91
|
export declare function deleteTransaction(db: Database.Database, transactionId: string): number;
|
|
92
|
+
export interface BulkUpdatePostingsFilter {
|
|
93
|
+
account_id?: string;
|
|
94
|
+
/** Case-insensitive substring match against `transactions.description`.
|
|
95
|
+
* Use multiple bulk calls for descriptor variants — there is no regex. */
|
|
96
|
+
description_contains?: string;
|
|
97
|
+
currency?: string;
|
|
98
|
+
from?: string;
|
|
99
|
+
to?: string;
|
|
100
|
+
merchant_id?: string;
|
|
101
|
+
}
|
|
102
|
+
export interface BulkUpdatePostingsSet {
|
|
103
|
+
account_id?: string;
|
|
104
|
+
memo?: string | null;
|
|
105
|
+
}
|
|
106
|
+
export interface BulkUpdatePostingsResult {
|
|
107
|
+
affected: number;
|
|
108
|
+
sample_posting_ids: string[];
|
|
109
|
+
}
|
|
110
|
+
/**
|
|
111
|
+
* Backfill primitive. Update every posting matching the filter in one SQL
|
|
112
|
+
* UPDATE, return the affected count plus a sample of ids so the caller (often
|
|
113
|
+
* an AI tool) can quote evidence back to the user.
|
|
114
|
+
*
|
|
115
|
+
* Refuses to run without at least one filter field (no "update everything"
|
|
116
|
+
* escape hatch) and without at least one set field. Also refuses a no-op
|
|
117
|
+
* recategorization where `set.account_id` equals `filter.account_id` —
|
|
118
|
+
* agents shouldn't waste tool calls on identity transforms.
|
|
119
|
+
*
|
|
120
|
+
* Safe-field policy mirrors `updatePosting`: account_id + memo only.
|
|
121
|
+
* Amount/currency corrections must go through delete + re-record to keep
|
|
122
|
+
* the transaction's debit=credit invariant intact.
|
|
123
|
+
*/
|
|
124
|
+
export declare function bulkUpdatePostings(db: Database.Database, filter: BulkUpdatePostingsFilter, set: BulkUpdatePostingsSet): BulkUpdatePostingsResult;
|
|
92
125
|
export interface DuplicateGroupTransaction {
|
|
93
126
|
id: string;
|
|
94
127
|
date: string;
|
|
@@ -170,3 +203,4 @@ export interface TransactionTotals {
|
|
|
170
203
|
postings: number;
|
|
171
204
|
}
|
|
172
205
|
export declare function countTransactions(db: Database.Database): TransactionTotals;
|
|
206
|
+
export declare function countTransactionsBySourceFile(db: Database.Database, fileId: string): number;
|
|
@@ -129,6 +129,87 @@ export function updatePosting(db, postingId, fields) {
|
|
|
129
129
|
export function deleteTransaction(db, transactionId) {
|
|
130
130
|
return db.prepare(`DELETE FROM transactions WHERE id = ?`).run(transactionId).changes;
|
|
131
131
|
}
|
|
132
|
+
/**
|
|
133
|
+
* Backfill primitive. Update every posting matching the filter in one SQL
|
|
134
|
+
* UPDATE, return the affected count plus a sample of ids so the caller (often
|
|
135
|
+
* an AI tool) can quote evidence back to the user.
|
|
136
|
+
*
|
|
137
|
+
* Refuses to run without at least one filter field (no "update everything"
|
|
138
|
+
* escape hatch) and without at least one set field. Also refuses a no-op
|
|
139
|
+
* recategorization where `set.account_id` equals `filter.account_id` —
|
|
140
|
+
* agents shouldn't waste tool calls on identity transforms.
|
|
141
|
+
*
|
|
142
|
+
* Safe-field policy mirrors `updatePosting`: account_id + memo only.
|
|
143
|
+
* Amount/currency corrections must go through delete + re-record to keep
|
|
144
|
+
* the transaction's debit=credit invariant intact.
|
|
145
|
+
*/
|
|
146
|
+
export function bulkUpdatePostings(db, filter, set) {
|
|
147
|
+
const filterFields = Object.keys(filter)
|
|
148
|
+
.filter((k) => filter[k] !== undefined && filter[k] !== "");
|
|
149
|
+
if (filterFields.length === 0) {
|
|
150
|
+
throw new Error("bulkUpdatePostings: at least one filter field is required.");
|
|
151
|
+
}
|
|
152
|
+
const setFields = Object.keys(set)
|
|
153
|
+
.filter((k) => set[k] !== undefined);
|
|
154
|
+
if (setFields.length === 0) {
|
|
155
|
+
throw new Error("bulkUpdatePostings: at least one set field is required.");
|
|
156
|
+
}
|
|
157
|
+
if (set.account_id !== undefined && set.account_id === filter.account_id) {
|
|
158
|
+
throw new Error("bulkUpdatePostings: set.account_id equals filter.account_id (no-op).");
|
|
159
|
+
}
|
|
160
|
+
const whereClauses = [];
|
|
161
|
+
const whereParams = [];
|
|
162
|
+
if (filter.account_id) {
|
|
163
|
+
whereClauses.push("p.account_id = ?");
|
|
164
|
+
whereParams.push(filter.account_id);
|
|
165
|
+
}
|
|
166
|
+
if (filter.currency) {
|
|
167
|
+
whereClauses.push("p.currency = ?");
|
|
168
|
+
whereParams.push(filter.currency);
|
|
169
|
+
}
|
|
170
|
+
if (filter.merchant_id) {
|
|
171
|
+
whereClauses.push("t.merchant_id = ?");
|
|
172
|
+
whereParams.push(filter.merchant_id);
|
|
173
|
+
}
|
|
174
|
+
if (filter.from) {
|
|
175
|
+
whereClauses.push("t.date >= ?");
|
|
176
|
+
whereParams.push(filter.from);
|
|
177
|
+
}
|
|
178
|
+
if (filter.to) {
|
|
179
|
+
whereClauses.push("t.date <= ?");
|
|
180
|
+
whereParams.push(filter.to);
|
|
181
|
+
}
|
|
182
|
+
if (filter.description_contains) {
|
|
183
|
+
whereClauses.push("LOWER(t.description) LIKE ?");
|
|
184
|
+
whereParams.push(`%${filter.description_contains.toLowerCase()}%`);
|
|
185
|
+
}
|
|
186
|
+
const matchIdsSql = `SELECT p.id
|
|
187
|
+
FROM postings p
|
|
188
|
+
JOIN transactions t ON t.id = p.transaction_id
|
|
189
|
+
WHERE ${whereClauses.join(" AND ")}`;
|
|
190
|
+
const sets = [];
|
|
191
|
+
const setParams = [];
|
|
192
|
+
if (set.account_id !== undefined) {
|
|
193
|
+
sets.push("account_id = ?");
|
|
194
|
+
setParams.push(set.account_id);
|
|
195
|
+
}
|
|
196
|
+
if (set.memo !== undefined) {
|
|
197
|
+
sets.push("memo = ?");
|
|
198
|
+
setParams.push(set.memo);
|
|
199
|
+
}
|
|
200
|
+
let affected = 0;
|
|
201
|
+
let sample = [];
|
|
202
|
+
const tx = db.transaction(() => {
|
|
203
|
+
const ids = db.prepare(matchIdsSql).all(...whereParams);
|
|
204
|
+
if (ids.length === 0)
|
|
205
|
+
return;
|
|
206
|
+
sample = ids.slice(0, 10).map((r) => r.id);
|
|
207
|
+
const placeholders = ids.map(() => "?").join(",");
|
|
208
|
+
affected = db.prepare(`UPDATE postings SET ${sets.join(", ")} WHERE id IN (${placeholders})`).run(...setParams, ...ids.map((r) => r.id)).changes;
|
|
209
|
+
});
|
|
210
|
+
tx();
|
|
211
|
+
return { affected, sample_posting_ids: sample };
|
|
212
|
+
}
|
|
132
213
|
/**
|
|
133
214
|
* Heuristic duplicate finder: group transactions by (rounded total debit) and check
|
|
134
215
|
* pairs whose date difference is ≤ toleranceDays. Returns groups with ≥2 members.
|
|
@@ -370,3 +451,8 @@ export function countTransactions(db) {
|
|
|
370
451
|
.get();
|
|
371
452
|
return row;
|
|
372
453
|
}
|
|
454
|
+
export function countTransactionsBySourceFile(db, fileId) {
|
|
455
|
+
return db
|
|
456
|
+
.prepare(`SELECT COUNT(*) AS n FROM transactions WHERE source_file_id = ?`)
|
|
457
|
+
.get(fileId).n;
|
|
458
|
+
}
|
package/dist/db/schema.js
CHANGED
|
@@ -45,6 +45,8 @@ export function migrate(db) {
|
|
|
45
45
|
status TEXT NOT NULL CHECK(status IN ('pending','scanned','failed')),
|
|
46
46
|
raw_text TEXT,
|
|
47
47
|
scanned_at TEXT,
|
|
48
|
+
provider TEXT,
|
|
49
|
+
model TEXT,
|
|
48
50
|
error TEXT,
|
|
49
51
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
50
52
|
);
|
|
@@ -110,10 +112,12 @@ export function migrate(db) {
|
|
|
110
112
|
context_json TEXT,
|
|
111
113
|
answer TEXT,
|
|
112
114
|
resolved_at TEXT,
|
|
115
|
+
deferred_until TEXT,
|
|
113
116
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
114
117
|
);
|
|
115
118
|
|
|
116
119
|
CREATE INDEX IF NOT EXISTS questions_scan_idx ON questions(scan_id);
|
|
120
|
+
CREATE INDEX IF NOT EXISTS questions_deferred_idx ON questions(deferred_until);
|
|
117
121
|
|
|
118
122
|
CREATE TABLE IF NOT EXISTS conversation_history (
|
|
119
123
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
@@ -129,6 +133,19 @@ export function migrate(db) {
|
|
|
129
133
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
130
134
|
);
|
|
131
135
|
|
|
136
|
+
CREATE TABLE IF NOT EXISTS rules (
|
|
137
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
138
|
+
kind TEXT NOT NULL,
|
|
139
|
+
key TEXT NOT NULL,
|
|
140
|
+
target TEXT NOT NULL,
|
|
141
|
+
evidence_count INTEGER NOT NULL DEFAULT 1,
|
|
142
|
+
last_seen_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
143
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
144
|
+
UNIQUE(kind, key)
|
|
145
|
+
);
|
|
146
|
+
|
|
147
|
+
CREATE INDEX IF NOT EXISTS rules_kind_idx ON rules(kind);
|
|
148
|
+
|
|
132
149
|
CREATE TABLE IF NOT EXISTS settings (
|
|
133
150
|
key TEXT PRIMARY KEY,
|
|
134
151
|
value TEXT NOT NULL
|
|
@@ -1,8 +1,20 @@
|
|
|
1
1
|
import type Database from "libsql";
|
|
2
2
|
import type { ClosedQuestion } from "../db/queries/questions.js";
|
|
3
3
|
/**
|
|
4
|
-
* Compact every closed question
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Compact every closed question worth learning from into a `rules` row. The
|
|
5
|
+
* deterministic clarifier pass looks rules up by `(kind, key)` via the
|
|
6
|
+
* UNIQUE index, so each evidence event UPSERTs — incrementing
|
|
7
|
+
* `evidence_count` and refreshing `last_seen_at` on repeats rather than
|
|
8
|
+
* appending a near-duplicate.
|
|
9
|
+
*
|
|
10
|
+
* A closure is NOT learned (no rule synthesized) when any of:
|
|
11
|
+
* 1. `kind` is in `RULE_KIND_DENYLIST` — failure-class kinds carry no
|
|
12
|
+
* generalizable signal.
|
|
13
|
+
* 2. `answer` starts with `Skip` — skips are one-time recovery decisions,
|
|
14
|
+
* not patterns the next scan should auto-apply.
|
|
15
|
+
* 3. `rule_key` is null — without a structural key the rule could only
|
|
16
|
+
* match its own prose, which embeds dates/amounts and never re-fires.
|
|
17
|
+
*
|
|
18
|
+
* Returns the count of rules upserted (new or repeat-evidence).
|
|
7
19
|
*/
|
|
8
20
|
export declare function synthesizeMemoryRules(db: Database.Database, closures: readonly ClosedQuestion[]): number;
|
|
@@ -1,24 +1,45 @@
|
|
|
1
|
+
import { upsertRule } from "../db/queries/rules.js";
|
|
1
2
|
/**
|
|
2
|
-
* Compact every closed question
|
|
3
|
-
*
|
|
4
|
-
*
|
|
3
|
+
* Compact every closed question worth learning from into a `rules` row. The
|
|
4
|
+
* deterministic clarifier pass looks rules up by `(kind, key)` via the
|
|
5
|
+
* UNIQUE index, so each evidence event UPSERTs — incrementing
|
|
6
|
+
* `evidence_count` and refreshing `last_seen_at` on repeats rather than
|
|
7
|
+
* appending a near-duplicate.
|
|
8
|
+
*
|
|
9
|
+
* A closure is NOT learned (no rule synthesized) when any of:
|
|
10
|
+
* 1. `kind` is in `RULE_KIND_DENYLIST` — failure-class kinds carry no
|
|
11
|
+
* generalizable signal.
|
|
12
|
+
* 2. `answer` starts with `Skip` — skips are one-time recovery decisions,
|
|
13
|
+
* not patterns the next scan should auto-apply.
|
|
14
|
+
* 3. `rule_key` is null — without a structural key the rule could only
|
|
15
|
+
* match its own prose, which embeds dates/amounts and never re-fires.
|
|
16
|
+
*
|
|
17
|
+
* Returns the count of rules upserted (new or repeat-evidence).
|
|
5
18
|
*/
|
|
6
19
|
export function synthesizeMemoryRules(db, closures) {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
const exists = db.prepare(`SELECT 1 FROM memories WHERE category = ? AND content = ? LIMIT 1`);
|
|
11
|
-
const insert = db.prepare(`INSERT INTO memories (content, category) VALUES (?, ?)`);
|
|
12
|
-
for (const c of closures) {
|
|
13
|
-
const body = formatRule(c);
|
|
14
|
-
if (exists.get("scanning_hint", body))
|
|
20
|
+
let upserted = 0;
|
|
21
|
+
for (const closure of closures) {
|
|
22
|
+
if (!isRuleSource(closure))
|
|
15
23
|
continue;
|
|
16
|
-
|
|
17
|
-
|
|
24
|
+
upsertRule(db, { kind: closure.kind, key: closure.rule_key, target: closure.answer.trim() });
|
|
25
|
+
upserted++;
|
|
18
26
|
}
|
|
19
|
-
return
|
|
27
|
+
return upserted;
|
|
20
28
|
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
29
|
+
const RULE_KIND_DENYLIST = new Set([
|
|
30
|
+
"dirty_input",
|
|
31
|
+
"scan_truncated",
|
|
32
|
+
"boundary_continuation",
|
|
33
|
+
]);
|
|
34
|
+
function isRuleSource(c) {
|
|
35
|
+
if (!c.kind || !c.rule_key)
|
|
36
|
+
return false;
|
|
37
|
+
if (RULE_KIND_DENYLIST.has(c.kind))
|
|
38
|
+
return false;
|
|
39
|
+
if (isSkipAnswer(c.answer))
|
|
40
|
+
return false;
|
|
41
|
+
return true;
|
|
42
|
+
}
|
|
43
|
+
function isSkipAnswer(answer) {
|
|
44
|
+
return answer.trim().toLowerCase().startsWith("skip");
|
|
24
45
|
}
|
|
@@ -36,6 +36,7 @@ export declare const CLARIFIER_PASSES: readonly ClarifierPass[];
|
|
|
36
36
|
* Single entry point shared by the in-scan resolve phase and the standalone
|
|
37
37
|
* `plasalid clarify` command. Runs deterministic passes first, then (when
|
|
38
38
|
* interactive) hands the leftovers to the LLM clarifier agent. Closed
|
|
39
|
-
* questions get
|
|
39
|
+
* questions get upserted into the rules table (keyed on the question's
|
|
40
|
+
* structural signature, not its prose).
|
|
40
41
|
*/
|
|
41
42
|
export declare function runClarify(opts: RunClarifyOpts): Promise<ClarifySummary>;
|
|
@@ -1,29 +1,38 @@
|
|
|
1
1
|
import { closeQuestion, listQuestions, countQuestions, } from "../db/queries/questions.js";
|
|
2
2
|
import { updatePosting } from "../db/queries/transactions.js";
|
|
3
|
+
import { findRule } from "../db/queries/rules.js";
|
|
3
4
|
import { runClarifyAgent } from "../ai/agent.js";
|
|
4
5
|
import { synthesizeMemoryRules } from "./clarifier-memory.js";
|
|
6
|
+
import { applyRecurrenceRules, generateRecurrenceCandidateQuestions, } from "./recurrence.js";
|
|
5
7
|
import { converge } from "./converge.js";
|
|
6
8
|
const MAX_AGENT_PASSES = 3;
|
|
7
9
|
/**
|
|
8
|
-
* Apply deterministic
|
|
9
|
-
*
|
|
10
|
+
* Apply deterministic resolution via a `(kind, key)` indexed lookup in the
|
|
11
|
+
* rules table. The rule's `key` was computed at question-creation time
|
|
12
|
+
* (see `src/scanner/committer.ts`) from a stable structural signature — merchant id,
|
|
13
|
+
* normalized descriptor, account pair — so the same pattern matches
|
|
14
|
+
* across scans regardless of date, amount, or prompt prose.
|
|
10
15
|
*/
|
|
11
16
|
const memoryRulePass = {
|
|
12
17
|
name: "memory_rule",
|
|
13
|
-
kinds: [
|
|
18
|
+
kinds: [
|
|
19
|
+
"uncategorized",
|
|
20
|
+
"uncategorized_expense",
|
|
21
|
+
"duplicate",
|
|
22
|
+
"correlation",
|
|
23
|
+
"similar_accounts",
|
|
24
|
+
"boundary_continuation",
|
|
25
|
+
"scan_truncated",
|
|
26
|
+
"unknown_merchant",
|
|
27
|
+
],
|
|
14
28
|
async tryResolve(u, ctx) {
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
continue;
|
|
23
|
-
if (match.key === key)
|
|
24
|
-
return match.answer;
|
|
25
|
-
}
|
|
26
|
-
return null;
|
|
29
|
+
if (!u.kind)
|
|
30
|
+
return null;
|
|
31
|
+
const key = extractRuleKey(u.context_json);
|
|
32
|
+
if (!key)
|
|
33
|
+
return null;
|
|
34
|
+
const rule = findRule(ctx.db, u.kind, key);
|
|
35
|
+
return rule?.target ?? null;
|
|
27
36
|
},
|
|
28
37
|
};
|
|
29
38
|
/**
|
|
@@ -69,12 +78,19 @@ export const CLARIFIER_PASSES = [
|
|
|
69
78
|
* Single entry point shared by the in-scan resolve phase and the standalone
|
|
70
79
|
* `plasalid clarify` command. Runs deterministic passes first, then (when
|
|
71
80
|
* interactive) hands the leftovers to the LLM clarifier agent. Closed
|
|
72
|
-
* questions get
|
|
81
|
+
* questions get upserted into the rules table (keyed on the question's
|
|
82
|
+
* structural signature, not its prose).
|
|
73
83
|
*/
|
|
74
84
|
export async function runClarify(opts) {
|
|
75
85
|
const { db } = opts;
|
|
76
86
|
const tally = {};
|
|
77
87
|
const closures = [];
|
|
88
|
+
const autoLinked = applyRecurrenceRules(db).linked;
|
|
89
|
+
if (autoLinked > 0)
|
|
90
|
+
tally["recurrence_auto_link"] = autoLinked;
|
|
91
|
+
const generated = generateRecurrenceCandidateQuestions(db, opts.scanId ?? null);
|
|
92
|
+
if (generated > 0)
|
|
93
|
+
tally["recurrence_generation"] = generated;
|
|
78
94
|
const initial = listQuestions(db, { scanId: opts.scanId, limit: 1000 });
|
|
79
95
|
const total = initial.length;
|
|
80
96
|
if (total === 0) {
|
|
@@ -182,16 +198,14 @@ function parseOptions(json) {
|
|
|
182
198
|
return [];
|
|
183
199
|
}
|
|
184
200
|
}
|
|
185
|
-
function
|
|
186
|
-
|
|
187
|
-
}
|
|
188
|
-
function parseRule(body) {
|
|
189
|
-
const idx = body.lastIndexOf(" -> ");
|
|
190
|
-
if (idx < 0)
|
|
201
|
+
function extractRuleKey(contextJson) {
|
|
202
|
+
if (!contextJson)
|
|
191
203
|
return null;
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
204
|
+
try {
|
|
205
|
+
const parsed = JSON.parse(contextJson);
|
|
206
|
+
return typeof parsed?.rule_key === "string" ? parsed.rule_key : null;
|
|
207
|
+
}
|
|
208
|
+
catch {
|
|
195
209
|
return null;
|
|
196
|
-
|
|
210
|
+
}
|
|
197
211
|
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import { type TransactionInput } from "../db/queries/transactions.js";
|
|
3
|
+
/**
|
|
4
|
+
* Staged best-effort transaction commit.
|
|
5
|
+
*
|
|
6
|
+
* Each stage returns a tagged union. Side effects (raising questions,
|
|
7
|
+
* progress emission, placeholder account creation) flow through the
|
|
8
|
+
* `CommitHooks` interface so the pipeline stays pure-ish and testable.
|
|
9
|
+
*
|
|
10
|
+
* The only legitimate drop path is a `dirty_input` validation failure
|
|
11
|
+
* (no date, malformed amount, etc.). Every other resolution problem —
|
|
12
|
+
* unknown merchant, unknown account — is rescued in-place: NULL the
|
|
13
|
+
* merchant, fuzzy-match-or-create the account, raise a typed question
|
|
14
|
+
* for the clarifier to review later.
|
|
15
|
+
*/
|
|
16
|
+
export interface CommitContext {
|
|
17
|
+
readonly scanId: string | null;
|
|
18
|
+
readonly fileId: string | null;
|
|
19
|
+
readonly chunkId: string | null;
|
|
20
|
+
readonly progress: ProgressEmitter | null;
|
|
21
|
+
}
|
|
22
|
+
export interface ProgressEmitter {
|
|
23
|
+
emit(event: {
|
|
24
|
+
chunkId: string;
|
|
25
|
+
kind: "tx" | "question";
|
|
26
|
+
}): void;
|
|
27
|
+
}
|
|
28
|
+
export type CommitOutcome = {
|
|
29
|
+
ok: true;
|
|
30
|
+
transactionId: string;
|
|
31
|
+
raisedQuestions: number;
|
|
32
|
+
} | {
|
|
33
|
+
ok: false;
|
|
34
|
+
reason: DropReason;
|
|
35
|
+
message: string;
|
|
36
|
+
raisedQuestions: number;
|
|
37
|
+
};
|
|
38
|
+
export type DropReason = "dirty_input";
|
|
39
|
+
export interface CommitHooks {
|
|
40
|
+
onCommitted(transactionId: string): void;
|
|
41
|
+
onDirtyInput(input: TransactionInput, reason: string): void;
|
|
42
|
+
onUnknownMerchant(input: TransactionInput, transactionId: string, attemptedId: string): void;
|
|
43
|
+
onPlaceholderAccount(accountId: string, transactionId: string): void;
|
|
44
|
+
onSimilarAccount(originalId: string, matchedId: string, transactionId: string): void;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Default hook wiring: raises typed questions into the DB, ticks the
|
|
48
|
+
* progress emitter. Tests substitute their own hooks to inspect events
|
|
49
|
+
* without touching the question table.
|
|
50
|
+
*
|
|
51
|
+
* Question writes are gated on `ctx.scanId` — outside a scan there is no
|
|
52
|
+
* audit trail to attach to, so best-effort resolution still happens but
|
|
53
|
+
* the typed question is suppressed.
|
|
54
|
+
*/
|
|
55
|
+
export declare function defaultCommitHooks(db: Database.Database, ctx: CommitContext): CommitHooks;
|
|
56
|
+
export declare function runCommitPipeline(db: Database.Database, ctx: CommitContext, input: TransactionInput, hooks?: CommitHooks): CommitOutcome;
|