plasalid 0.8.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +4 -0
  2. package/dist/ai/personas.js +29 -6
  3. package/dist/ai/prompt-sections.d.ts +10 -0
  4. package/dist/ai/prompt-sections.js +29 -0
  5. package/dist/ai/system-prompt.js +10 -6
  6. package/dist/ai/tools/clarify.js +35 -0
  7. package/dist/ai/tools/common.js +3 -2
  8. package/dist/ai/tools/index.js +6 -3
  9. package/dist/ai/tools/ingest.js +47 -35
  10. package/dist/ai/tools/mutate.d.ts +2 -0
  11. package/dist/ai/tools/mutate.js +81 -0
  12. package/dist/cli/commands/files.d.ts +7 -0
  13. package/dist/cli/commands/files.js +24 -0
  14. package/dist/cli/commands/rules.js +23 -20
  15. package/dist/cli/commands/scan.js +8 -3
  16. package/dist/cli/helper.d.ts +9 -1
  17. package/dist/cli/helper.js +17 -2
  18. package/dist/cli/index.js +12 -0
  19. package/dist/cli/ink/FilesBrowser.d.ts +7 -0
  20. package/dist/cli/ink/FilesBrowser.js +103 -0
  21. package/dist/cli/ink/ListBrowser.d.ts +9 -1
  22. package/dist/cli/ink/ListBrowser.js +2 -2
  23. package/dist/cli/ink/PromptFrame.js +1 -1
  24. package/dist/cli/ink/ScanDashboard.js +90 -65
  25. package/dist/cli/ink/hooks/useFooterText.js +14 -22
  26. package/dist/db/queries/files.d.ts +29 -0
  27. package/dist/db/queries/files.js +34 -0
  28. package/dist/db/queries/questions.d.ts +17 -0
  29. package/dist/db/queries/questions.js +47 -9
  30. package/dist/db/queries/rules.d.ts +31 -0
  31. package/dist/db/queries/rules.js +55 -0
  32. package/dist/db/queries/transactions.d.ts +34 -0
  33. package/dist/db/queries/transactions.js +86 -0
  34. package/dist/db/schema.js +17 -0
  35. package/dist/scanner/clarifier-memory.d.ts +15 -3
  36. package/dist/scanner/clarifier-memory.js +38 -17
  37. package/dist/scanner/clarifier.d.ts +2 -1
  38. package/dist/scanner/clarifier.js +40 -26
  39. package/dist/scanner/commit-pipeline.d.ts +56 -0
  40. package/dist/scanner/commit-pipeline.js +204 -0
  41. package/dist/scanner/committer.d.ts +56 -0
  42. package/dist/scanner/committer.js +204 -0
  43. package/dist/scanner/parse.js +25 -7
  44. package/dist/scanner/recurrence-pipeline.d.ts +28 -0
  45. package/dist/scanner/recurrence-pipeline.js +126 -0
  46. package/dist/scanner/recurrence.d.ts +28 -0
  47. package/dist/scanner/recurrence.js +155 -0
  48. package/dist/scanner/rule-keys.d.ts +13 -0
  49. package/dist/scanner/rule-keys.js +28 -0
  50. package/dist/scanner/rules.d.ts +13 -0
  51. package/dist/scanner/rules.js +28 -0
  52. package/package.json +1 -1
@@ -0,0 +1,204 @@
1
+ import { validateTransaction, insertTransactionRows, } from "../db/queries/transactions.js";
2
+ import { createAccount, findAccountById, findAccountsByFuzzyName, ensureStructuralAccount, ensureTopLevelRoot, TOP_LEVEL_TYPES, } from "../db/queries/account-balance.js";
3
+ import { recordQuestion } from "../db/queries/questions.js";
4
+ import { accountIdKey, accountPairKey, descriptorKey, } from "./rule-keys.js";
5
+ /**
6
+ * Default hook wiring: raises typed questions into the DB, ticks the
7
+ * progress emitter. Tests substitute their own hooks to inspect events
8
+ * without touching the question table.
9
+ *
10
+ * Question writes are gated on `ctx.scanId` — outside a scan there is no
11
+ * audit trail to attach to, so best-effort resolution still happens but
12
+ * the typed question is suppressed.
13
+ */
14
+ export function defaultCommitHooks(db, ctx) {
15
+ const tick = (kind) => {
16
+ if (ctx.progress && ctx.chunkId)
17
+ ctx.progress.emit({ chunkId: ctx.chunkId, kind });
18
+ };
19
+ const raise = (input) => {
20
+ if (!ctx.scanId)
21
+ return;
22
+ recordQuestion(db, { ...input, file_id: ctx.fileId, scan_id: ctx.scanId });
23
+ tick("question");
24
+ };
25
+ return {
26
+ onCommitted: () => tick("tx"),
27
+ onDirtyInput: (input, reason) => raise({
28
+ transaction_id: null,
29
+ account_id: null,
30
+ kind: "dirty_input",
31
+ prompt: `The scanner returned a row that couldn't be validated: ${reason}. ` +
32
+ `Raw description: "${input.description}" on ${input.date}.`,
33
+ context: { description: input.description, date: input.date, reason },
34
+ }),
35
+ onUnknownMerchant: (input, transactionId, attemptedId) => {
36
+ const descriptor = input.raw_descriptor || input.description;
37
+ raise({
38
+ transaction_id: transactionId,
39
+ account_id: null,
40
+ kind: "unknown_merchant",
41
+ prompt: `The scanner referenced merchant id "${attemptedId}" but no such merchant exists. ` +
42
+ `Link "${descriptor}" to an existing merchant or leave it unlinked.`,
43
+ context: { rule_key: descriptorKey(descriptor), descriptor, attempted_id: attemptedId },
44
+ });
45
+ },
46
+ onPlaceholderAccount: (accountId, transactionId) => raise({
47
+ transaction_id: transactionId,
48
+ account_id: accountId,
49
+ kind: "uncategorized",
50
+ prompt: `A placeholder account was created for posting "${accountId}". ` +
51
+ `Confirm the category, merge into an existing account, or rename.`,
52
+ context: { rule_key: accountIdKey(accountId), placeholder_id: accountId },
53
+ }),
54
+ onSimilarAccount: (originalId, matchedId, transactionId) => raise({
55
+ transaction_id: transactionId,
56
+ account_id: matchedId,
57
+ kind: "similar_accounts",
58
+ prompt: `The scanner referenced "${originalId}" — the closest existing account is "${matchedId}". ` +
59
+ `Confirm they are the same, or split them apart.`,
60
+ context: {
61
+ rule_key: accountPairKey(originalId, matchedId),
62
+ original_id: originalId,
63
+ matched_id: matchedId,
64
+ },
65
+ }),
66
+ };
67
+ }
68
+ export function runCommitPipeline(db, ctx, input, hooks = defaultCommitHooks(db, ctx)) {
69
+ const validation = stageValidate(input);
70
+ if (!validation.ok) {
71
+ hooks.onDirtyInput(input, validation.reason);
72
+ return { ok: false, reason: "dirty_input", message: validation.reason, raisedQuestions: 1 };
73
+ }
74
+ const merchant = stageResolveMerchant(db, validation.validated);
75
+ const accounts = stageResolveAccounts(db, validation.validated);
76
+ const committed = {
77
+ ...validation.validated,
78
+ merchant_id: merchant.merchantId,
79
+ postings: accounts.postings,
80
+ };
81
+ const tx = db.transaction(() => insertTransactionRows(db, committed));
82
+ tx();
83
+ hooks.onCommitted(committed.id);
84
+ const raised = applyHints({ hooks, transactionId: committed.id, merchant, accounts, input });
85
+ return { ok: true, transactionId: committed.id, raisedQuestions: raised };
86
+ }
87
+ function stageValidate(input) {
88
+ try {
89
+ return { ok: true, validated: validateTransaction(input) };
90
+ }
91
+ catch (err) {
92
+ return { ok: false, reason: err?.message ?? String(err) };
93
+ }
94
+ }
95
+ function stageResolveMerchant(db, input) {
96
+ if (!input.merchant_id)
97
+ return { merchantId: null, attemptedUnknownId: null };
98
+ const exists = db.prepare(`SELECT 1 FROM merchants WHERE id = ?`).get(input.merchant_id);
99
+ if (exists)
100
+ return { merchantId: input.merchant_id, attemptedUnknownId: null };
101
+ return { merchantId: null, attemptedUnknownId: input.merchant_id };
102
+ }
103
+ function stageResolveAccounts(db, input) {
104
+ const postings = [];
105
+ const hints = [];
106
+ for (const p of input.postings) {
107
+ const resolved = resolveOnePosting(db, p);
108
+ postings.push(resolved.posting);
109
+ if (resolved.hint)
110
+ hints.push(resolved.hint);
111
+ }
112
+ return { postings, hints };
113
+ }
114
+ function resolveOnePosting(db, posting) {
115
+ if (findAccountById(db, posting.account_id)) {
116
+ return { posting, hint: null };
117
+ }
118
+ const matched = bestFuzzyMatch(db, posting.account_id);
119
+ if (matched) {
120
+ return {
121
+ posting: { ...posting, account_id: matched },
122
+ hint: { type: "similar_matched", originalId: posting.account_id, matchedId: matched },
123
+ };
124
+ }
125
+ const placeholderId = ensurePlaceholderAccount(db, posting.account_id);
126
+ return {
127
+ posting: { ...posting, account_id: placeholderId },
128
+ hint: { type: "placeholder_created", accountId: placeholderId },
129
+ };
130
+ }
131
+ const FUZZY_THRESHOLD = 0.7;
132
+ function bestFuzzyMatch(db, accountId) {
133
+ const leaf = leafSegment(accountId).replace(/[-_]+/g, " ");
134
+ if (!leaf)
135
+ return null;
136
+ const matches = findAccountsByFuzzyName(db, leaf, FUZZY_THRESHOLD);
137
+ return matches[0]?.account.id ?? null;
138
+ }
139
+ function leafSegment(id) {
140
+ const segments = id.split(":");
141
+ return segments[segments.length - 1] ?? id;
142
+ }
143
+ /**
144
+ * Create the agent-supplied account id (and any missing intermediate parents)
145
+ * as placeholders so the transaction can land. If the id's top-level segment
146
+ * isn't a known account type, fall back to `expense:uncategorized`.
147
+ */
148
+ function ensurePlaceholderAccount(db, accountId) {
149
+ const segments = accountId.split(":").filter(Boolean);
150
+ if (segments.length === 0)
151
+ return ensureUncategorizedFallback(db);
152
+ const type = segments[0];
153
+ if (!TOP_LEVEL_TYPES.includes(type))
154
+ return ensureUncategorizedFallback(db);
155
+ ensureTopLevelRoot(db, type);
156
+ for (let i = 2; i <= segments.length; i++) {
157
+ const id = segments.slice(0, i).join(":");
158
+ if (findAccountById(db, id))
159
+ continue;
160
+ const parentId = i === 1 ? null : segments.slice(0, i - 1).join(":");
161
+ const name = humanizeSegment(segments[i - 1]);
162
+ try {
163
+ createAccount(db, { id, name, type, parent_id: parentId });
164
+ }
165
+ catch (err) {
166
+ if (err?.code === "ACCOUNT_EXISTS")
167
+ continue;
168
+ return ensureUncategorizedFallback(db);
169
+ }
170
+ }
171
+ return accountId;
172
+ }
173
+ function ensureUncategorizedFallback(db) {
174
+ ensureStructuralAccount(db, "expense:uncategorized");
175
+ return "expense:uncategorized";
176
+ }
177
+ function humanizeSegment(segment) {
178
+ const spaced = segment.replace(/[-_]+/g, " ").trim();
179
+ if (!spaced)
180
+ return "Placeholder";
181
+ return spaced.replace(/\b\w/g, (c) => c.toUpperCase());
182
+ }
183
+ function applyHints(args) {
184
+ let raised = 0;
185
+ if (args.merchant.attemptedUnknownId) {
186
+ args.hooks.onUnknownMerchant(args.input, args.transactionId, args.merchant.attemptedUnknownId);
187
+ raised++;
188
+ }
189
+ for (const hint of args.accounts.hints) {
190
+ dispatchHint(hint, args.hooks, args.transactionId);
191
+ raised++;
192
+ }
193
+ return raised;
194
+ }
195
+ function dispatchHint(hint, hooks, transactionId) {
196
+ switch (hint.type) {
197
+ case "placeholder_created":
198
+ hooks.onPlaceholderAccount(hint.accountId, transactionId);
199
+ return;
200
+ case "similar_matched":
201
+ hooks.onSimilarAccount(hint.originalId, hint.matchedId, transactionId);
202
+ return;
203
+ }
204
+ }
@@ -0,0 +1,56 @@
1
+ import type Database from "libsql";
2
+ import { type TransactionInput } from "../db/queries/transactions.js";
3
+ /**
4
+ * Staged best-effort transaction commit.
5
+ *
6
+ * Each stage returns a tagged union. Side effects (raising questions,
7
+ * progress emission, placeholder account creation) flow through the
8
+ * `CommitHooks` interface so the pipeline stays pure-ish and testable.
9
+ *
10
+ * The only legitimate drop path is a `dirty_input` validation failure
11
+ * (no date, malformed amount, etc.). Every other resolution problem —
12
+ * unknown merchant, unknown account — is rescued in-place: NULL the
13
+ * merchant, fuzzy-match-or-create the account, raise a typed question
14
+ * for the clarifier to review later.
15
+ */
16
+ export interface CommitContext {
17
+ readonly scanId: string | null;
18
+ readonly fileId: string | null;
19
+ readonly chunkId: string | null;
20
+ readonly progress: ProgressEmitter | null;
21
+ }
22
+ export interface ProgressEmitter {
23
+ emit(event: {
24
+ chunkId: string;
25
+ kind: "tx" | "question";
26
+ }): void;
27
+ }
28
+ export type CommitOutcome = {
29
+ ok: true;
30
+ transactionId: string;
31
+ raisedQuestions: number;
32
+ } | {
33
+ ok: false;
34
+ reason: DropReason;
35
+ message: string;
36
+ raisedQuestions: number;
37
+ };
38
+ export type DropReason = "dirty_input";
39
+ export interface CommitHooks {
40
+ onCommitted(transactionId: string): void;
41
+ onDirtyInput(input: TransactionInput, reason: string): void;
42
+ onUnknownMerchant(input: TransactionInput, transactionId: string, attemptedId: string): void;
43
+ onPlaceholderAccount(accountId: string, transactionId: string): void;
44
+ onSimilarAccount(originalId: string, matchedId: string, transactionId: string): void;
45
+ }
46
+ /**
47
+ * Default hook wiring: raises typed questions into the DB, ticks the
48
+ * progress emitter. Tests substitute their own hooks to inspect events
49
+ * without touching the question table.
50
+ *
51
+ * Question writes are gated on `ctx.scanId` — outside a scan there is no
52
+ * audit trail to attach to, so best-effort resolution still happens but
53
+ * the typed question is suppressed.
54
+ */
55
+ export declare function defaultCommitHooks(db: Database.Database, ctx: CommitContext): CommitHooks;
56
+ export declare function commitTransaction(db: Database.Database, ctx: CommitContext, input: TransactionInput, hooks?: CommitHooks): CommitOutcome;
@@ -0,0 +1,204 @@
1
+ import { validateTransaction, insertTransactionRows, } from "../db/queries/transactions.js";
2
+ import { createAccount, findAccountById, findAccountsByFuzzyName, ensureStructuralAccount, ensureTopLevelRoot, TOP_LEVEL_TYPES, } from "../db/queries/account-balance.js";
3
+ import { recordQuestion } from "../db/queries/questions.js";
4
+ import { accountIdKey, accountPairKey, descriptorKey, } from "./rules.js";
5
+ /**
6
+ * Default hook wiring: raises typed questions into the DB, ticks the
7
+ * progress emitter. Tests substitute their own hooks to inspect events
8
+ * without touching the question table.
9
+ *
10
+ * Question writes are gated on `ctx.scanId` — outside a scan there is no
11
+ * audit trail to attach to, so best-effort resolution still happens but
12
+ * the typed question is suppressed.
13
+ */
14
+ export function defaultCommitHooks(db, ctx) {
15
+ const tick = (kind) => {
16
+ if (ctx.progress && ctx.chunkId)
17
+ ctx.progress.emit({ chunkId: ctx.chunkId, kind });
18
+ };
19
+ const raise = (input) => {
20
+ if (!ctx.scanId)
21
+ return;
22
+ recordQuestion(db, { ...input, file_id: ctx.fileId, scan_id: ctx.scanId });
23
+ tick("question");
24
+ };
25
+ return {
26
+ onCommitted: () => tick("tx"),
27
+ onDirtyInput: (input, reason) => raise({
28
+ transaction_id: null,
29
+ account_id: null,
30
+ kind: "dirty_input",
31
+ prompt: `The scanner returned a row that couldn't be validated: ${reason}. ` +
32
+ `Raw description: "${input.description}" on ${input.date}.`,
33
+ context: { description: input.description, date: input.date, reason },
34
+ }),
35
+ onUnknownMerchant: (input, transactionId, attemptedId) => {
36
+ const descriptor = input.raw_descriptor || input.description;
37
+ raise({
38
+ transaction_id: transactionId,
39
+ account_id: null,
40
+ kind: "unknown_merchant",
41
+ prompt: `The scanner referenced merchant id "${attemptedId}" but no such merchant exists. ` +
42
+ `Link "${descriptor}" to an existing merchant or leave it unlinked.`,
43
+ context: { rule_key: descriptorKey(descriptor), descriptor, attempted_id: attemptedId },
44
+ });
45
+ },
46
+ onPlaceholderAccount: (accountId, transactionId) => raise({
47
+ transaction_id: transactionId,
48
+ account_id: accountId,
49
+ kind: "uncategorized",
50
+ prompt: `A placeholder account was created for posting "${accountId}". ` +
51
+ `Confirm the category, merge into an existing account, or rename.`,
52
+ context: { rule_key: accountIdKey(accountId), placeholder_id: accountId },
53
+ }),
54
+ onSimilarAccount: (originalId, matchedId, transactionId) => raise({
55
+ transaction_id: transactionId,
56
+ account_id: matchedId,
57
+ kind: "similar_accounts",
58
+ prompt: `The scanner referenced "${originalId}" — the closest existing account is "${matchedId}". ` +
59
+ `Confirm they are the same, or split them apart.`,
60
+ context: {
61
+ rule_key: accountPairKey(originalId, matchedId),
62
+ original_id: originalId,
63
+ matched_id: matchedId,
64
+ },
65
+ }),
66
+ };
67
+ }
68
+ export function commitTransaction(db, ctx, input, hooks = defaultCommitHooks(db, ctx)) {
69
+ const validation = stageValidate(input);
70
+ if (!validation.ok) {
71
+ hooks.onDirtyInput(input, validation.reason);
72
+ return { ok: false, reason: "dirty_input", message: validation.reason, raisedQuestions: 1 };
73
+ }
74
+ const merchant = stageResolveMerchant(db, validation.validated);
75
+ const accounts = stageResolveAccounts(db, validation.validated);
76
+ const committed = {
77
+ ...validation.validated,
78
+ merchant_id: merchant.merchantId,
79
+ postings: accounts.postings,
80
+ };
81
+ const tx = db.transaction(() => insertTransactionRows(db, committed));
82
+ tx();
83
+ hooks.onCommitted(committed.id);
84
+ const raised = applyHints({ hooks, transactionId: committed.id, merchant, accounts, input });
85
+ return { ok: true, transactionId: committed.id, raisedQuestions: raised };
86
+ }
87
+ function stageValidate(input) {
88
+ try {
89
+ return { ok: true, validated: validateTransaction(input) };
90
+ }
91
+ catch (err) {
92
+ return { ok: false, reason: err?.message ?? String(err) };
93
+ }
94
+ }
95
+ function stageResolveMerchant(db, input) {
96
+ if (!input.merchant_id)
97
+ return { merchantId: null, attemptedUnknownId: null };
98
+ const exists = db.prepare(`SELECT 1 FROM merchants WHERE id = ?`).get(input.merchant_id);
99
+ if (exists)
100
+ return { merchantId: input.merchant_id, attemptedUnknownId: null };
101
+ return { merchantId: null, attemptedUnknownId: input.merchant_id };
102
+ }
103
+ function stageResolveAccounts(db, input) {
104
+ const postings = [];
105
+ const hints = [];
106
+ for (const p of input.postings) {
107
+ const resolved = resolveOnePosting(db, p);
108
+ postings.push(resolved.posting);
109
+ if (resolved.hint)
110
+ hints.push(resolved.hint);
111
+ }
112
+ return { postings, hints };
113
+ }
114
+ function resolveOnePosting(db, posting) {
115
+ if (findAccountById(db, posting.account_id)) {
116
+ return { posting, hint: null };
117
+ }
118
+ const matched = bestFuzzyMatch(db, posting.account_id);
119
+ if (matched) {
120
+ return {
121
+ posting: { ...posting, account_id: matched },
122
+ hint: { type: "similar_matched", originalId: posting.account_id, matchedId: matched },
123
+ };
124
+ }
125
+ const placeholderId = ensurePlaceholderAccount(db, posting.account_id);
126
+ return {
127
+ posting: { ...posting, account_id: placeholderId },
128
+ hint: { type: "placeholder_created", accountId: placeholderId },
129
+ };
130
+ }
131
+ const FUZZY_THRESHOLD = 0.7;
132
+ function bestFuzzyMatch(db, accountId) {
133
+ const leaf = leafSegment(accountId).replace(/[-_]+/g, " ");
134
+ if (!leaf)
135
+ return null;
136
+ const matches = findAccountsByFuzzyName(db, leaf, FUZZY_THRESHOLD);
137
+ return matches[0]?.account.id ?? null;
138
+ }
139
+ function leafSegment(id) {
140
+ const segments = id.split(":");
141
+ return segments[segments.length - 1] ?? id;
142
+ }
143
+ /**
144
+ * Create the agent-supplied account id (and any missing intermediate parents)
145
+ * as placeholders so the transaction can land. If the id's top-level segment
146
+ * isn't a known account type, fall back to `expense:uncategorized`.
147
+ */
148
+ function ensurePlaceholderAccount(db, accountId) {
149
+ const segments = accountId.split(":").filter(Boolean);
150
+ if (segments.length === 0)
151
+ return ensureUncategorizedFallback(db);
152
+ const type = segments[0];
153
+ if (!TOP_LEVEL_TYPES.includes(type))
154
+ return ensureUncategorizedFallback(db);
155
+ ensureTopLevelRoot(db, type);
156
+ for (let i = 2; i <= segments.length; i++) {
157
+ const id = segments.slice(0, i).join(":");
158
+ if (findAccountById(db, id))
159
+ continue;
160
+ const parentId = i === 1 ? null : segments.slice(0, i - 1).join(":");
161
+ const name = humanizeSegment(segments[i - 1]);
162
+ try {
163
+ createAccount(db, { id, name, type, parent_id: parentId });
164
+ }
165
+ catch (err) {
166
+ if (err?.code === "ACCOUNT_EXISTS")
167
+ continue;
168
+ return ensureUncategorizedFallback(db);
169
+ }
170
+ }
171
+ return accountId;
172
+ }
173
+ function ensureUncategorizedFallback(db) {
174
+ ensureStructuralAccount(db, "expense:uncategorized");
175
+ return "expense:uncategorized";
176
+ }
177
+ function humanizeSegment(segment) {
178
+ const spaced = segment.replace(/[-_]+/g, " ").trim();
179
+ if (!spaced)
180
+ return "Placeholder";
181
+ return spaced.replace(/\b\w/g, (c) => c.toUpperCase());
182
+ }
183
+ function applyHints(args) {
184
+ let raised = 0;
185
+ if (args.merchant.attemptedUnknownId) {
186
+ args.hooks.onUnknownMerchant(args.input, args.transactionId, args.merchant.attemptedUnknownId);
187
+ raised++;
188
+ }
189
+ for (const hint of args.accounts.hints) {
190
+ dispatchHint(hint, args.hooks, args.transactionId);
191
+ raised++;
192
+ }
193
+ return raised;
194
+ }
195
+ function dispatchHint(hint, hooks, transactionId) {
196
+ switch (hint.type) {
197
+ case "placeholder_created":
198
+ hooks.onPlaceholderAccount(hint.accountId, transactionId);
199
+ return;
200
+ case "similar_matched":
201
+ hooks.onSimilarAccount(hint.originalId, hint.matchedId, transactionId);
202
+ return;
203
+ }
204
+ }
@@ -1,6 +1,8 @@
1
1
  import { runWithConcurrency } from "./concurrency.js";
2
2
  import { runScanWorker } from "./worker.js";
3
3
  import { errorMessage } from "./result.js";
4
+ import { getActiveModel } from "../config.js";
5
+ import { getProvider } from "../ai/providers/index.js";
4
6
  const MAX_FILE_WORKERS = 5;
5
7
  const MAX_SCAN_WORKERS_PER_FILE = 5;
6
8
  const HARD_CAP = 8;
@@ -44,18 +46,34 @@ export async function parsePhase(db, state, hooks) {
44
46
  });
45
47
  }
46
48
  /**
47
- * Only flip files to "scanned" for groups that actually completed. On abort
48
- * the pool leaves later groups unclaimed (their settled slot is undefined);
49
- * those rows stay `pending` so a future re-scan can pick them up. Partial
50
- * transactions already committed during the run stay (scanner is DB-direct).
49
+ * Flip each file's `scanned_files` row to its terminal status. Four cases:
50
+ * - settled.ok → 'scanned' + stamp provider/model (provenance for re-scans).
51
+ * - settled, !ok → 'failed' so the user sees it in `plasalid status`.
52
+ * - unsettled, aborted → leave 'pending' so a future scan can resume.
53
+ * - unsettled, !aborted → 'failed' (defensive — shouldn't happen, but better
54
+ * a visible failed row than a silent pending one).
55
+ * Partial transactions already committed during the run stay (scanner is DB-direct).
51
56
  */
57
+ const aborted = state.signal?.aborted ?? false;
58
+ const provider = getProvider().name;
59
+ const model = getActiveModel();
60
+ const stampScanned = db.prepare(`UPDATE scanned_files SET status = 'scanned', scanned_at = datetime('now'), provider = ?, model = ? WHERE id = ?`);
61
+ const stampFailed = db.prepare(`UPDATE scanned_files SET status = 'failed', error = ? WHERE id = ?`);
52
62
  for (let i = 0; i < fileGroups.length; i++) {
53
- if (!settled[i])
54
- continue;
55
63
  const sfId = fileGroups[i].scannedFileId;
56
64
  if (!sfId)
57
65
  continue;
58
- db.prepare(`UPDATE scanned_files SET status = 'scanned', scanned_at = datetime('now') WHERE id = ?`).run(sfId);
66
+ const r = settled[i];
67
+ if (r?.ok) {
68
+ stampScanned.run(provider, model, sfId);
69
+ }
70
+ else if (r && !r.ok) {
71
+ stampFailed.run(errorMessage(r.error), sfId);
72
+ }
73
+ else if (!aborted) {
74
+ stampFailed.run("worker did not produce a settled result", sfId);
75
+ }
76
+ // else: aborted + unsettled → leave pending for resume
59
77
  }
60
78
  await hooks.afterParse?.(state);
61
79
  }
@@ -0,0 +1,28 @@
1
+ import type Database from "libsql";
2
+ /**
3
+ * Structural key for a recurring-payment bucket. Same key across runs means
4
+ * the same (account, amount, currency, side) signature — the unit on which
5
+ * we learn "yes this recurs" / "no this doesn't" decisions.
6
+ *
7
+ * Embeds amount-in-cents because the recurrence identity *is* the amount:
8
+ * ฿199 monthly ≠ ฿299 monthly. The "no amounts in dedup keys" rule applies
9
+ * to merchant-category rules where amount varies; here it is intrinsic.
10
+ */
11
+ export declare function recurrenceCandidateKey(accountId: string, amountCents: number, currency: string, side: "debit" | "credit"): string;
12
+ /**
13
+ * Fast path. For every learned "Link as recurring" rule, attach any matching
14
+ * unlinked transaction to the existing recurrences row. One rules-table
15
+ * lookup and one recurrences-table lookup per `(account, currency, amount)`
16
+ * bucket — never re-runs the heuristic.
17
+ */
18
+ export declare function applyRecurrenceRules(db: Database.Database): {
19
+ linked: number;
20
+ };
21
+ /**
22
+ * Slow path. Runs the heuristic, drops irregular cadences, and skips any
23
+ * bucket already covered by a rule (either decision — "Link" or "Not
24
+ * recurring" both mean "don't ask again") or by an already-open question
25
+ * with the same key. Each survivor becomes one `recurrence_candidate`
26
+ * question that flows through the existing clarifier pipeline.
27
+ */
28
+ export declare function generateRecurrenceCandidateQuestions(db: Database.Database, scanId: string | null): number;
@@ -0,0 +1,126 @@
1
+ import { findRecurrenceCandidates, linkTransactionToRecurrence, } from "../db/queries/recurrences.js";
2
+ import { recordQuestion } from "../db/queries/questions.js";
3
+ import { formatAmount } from "../currency.js";
4
+ /**
5
+ * Structural key for a recurring-payment bucket. Same key across runs means
6
+ * the same (account, amount, currency, side) signature — the unit on which
7
+ * we learn "yes this recurs" / "no this doesn't" decisions.
8
+ *
9
+ * Embeds amount-in-cents because the recurrence identity *is* the amount:
10
+ * ฿199 monthly ≠ ฿299 monthly. The "no amounts in dedup keys" rule applies
11
+ * to merchant-category rules where amount varies; here it is intrinsic.
12
+ */
13
+ export function recurrenceCandidateKey(accountId, amountCents, currency, side) {
14
+ return `recurrence:${accountId}:${currency}:${amountCents}:${side}`;
15
+ }
16
+ const RULE_KIND = "recurrence_candidate";
17
+ const ANSWER_LINK = "Link as recurring";
18
+ /**
19
+ * Fast path. For every learned "Link as recurring" rule, attach any matching
20
+ * unlinked transaction to the existing recurrences row. One rules-table
21
+ * lookup and one recurrences-table lookup per `(account, currency, amount)`
22
+ * bucket — never re-runs the heuristic.
23
+ */
24
+ export function applyRecurrenceRules(db) {
25
+ const rules = db.prepare(`SELECT key FROM rules WHERE kind = ? AND target = ?`).all(RULE_KIND, ANSWER_LINK);
26
+ if (rules.length === 0)
27
+ return { linked: 0 };
28
+ const unlinkedByKey = new Map();
29
+ const rows = db.prepare(`SELECT p.transaction_id,
30
+ p.account_id,
31
+ p.currency,
32
+ CASE WHEN p.debit > 0 THEN p.debit ELSE p.credit END AS amount,
33
+ CASE WHEN p.debit > 0 THEN 'debit' ELSE 'credit' END AS side
34
+ FROM postings p
35
+ JOIN transactions t ON t.id = p.transaction_id
36
+ WHERE t.recurrence_id IS NULL
37
+ AND (p.debit > 0 OR p.credit > 0)`).all();
38
+ for (const r of rows) {
39
+ const key = recurrenceCandidateKey(r.account_id, Math.round(r.amount * 100), r.currency, r.side);
40
+ const bucket = unlinkedByKey.get(key) ?? [];
41
+ bucket.push(r);
42
+ unlinkedByKey.set(key, bucket);
43
+ }
44
+ let linked = 0;
45
+ for (const { key } of rules) {
46
+ const bucket = unlinkedByKey.get(key);
47
+ if (!bucket || bucket.length === 0)
48
+ continue;
49
+ const first = bucket[0];
50
+ const recurrence = db.prepare(`SELECT id FROM recurrences WHERE account_id = ? AND currency = ? AND amount_typical = ? LIMIT 1`).get(first.account_id, first.currency, Math.round(first.amount * 100) / 100);
51
+ if (!recurrence)
52
+ continue; // rule learned but aggregate row gone — let the heuristic re-surface
53
+ for (const r of bucket) {
54
+ linkTransactionToRecurrence(db, r.transaction_id, recurrence.id);
55
+ linked++;
56
+ }
57
+ }
58
+ return { linked };
59
+ }
60
+ /**
61
+ * Slow path. Runs the heuristic, drops irregular cadences, and skips any
62
+ * bucket already covered by a rule (either decision — "Link" or "Not
63
+ * recurring" both mean "don't ask again") or by an already-open question
64
+ * with the same key. Each survivor becomes one `recurrence_candidate`
65
+ * question that flows through the existing clarifier pipeline.
66
+ */
67
+ export function generateRecurrenceCandidateQuestions(db, scanId) {
68
+ const coveredKeys = collectCoveredKeys(db);
69
+ const candidates = findRecurrenceCandidates(db).filter((c) => c.implied_frequency !== "irregular");
70
+ let created = 0;
71
+ for (const c of candidates) {
72
+ const amountCents = Math.round(c.amount * 100);
73
+ const side = c.side;
74
+ const key = recurrenceCandidateKey(c.account_id, amountCents, c.currency, side);
75
+ if (coveredKeys.has(key))
76
+ continue;
77
+ recordQuestion(db, {
78
+ transaction_id: null,
79
+ account_id: c.account_id,
80
+ file_id: null,
81
+ scan_id: scanId,
82
+ kind: RULE_KIND,
83
+ prompt: buildPrompt(c),
84
+ options: ["Link as recurring", "Not recurring", "Skip"],
85
+ context: {
86
+ rule_key: key,
87
+ account_id: c.account_id,
88
+ amount: c.amount,
89
+ currency: c.currency,
90
+ side: c.side,
91
+ transaction_ids: c.transactions.map((t) => t.id),
92
+ median_days_between: c.median_days_between,
93
+ implied_frequency: c.implied_frequency,
94
+ },
95
+ });
96
+ coveredKeys.add(key); // avoid duplicate inserts within this same call
97
+ created++;
98
+ }
99
+ return created;
100
+ }
101
+ function collectCoveredKeys(db) {
102
+ const ruleKeys = db.prepare(`SELECT key FROM rules WHERE kind = ?`).all(RULE_KIND);
103
+ const openQuestions = db.prepare(`SELECT context_json FROM questions WHERE kind = ?`).all(RULE_KIND);
104
+ const keys = new Set(ruleKeys.map((r) => r.key));
105
+ for (const q of openQuestions) {
106
+ if (!q.context_json)
107
+ continue;
108
+ try {
109
+ const parsed = JSON.parse(q.context_json);
110
+ if (typeof parsed?.rule_key === "string")
111
+ keys.add(parsed.rule_key);
112
+ }
113
+ catch {
114
+ // malformed context — ignore; the question already exists, generation
115
+ // wouldn't dedupe it anyway, so the worst case is a duplicate.
116
+ }
117
+ }
118
+ return keys;
119
+ }
120
+ function buildPrompt(c) {
121
+ const amountStr = formatAmount(c.amount, c.currency);
122
+ const sideLabel = c.side === "debit" ? "outflow" : "inflow";
123
+ return (`${c.transactions.length} ${sideLabel}s on \`${c.account_id}\` of ${amountStr} ` +
124
+ `every ~${c.median_days_between} days (looks ${c.implied_frequency}). ` +
125
+ `Link them as a recurring item?`);
126
+ }