plasalid 0.5.8 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/README.md +20 -12
  2. package/dist/accounts/taxonomy.d.ts +1 -1
  3. package/dist/accounts/taxonomy.js +2 -2
  4. package/dist/ai/agent.d.ts +7 -6
  5. package/dist/ai/agent.js +9 -8
  6. package/dist/ai/personas.d.ts +1 -1
  7. package/dist/ai/personas.js +69 -66
  8. package/dist/ai/prompt-sections.d.ts +4 -5
  9. package/dist/ai/prompt-sections.js +11 -11
  10. package/dist/ai/system-prompt.d.ts +2 -3
  11. package/dist/ai/system-prompt.js +5 -5
  12. package/dist/ai/tools/common.js +13 -5
  13. package/dist/ai/tools/index.js +15 -15
  14. package/dist/ai/tools/ingest.d.ts +2 -2
  15. package/dist/ai/tools/ingest.js +210 -87
  16. package/dist/ai/tools/merchants.js +27 -12
  17. package/dist/ai/tools/read.js +36 -20
  18. package/dist/ai/tools/record.js +79 -19
  19. package/dist/ai/tools/resolve.d.ts +2 -0
  20. package/dist/ai/tools/resolve.js +195 -0
  21. package/dist/ai/tools/types.d.ts +5 -7
  22. package/dist/cli/commands/accounts.js +2 -2
  23. package/dist/cli/commands/record.js +4 -2
  24. package/dist/cli/commands/resolve.d.ts +2 -0
  25. package/dist/cli/commands/resolve.js +13 -0
  26. package/dist/cli/commands/scan.js +18 -22
  27. package/dist/cli/commands/status.js +4 -2
  28. package/dist/cli/format.js +1 -1
  29. package/dist/cli/index.js +10 -10
  30. package/dist/cli/ink/hooks/useFooterText.js +1 -1
  31. package/dist/cli/ink/hooks/useTextInput.js +0 -3
  32. package/dist/cli/ink/scan_dashboard.d.ts +2 -2
  33. package/dist/cli/ink/scan_dashboard.js +3 -3
  34. package/dist/cli/setup.js +6 -3
  35. package/dist/cli/ux.js +1 -1
  36. package/dist/db/queries/account-balance.d.ts +140 -0
  37. package/dist/db/queries/account-balance.js +355 -0
  38. package/dist/db/queries/account_balance.d.ts +0 -1
  39. package/dist/db/queries/account_balance.js +0 -10
  40. package/dist/db/queries/action-log.d.ts +29 -0
  41. package/dist/db/queries/action-log.js +27 -0
  42. package/dist/db/queries/action_log.d.ts +1 -1
  43. package/dist/db/queries/concerns.d.ts +10 -0
  44. package/dist/db/queries/concerns.js +21 -0
  45. package/dist/db/queries/transactions.d.ts +3 -22
  46. package/dist/db/queries/transactions.js +4 -5
  47. package/dist/db/queries/unknowns.d.ts +62 -0
  48. package/dist/db/queries/unknowns.js +114 -0
  49. package/dist/db/schema.js +3 -3
  50. package/dist/resolver/pipeline.d.ts +16 -0
  51. package/dist/resolver/pipeline.js +38 -0
  52. package/dist/resolver/prompts.d.ts +8 -0
  53. package/dist/resolver/prompts.js +26 -0
  54. package/dist/scanner/account-mutex.d.ts +1 -0
  55. package/dist/scanner/account-mutex.js +16 -0
  56. package/dist/scanner/buffer.d.ts +10 -10
  57. package/dist/scanner/buffer.js +15 -15
  58. package/dist/scanner/decrypt-queue.d.ts +57 -0
  59. package/dist/scanner/decrypt-queue.js +114 -0
  60. package/dist/scanner/detectors/correlations.d.ts +2 -0
  61. package/dist/scanner/detectors/correlations.js +51 -0
  62. package/dist/scanner/detectors/duplicates.d.ts +2 -0
  63. package/dist/scanner/detectors/duplicates.js +75 -0
  64. package/dist/scanner/detectors/index.d.ts +18 -0
  65. package/dist/scanner/detectors/index.js +39 -0
  66. package/dist/scanner/detectors/recurrences.d.ts +2 -0
  67. package/dist/scanner/detectors/recurrences.js +49 -0
  68. package/dist/scanner/detectors/similar_accounts.d.ts +2 -0
  69. package/dist/scanner/detectors/similar_accounts.js +64 -0
  70. package/dist/scanner/detectors/similarities.d.ts +2 -0
  71. package/dist/scanner/detectors/similarities.js +73 -0
  72. package/dist/scanner/detectors/types.d.ts +16 -0
  73. package/dist/scanner/detectors/types.js +1 -0
  74. package/dist/scanner/inspectors/correlations.d.ts +2 -0
  75. package/dist/scanner/inspectors/correlations.js +47 -0
  76. package/dist/scanner/inspectors/duplicates.d.ts +2 -0
  77. package/dist/scanner/inspectors/duplicates.js +75 -0
  78. package/dist/scanner/inspectors/index.d.ts +19 -0
  79. package/dist/scanner/inspectors/index.js +39 -0
  80. package/dist/scanner/inspectors/recurrences.d.ts +2 -0
  81. package/dist/scanner/inspectors/recurrences.js +49 -0
  82. package/dist/scanner/inspectors/similarities.d.ts +2 -0
  83. package/dist/scanner/inspectors/similarities.js +73 -0
  84. package/dist/scanner/inspectors/types.d.ts +16 -0
  85. package/dist/scanner/inspectors/types.js +1 -0
  86. package/dist/scanner/pipeline.d.ts +6 -4
  87. package/dist/scanner/pipeline.js +51 -88
  88. package/dist/scanner/prompts.js +2 -2
  89. package/package.json +3 -2
@@ -0,0 +1,73 @@
1
+ import { findSimilarAccounts } from "../../db/queries/account-balance.js";
2
/**
 * Flag pairs of accounts that `findSimilarAccounts` reports as near-identical
 * by name. (The similarity metric and threshold live in that query — the
 * previous comment cited Levenshtein ≥ 0.85; confirm there.)
 *
 * Runs only when the scan scope contains at least one file — the assumption is
 * that the scanner may have created a new account this run, so a fresh
 * similarity sweep is worthwhile.
 *
 * Idempotent: a pair that already has a `similar_accounts` unknown (as loaded
 * by `loadAlreadyFlaggedAccountPairs`) is not flagged again, and a pair that
 * shows up more than once in the same sweep — in either order — is emitted
 * only once. The "Merge A into B" option strings are what the loader later
 * parses to recognise a pair, so their wording must stay stable.
 *
 * @param db    Database handle passed through to the queries.
 * @param scope Inspector scope; only `scope.fileIds.length` is consulted here.
 * @returns     Array of unknown records to insert (possibly empty).
 */
function inspect(db, scope) {
    if (scope.fileIds.length === 0) {
        return [];
    }
    const pairs = findSimilarAccounts(db);
    if (pairs.length === 0) {
        return [];
    }
    // Copy so that marking pairs as seen never mutates the loader's result.
    const seen = new Set(loadAlreadyFlaggedAccountPairs(db));
    const out = [];
    for (const pair of pairs) {
        const key = pairKey(pair.a.id, pair.b.id);
        if (seen.has(key)) {
            continue;
        }
        // Remember the pair immediately so a repeat (or the reversed pair)
        // later in this same sweep does not produce a duplicate unknown.
        seen.add(key);
        out.push({
            file_id: null,
            transaction_id: null,
            account_id: pair.a.id,
            kind: "similar_accounts",
            prompt: `These two accounts look like the same thing (similarity ${pair.similarity}):\n ${pair.a.id} — ${pair.a.name}\n ${pair.b.id} — ${pair.b.name}`,
            options: [
                `Merge ${pair.b.id} into ${pair.a.id}`,
                `Merge ${pair.a.id} into ${pair.b.id}`,
                "Keep separate",
                "Skip",
            ],
        });
    }
    return out;
}
38
/**
 * Collect the account pairs that already have a `similar_accounts` unknown.
 *
 * The query does not filter on resolution state, so both open and resolved
 * unknowns are read. Each such unknown embeds the two account ids in its
 * option strings ("Merge X into Y"); those are parsed back out so a pair the
 * user has already seen — including one previously answered "Keep separate" —
 * is not flagged again.
 *
 * Rows whose `options_json` is empty, is not valid JSON, or does not decode to
 * an array are skipped. Non-string entries inside an otherwise valid array are
 * ignored individually, so one odd entry does not hide the rest of the row.
 *
 * @param db Database handle exposing `prepare(sql).all()`.
 * @returns  Set of order-independent pair keys (see `pairKey`).
 */
function loadAlreadyFlaggedAccountPairs(db) {
    const rows = db
        .prepare(`SELECT account_id, options_json FROM unknowns
 WHERE kind = 'similar_accounts' AND account_id IS NOT NULL`)
        .all();
    const mergeOption = /Merge (\S+) into (\S+)/;
    const flagged = new Set();
    for (const row of rows) {
        if (!row.options_json) {
            continue;
        }
        let options;
        try {
            options = JSON.parse(row.options_json);
        }
        catch {
            // Malformed options_json: best-effort, skip this row only.
            continue;
        }
        if (!Array.isArray(options)) {
            continue;
        }
        for (const opt of options) {
            if (typeof opt !== "string") {
                continue;
            }
            const match = mergeOption.exec(opt);
            if (match) {
                flagged.add(pairKey(match[1], match[2]));
            }
        }
    }
    return flagged;
}
67
/**
 * Build an order-independent key for a pair of ids: the two values are sorted
 * with the default comparator and joined with "|", so (a, b) and (b, a) map to
 * the same key.
 */
function pairKey(a, b) {
    const ordered = [a, b];
    ordered.sort();
    return ordered.join("|");
}
70
/**
 * Inspector entry for the similar-accounts sweep: pairs the inspector's name
 * with its `inspect` implementation.
 */
export const similarAccountsInspector = {
    name: "similar_accounts",
    inspect: inspect,
};
@@ -0,0 +1,16 @@
1
+ import type Database from "libsql";
2
+ import type { RecordUnknownInput } from "../../db/queries/unknowns.js";
3
+ /**
4
+ * Scope passed to every inspector by the scanner's Phase 4 (the post-commit inspector sweep). Inspectors emit
5
+ * unknowns for transactions whose `source_file_id` is in `fileIds` (or for
6
+ * cross-pair findings where at least one side lives in that set). Inspectors
7
+ * are free to read the wider DB for context — the scope is a filter for what
8
+ * to surface, not a limit on what to read.
9
+ */
10
export interface InspectorScope {
    /** Ids of the files this inspection pass is scoped to. */
    readonly fileIds: ReadonlyArray<string>;
}
13
export interface Inspector {
    /** Identifier of the inspector. */
    readonly name: string;
    /** Reads `db` and returns the unknowns to record for the given `scope`. */
    inspect(db: Database.Database, scope: InspectorScope): RecordUnknownInput[];
}
@@ -0,0 +1 @@
1
+ export {};
@@ -1,10 +1,11 @@
1
+ import { type InspectionRunResult } from "./inspectors/index.js";
1
2
  export type ScanFileStatus = "scanned" | "replaced" | "failed" | "skipped";
2
3
  export interface ScanFileResult {
3
4
  name: string;
4
5
  relPath: string;
5
6
  status: ScanFileStatus;
6
7
  transactions: number;
7
- concerns: number;
8
+ unknowns: number;
8
9
  error?: string;
9
10
  }
10
11
  export interface ScanSummary {
@@ -13,7 +14,7 @@ export interface ScanSummary {
13
14
  replaced: number;
14
15
  skipped: number;
15
16
  failed: number;
16
- concerns: number;
17
+ unknowns: number;
17
18
  details: ScanFileResult[];
18
19
  }
19
20
  /** Event hooks the CLI subscribes to. All callbacks are best-effort and ignored if absent. */
@@ -41,11 +42,12 @@ export interface ScanRunEvents {
41
42
  fileName: string;
42
43
  status: "scanned" | "failed";
43
44
  transactions: number;
44
- concerns: number;
45
+ unknowns: number;
45
46
  error?: string;
46
47
  }) => void;
47
- correlating?: (pairs: number) => void;
48
48
  committing?: () => void;
49
+ /** Post-commit inspector pass. `result.total` is the count of unknowns emitted by all inspectors combined. */
50
+ inspecting?: (result: InspectionRunResult) => void;
49
51
  }
50
52
  export interface RunScanOptions {
51
53
  regex?: string;
@@ -1,14 +1,14 @@
1
1
  import { randomUUID } from "crypto";
2
2
  import { getDb } from "../db/connection.js";
3
- import { countOpenConcerns } from "../db/queries/concerns.js";
4
- import { correlatePairs } from "../db/queries/transactions.js";
3
+ import { countOpenUnknowns } from "../db/queries/unknowns.js";
4
+ import { runInspectors } from "./inspectors/index.js";
5
5
  import { runScanAgent } from "../ai/agent.js";
6
6
  import { buildDocumentBlock } from "./pdf.js";
7
7
  import { buildScanUserMessage } from "./prompts.js";
8
8
  import { scanDataDir } from "./walker.js";
9
9
  import { BufferedWriteContext } from "./buffer.js";
10
10
  import { runWithConcurrency } from "./concurrency.js";
11
- import { decryptQueue, confirmProceedAfterFailures, } from "./decrypt_queue.js";
11
+ import { decryptQueue, confirmProceedAfterFailures, } from "./decrypt-queue.js";
12
12
  export function compileMatcher(input) {
13
13
  return new RegExp(input, "i");
14
14
  }
@@ -38,13 +38,35 @@ export async function runScan(opts = {}) {
38
38
  }
39
39
  // Phase 2 — parallel scan with buffered writes
40
40
  const scanResults = await scanInParallel(db, decryptResult.decrypted, { concurrency, events });
41
- // Phase 3 — cross-file correlation pre-commit
42
- const pairCount = applyCrossFileCorrelations(scanResults);
43
- events?.correlating?.(pairCount);
44
- // Phase 4 — per-file commit
41
+ // Phase 3 — per-file commit
45
42
  events?.committing?.();
46
- const fileResults = commitAll(db, decryptResult, scanResults);
47
- return buildSummary(allFiles.length, fileResults);
43
+ const { details, committedFileIds } = commitAll(db, decryptResult, scanResults);
44
+ // Phase 4 — post-commit inspector sweep (duplicates, correlations, recurrences, similar accounts)
45
+ if (committedFileIds.length > 0) {
46
+ const inspectionResult = runInspectors(db, { fileIds: committedFileIds });
47
+ events?.inspecting?.(inspectionResult);
48
+ addInspectionUnknownsToSummary(details, committedFileIds, inspectionResult.total);
49
+ }
50
+ return buildSummary(allFiles.length, details);
51
+ }
52
/**
 * Fold the inspector pass's unknown count into the per-file summary rows.
 *
 * Inspector unknowns are inserted after the per-file commit, so the `unknowns`
 * counters already present in `details` do not include them. The total is
 * split evenly across the rows whose status is "scanned" or "replaced"; when
 * it does not divide evenly, the first rows each take one extra. Rows are
 * mutated in place. The per-file attribution is only an approximation for the
 * summary — what it preserves is the overall sum.
 *
 * @param details          Per-file result rows; updated in place.
 * @param committedFileIds Ids committed this run; only its length is checked.
 * @param total            Number of unknowns emitted by the inspectors.
 */
function addInspectionUnknownsToSummary(details, committedFileIds, total) {
    const nothingToSpread = total === 0 || committedFileIds.length === 0;
    if (nothingToSpread) {
        return;
    }
    const committedRows = details.filter(({ status }) => status === "scanned" || status === "replaced");
    const rowCount = committedRows.length;
    if (rowCount === 0) {
        return;
    }
    const baseShare = Math.floor(total / rowCount);
    const leftover = total - baseShare * rowCount;
    committedRows.forEach((row, index) => {
        // The first `leftover` rows absorb the remainder, one each.
        row.unknowns += index < leftover ? baseShare + 1 : baseShare;
    });
}
49
71
  async function scanInParallel(db, files, opts) {
50
72
  const tasks = files.map(f => () => scanOneFile(db, f, opts.events));
@@ -97,7 +119,7 @@ async function scanOneFile(db, file, events) {
97
119
  fileName: file.fileName,
98
120
  status: "scanned",
99
121
  transactions: buffer.transactions.length,
100
- concerns: buffer.concerns.length,
122
+ unknowns: buffer.unknowns.length,
101
123
  });
102
124
  return { decryptedFile: file, buffer, agentText: text };
103
125
  }
@@ -107,103 +129,43 @@ async function scanOneFile(db, file, events) {
107
129
  fileName: file.fileName,
108
130
  status: "failed",
109
131
  transactions: 0,
110
- concerns: 0,
132
+ unknowns: 0,
111
133
  error: message,
112
134
  });
113
135
  return { decryptedFile: file, buffer, error: message, agentText: "" };
114
136
  }
115
137
  }
116
- /** Phase 3: cross-file correlation */
117
- /**
118
- * For every pair of buffered entries that look like the same money movement
119
- * across two different files, append a mirror concern to each side's buffer.
120
- * Returns the number of pairs detected so the CLI can report it.
121
- */
122
- function applyCrossFileCorrelations(results) {
123
- const all = [];
124
- for (const res of results) {
125
- if (res.error)
126
- continue;
127
- for (const bt of res.buffer.transactions) {
128
- all.push({
129
- file: res,
130
- transactionId: bt.transaction_id,
131
- postings: bt.input.postings,
132
- date: bt.input.date,
133
- description: bt.input.description,
134
- });
135
- }
136
- }
137
- const candidates = all.map(e => {
138
- const debit = e.postings.reduce((s, p) => s + (p.debit ?? 0), 0);
139
- const currency = e.postings.find(p => p.currency)?.currency ?? "THB";
140
- const ids = Array.from(new Set(e.postings.map(p => p.account_id)));
141
- return {
142
- id: e.transactionId,
143
- date: e.date,
144
- description: e.description,
145
- amount: Math.round(debit * 100) / 100,
146
- currency,
147
- account_ids: ids,
148
- account_names: ids,
149
- };
150
- });
151
- const pairs = correlatePairs(candidates, { toleranceDays: 3 });
152
- const byTransaction = new Map(all.map(a => [a.transactionId, a]));
153
- for (const pair of pairs) {
154
- const a = byTransaction.get(pair.a.id);
155
- const b = byTransaction.get(pair.b.id);
156
- if (!a || !b)
157
- continue;
158
- if (a.file === b.file)
159
- continue;
160
- const amountStr = `฿${pair.amount.toLocaleString("en-US", { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
161
- a.file.buffer.appendConcern({
162
- transaction_id: a.transactionId,
163
- account_id: null,
164
- prompt: `Looks like the matching half of this ${amountStr} movement on ${a.date} was also recorded in ${b.file.decryptedFile.fileName} on ${b.date}. Merge during review?`,
165
- options: ["Yes — merge into one transaction", "No — these are two real events", "Skip — leave as is"],
166
- });
167
- b.file.buffer.appendConcern({
168
- transaction_id: b.transactionId,
169
- account_id: null,
170
- prompt: `Looks like the matching half of this ${amountStr} movement on ${b.date} was also recorded in ${a.file.decryptedFile.fileName} on ${a.date}. Merge during review?`,
171
- options: ["Yes — merge into one transaction", "No — these are two real events", "Skip — leave as is"],
172
- });
173
- }
174
- return pairs.filter(p => byTransaction.get(p.a.id)?.file !== byTransaction.get(p.b.id)?.file).length;
175
- }
176
- /** Phase 4: commit */
177
138
  function commitAll(db, decryptResult, scanResults) {
178
- const out = [];
139
+ const details = [];
140
+ const committedFileIds = [];
179
141
  for (const skipped of decryptResult.skipped) {
180
- out.push({
142
+ details.push({
181
143
  name: skipped.file.name,
182
144
  relPath: skipped.file.relPath,
183
145
  status: "skipped",
184
146
  transactions: 0,
185
- concerns: countOpenConcerns(db, { file_id: skipped.existingScannedFileId }),
147
+ unknowns: countOpenUnknowns(db, { file_id: skipped.existingScannedFileId }),
186
148
  });
187
149
  }
188
150
  for (const failed of decryptResult.failed) {
189
- out.push({
151
+ details.push({
190
152
  name: failed.file.name,
191
153
  relPath: failed.file.relPath,
192
154
  status: "failed",
193
155
  transactions: 0,
194
- concerns: 0,
156
+ unknowns: 0,
195
157
  error: failed.error,
196
158
  });
197
159
  }
198
160
  for (const res of scanResults) {
199
161
  const { decryptedFile, buffer, error, agentText } = res;
200
162
  if (error) {
201
- out.push({
163
+ details.push({
202
164
  name: decryptedFile.fileName,
203
165
  relPath: decryptedFile.relPath,
204
166
  status: "failed",
205
167
  transactions: 0,
206
- concerns: buffer.concerns.length,
168
+ unknowns: buffer.unknowns.length,
207
169
  error,
208
170
  });
209
171
  continue;
@@ -219,26 +181,27 @@ function commitAll(db, decryptResult, scanResults) {
219
181
  });
220
182
  const counts = buffer.commit(db, scannedFileId);
221
183
  setFileStatus(db, scannedFileId, "scanned", { raw_text: agentText });
222
- out.push({
184
+ committedFileIds.push(scannedFileId);
185
+ details.push({
223
186
  name: decryptedFile.fileName,
224
187
  relPath: decryptedFile.relPath,
225
188
  status: decryptedFile.replacesPriorScannedFileId ? "replaced" : "scanned",
226
189
  transactions: counts.transactions,
227
- concerns: counts.concerns,
190
+ unknowns: counts.unknowns,
228
191
  });
229
192
  }
230
193
  catch (err) {
231
- out.push({
194
+ details.push({
232
195
  name: decryptedFile.fileName,
233
196
  relPath: decryptedFile.relPath,
234
197
  status: "failed",
235
198
  transactions: 0,
236
- concerns: buffer.concerns.length,
199
+ unknowns: buffer.unknowns.length,
237
200
  error: err?.message ?? "commit failed",
238
201
  });
239
202
  }
240
203
  }
241
- return out;
204
+ return { details, committedFileIds };
242
205
  }
243
206
  /** Summary assembly */
244
207
  function buildSummary(total, details) {
@@ -248,22 +211,22 @@ function buildSummary(total, details) {
248
211
  replaced: 0,
249
212
  skipped: 0,
250
213
  failed: 0,
251
- concerns: 0,
214
+ unknowns: 0,
252
215
  details,
253
216
  };
254
217
  for (const d of details) {
255
218
  summary[d.status]++;
256
- summary.concerns += d.concerns;
219
+ summary.unknowns += d.unknowns;
257
220
  }
258
221
  return summary;
259
222
  }
260
223
  function buildAbortedSummary(total, decrypt) {
261
224
  const details = [
262
225
  ...decrypt.skipped.map(s => ({
263
- name: s.file.name, relPath: s.file.relPath, status: "skipped", transactions: 0, concerns: 0,
226
+ name: s.file.name, relPath: s.file.relPath, status: "skipped", transactions: 0, unknowns: 0,
264
227
  })),
265
228
  ...decrypt.failed.map(f => ({
266
- name: f.file.name, relPath: f.file.relPath, status: "failed", transactions: 0, concerns: 0, error: f.error,
229
+ name: f.file.name, relPath: f.file.relPath, status: "failed", transactions: 0, unknowns: 0, error: f.error,
267
230
  })),
268
231
  ];
269
232
  return buildSummary(total, details);
@@ -13,8 +13,8 @@ export function buildScanUserMessage(opts) {
13
13
  `2. Infer the primary account type (asset / liability / income / expense) from the document's header, account type field, and transaction patterns.`,
14
14
  `3. If this document references an account that isn't yet in the chart, call create_account once (pass parent_id under the matching top-level type root). Mask the account number to the last 4 digits.`,
15
15
  `4. Persist any document-level metadata you find (statement_day, due_day, points_balance, etc.) using update_account_metadata.`,
16
- `5. For every transaction in the document, call record_transaction with balanced debit/credit postings. Attach a merchant block (canonical_name + alias + default_account_id when categorization is confident) for any external counter-party. Reuse existing accounts; create expense categories under their parent (e.g. expense:food before expense:food:groceries) as needed. When you cannot categorize confidently, post the expense side to expense:uncategorized and call note_concern with kind="uncategorized_expense".`,
17
- `6. Never pause to ask the user. If a row is ambiguous, post your best-guess transaction first, then call note_concern with details and the new transaction_id. If a row is truly unparseable, skip it and call note_concern with the raw row text (no transaction_id). A missing row is better than a wrong row.`,
16
+ `5. For every transaction in the document, call record_transaction with balanced debit/credit postings. Attach a merchant block (canonical_name + alias + default_account_id when categorization is confident) for any external counter-party. Reuse existing accounts; create expense categories under their parent (e.g. expense:food before expense:food:groceries) as needed. When you cannot categorize confidently, post the expense side to expense:uncategorized and call note_unknown with kind="uncategorized_expense".`,
17
+ `6. Never pause to ask the user. If a row is ambiguous, post your best-guess transaction first, then call note_unknown with details and the new transaction_id. If a row is truly unparseable, skip it and call note_unknown with the raw row text (no transaction_id). A missing row is better than a wrong row.`,
18
18
  `7. When you are done, call mark_file_scanned with a short summary.`,
19
19
  ].join("\n");
20
20
  }
package/package.json CHANGED
@@ -1,9 +1,10 @@
1
1
  {
2
2
  "name": "plasalid",
3
- "version": "0.5.8",
4
- "description": "Plasalid — AI Harness for Personal Finance",
3
+ "version": "0.6.1",
4
+ "description": "Plasalid — The Harness Layer for Personal Finance",
5
5
  "keywords": [
6
6
  "finance",
7
+ "harness",
7
8
  "personal-finance",
8
9
  "aggregator",
9
10
  "parser",