plasalid 0.5.8 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -12
- package/dist/accounts/taxonomy.d.ts +1 -1
- package/dist/accounts/taxonomy.js +2 -2
- package/dist/ai/agent.d.ts +7 -6
- package/dist/ai/agent.js +9 -8
- package/dist/ai/personas.d.ts +1 -1
- package/dist/ai/personas.js +69 -66
- package/dist/ai/prompt-sections.d.ts +4 -5
- package/dist/ai/prompt-sections.js +11 -11
- package/dist/ai/system-prompt.d.ts +2 -3
- package/dist/ai/system-prompt.js +5 -5
- package/dist/ai/tools/common.js +13 -5
- package/dist/ai/tools/index.js +15 -15
- package/dist/ai/tools/ingest.d.ts +2 -2
- package/dist/ai/tools/ingest.js +210 -87
- package/dist/ai/tools/merchants.js +27 -12
- package/dist/ai/tools/read.js +36 -20
- package/dist/ai/tools/record.js +79 -19
- package/dist/ai/tools/resolve.d.ts +2 -0
- package/dist/ai/tools/resolve.js +195 -0
- package/dist/ai/tools/types.d.ts +5 -7
- package/dist/cli/commands/accounts.js +2 -2
- package/dist/cli/commands/record.js +4 -2
- package/dist/cli/commands/resolve.d.ts +2 -0
- package/dist/cli/commands/resolve.js +13 -0
- package/dist/cli/commands/scan.js +18 -22
- package/dist/cli/commands/status.js +4 -2
- package/dist/cli/format.js +1 -1
- package/dist/cli/index.js +10 -10
- package/dist/cli/ink/hooks/useFooterText.js +1 -1
- package/dist/cli/ink/hooks/useTextInput.js +0 -3
- package/dist/cli/ink/scan_dashboard.d.ts +2 -2
- package/dist/cli/ink/scan_dashboard.js +3 -3
- package/dist/cli/setup.js +6 -3
- package/dist/cli/ux.js +1 -1
- package/dist/db/queries/account-balance.d.ts +140 -0
- package/dist/db/queries/account-balance.js +355 -0
- package/dist/db/queries/account_balance.d.ts +0 -1
- package/dist/db/queries/account_balance.js +0 -10
- package/dist/db/queries/action-log.d.ts +29 -0
- package/dist/db/queries/action-log.js +27 -0
- package/dist/db/queries/action_log.d.ts +1 -1
- package/dist/db/queries/concerns.d.ts +10 -0
- package/dist/db/queries/concerns.js +21 -0
- package/dist/db/queries/transactions.d.ts +3 -22
- package/dist/db/queries/transactions.js +4 -5
- package/dist/db/queries/unknowns.d.ts +62 -0
- package/dist/db/queries/unknowns.js +114 -0
- package/dist/db/schema.js +3 -3
- package/dist/resolver/pipeline.d.ts +16 -0
- package/dist/resolver/pipeline.js +38 -0
- package/dist/resolver/prompts.d.ts +8 -0
- package/dist/resolver/prompts.js +26 -0
- package/dist/scanner/account-mutex.d.ts +1 -0
- package/dist/scanner/account-mutex.js +16 -0
- package/dist/scanner/buffer.d.ts +10 -10
- package/dist/scanner/buffer.js +15 -15
- package/dist/scanner/decrypt-queue.d.ts +57 -0
- package/dist/scanner/decrypt-queue.js +114 -0
- package/dist/scanner/detectors/correlations.d.ts +2 -0
- package/dist/scanner/detectors/correlations.js +51 -0
- package/dist/scanner/detectors/duplicates.d.ts +2 -0
- package/dist/scanner/detectors/duplicates.js +75 -0
- package/dist/scanner/detectors/index.d.ts +18 -0
- package/dist/scanner/detectors/index.js +39 -0
- package/dist/scanner/detectors/recurrences.d.ts +2 -0
- package/dist/scanner/detectors/recurrences.js +49 -0
- package/dist/scanner/detectors/similar_accounts.d.ts +2 -0
- package/dist/scanner/detectors/similar_accounts.js +64 -0
- package/dist/scanner/detectors/similarities.d.ts +2 -0
- package/dist/scanner/detectors/similarities.js +73 -0
- package/dist/scanner/detectors/types.d.ts +16 -0
- package/dist/scanner/detectors/types.js +1 -0
- package/dist/scanner/inspectors/correlations.d.ts +2 -0
- package/dist/scanner/inspectors/correlations.js +47 -0
- package/dist/scanner/inspectors/duplicates.d.ts +2 -0
- package/dist/scanner/inspectors/duplicates.js +75 -0
- package/dist/scanner/inspectors/index.d.ts +19 -0
- package/dist/scanner/inspectors/index.js +39 -0
- package/dist/scanner/inspectors/recurrences.d.ts +2 -0
- package/dist/scanner/inspectors/recurrences.js +49 -0
- package/dist/scanner/inspectors/similarities.d.ts +2 -0
- package/dist/scanner/inspectors/similarities.js +73 -0
- package/dist/scanner/inspectors/types.d.ts +16 -0
- package/dist/scanner/inspectors/types.js +1 -0
- package/dist/scanner/pipeline.d.ts +6 -4
- package/dist/scanner/pipeline.js +51 -88
- package/dist/scanner/prompts.js +2 -2
- package/package.json +3 -2
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { findDuplicateTransactions } from "../../db/queries/transactions.js";
|
|
2
|
+
import { formatAmount } from "../../currency.js";
|
|
3
|
+
/**
 * Duplicate detector entry point.
 *
 * Emits one "duplicate" concern per group returned by
 * findDuplicateTransactions, attached to the member with the latest date.
 * Earlier members are passed to buildPrompt so they appear in the prompt.
 *
 * Each group is first pruned by dedupeSameFileSameDaySameMerchant. A group is
 * dropped when fewer than two members remain or when none of its members
 * belongs to a file from this scan run.
 *
 * @param db Database handle forwarded to the query helpers.
 * @param scope Scan scope; only `scope.fileIds` is read.
 * @returns Array of concern inputs (possibly empty).
 */
function detect(db, scope) {
    if (scope.fileIds.length === 0) {
        return [];
    }
    const duplicateGroups = findDuplicateTransactions(db);
    if (duplicateGroups.length === 0) {
        return [];
    }
    const scannedIds = transactionsInScope(db, scope.fileIds);
    const concerns = [];
    for (const rawGroup of duplicateGroups) {
        const members = dedupeSameFileSameDaySameMerchant(rawGroup);
        const relevant = members.length >= 2 && members.some(member => scannedIds.has(member.id));
        if (!relevant) {
            continue;
        }
        // Sort a copy so the group handed back by the query is left untouched.
        const byDate = members.slice().sort((left, right) => left.date.localeCompare(right.date));
        const lastIndex = byDate.length - 1;
        const newest = byDate[lastIndex];
        const earlier = byDate.slice(0, lastIndex);
        concerns.push({
            file_id: null,
            transaction_id: newest.id,
            account_id: null,
            kind: "duplicate",
            prompt: buildPrompt(newest, earlier),
            options: ["Delete this one", "Delete the older one", "Keep both", "Skip"],
        });
    }
    return concerns;
}
|
|
42
|
+
/**
 * Collapse same-file, same-date, same-merchant transactions to a single
 * representative (the first one seen) so they don't trigger a "duplicate"
 * concern between themselves. Members from different files or dates are all
 * kept.
 *
 * Transactions without a source file are never collapsed with file-backed
 * ones: they are tracked by id in a separate index. Sharing one key space
 * would let a file-less transaction whose id happens to equal a composite
 * "file|date|merchant" key replace or suppress an unrelated member.
 *
 * @param group Array of transactions; reads `id`, `source_file_id`, `date`
 *   and `merchant_id` from each.
 * @returns New array of the surviving transactions in first-seen order.
 */
function dedupeSameFileSameDaySameMerchant(group) {
    const kept = [];
    const fileKeys = new Set();
    const positionById = new Map();
    for (const tx of group) {
        if (tx.source_file_id == null) {
            // A repeated id overwrites the earlier entry in place.
            const position = positionById.get(tx.id);
            if (position === undefined) {
                positionById.set(tx.id, kept.length);
                kept.push(tx);
            }
            else {
                kept[position] = tx;
            }
            continue;
        }
        const key = `${tx.source_file_id}|${tx.date}|${tx.merchant_id ?? ""}`;
        if (!fileKeys.has(key)) {
            fileKeys.add(key);
            kept.push(tx);
        }
    }
    return kept;
}
|
|
60
|
+
/**
 * Render the prompt text for a duplicate concern.
 *
 * The first line after the heading describes the newest transaction (amount,
 * date, description, account names); each following line describes one of
 * the earlier members it matches.
 *
 * @param newest Transaction the concern is attached to.
 * @param others Remaining members of the duplicate group.
 * @returns Multi-line prompt string.
 */
function buildPrompt(newest, others) {
    const formatted = formatAmount(newest.amount);
    const body = [
        `${formatted} on ${newest.date} (${newest.description}) — accounts: ${newest.account_names.join(", ")}`,
    ];
    for (const earlier of others) {
        body.push(` matches ${earlier.date} (${earlier.description}) — accounts: ${earlier.account_names.join(", ")}`);
    }
    return `Possible duplicate transaction.\n${body.join("\n")}`;
}
|
|
68
|
+
/**
 * Collect the ids of every transaction whose `source_file_id` is one of
 * `fileIds`.
 *
 * The query is built with one `?` placeholder per file id and the ids are
 * bound as parameters. Callers in this file return early on an empty
 * `fileIds`, so the `IN ()` form is not reached from them.
 *
 * @param db Database handle exposing `prepare(sql).all(...params)`.
 * @param fileIds Source file ids to match.
 * @returns Set of matching transaction ids.
 */
function transactionsInScope(db, fileIds) {
    const marks = Array.from(fileIds, () => "?").join(",");
    const statement = db.prepare(`SELECT id FROM transactions WHERE source_file_id IN (${marks})`);
    const ids = new Set();
    for (const row of statement.all(...fileIds)) {
        ids.add(row.id);
    }
    return ids;
}
|
|
75
|
+
// Detector object pairing the "duplicates" name with this module's detect().
export const duplicatesDetector = {
    name: "duplicates",
    detect,
};
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import type { Detector, DetectorScope } from "./types.js";
|
|
3
|
+
/**
|
|
4
|
+
* The ordered list of post-commit detectors the scanner runs. Order matters
|
|
5
|
+
* only for the resolver's priority sweep, not for correctness — each detector
|
|
6
|
+
* emits concerns independently of the others.
|
|
7
|
+
*/
|
|
8
|
+
export declare const detectors: readonly Detector[];
|
|
9
|
+
/** Summary returned by runDetectors. */
export interface DetectionRunResult {
    /** Number of concerns counted across all detectors. */
    total: number;
    /** Concern count per detector, keyed by the detector's `name`. */
    byDetector: Record<string, number>;
}
|
|
13
|
+
/**
|
|
14
|
+
* Run every detector in order and insert any concerns they produce. Returns
|
|
15
|
+
* counts so the CLI can report "X concerns surfaced." Failure of one detector
|
|
16
|
+
* never aborts the run — it logs and the others still execute.
|
|
17
|
+
*/
|
|
18
|
+
export declare function runDetectors(db: Database.Database, scope: DetectorScope): DetectionRunResult;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { recordConcern } from "../../db/queries/concerns.js";
|
|
2
|
+
import { duplicatesDetector } from "./duplicates.js";
|
|
3
|
+
import { correlationsDetector } from "./correlations.js";
|
|
4
|
+
import { recurrencesDetector } from "./recurrences.js";
|
|
5
|
+
import { similarAccountsDetector } from "./similarities.js";
|
|
6
|
+
/**
 * Post-commit detectors in the order the scanner runs them. Per the original
 * note, ordering only affects the resolver's priority sweep; every detector
 * produces its concerns independently of the rest.
 */
export const detectors = [
    duplicatesDetector,
    correlationsDetector,
    recurrencesDetector,
    similarAccountsDetector,
];
|
|
17
|
+
/**
 * Run every detector in order and insert any concerns they produce. Returns
 * counts so the CLI can report "X concerns surfaced."
 *
 * Failure of one detector never aborts the run: the error is logged and the
 * remaining detectors still execute. Counts reflect concerns that were
 * actually recorded, so a detector that fails part-way through inserting is
 * reported with the number inserted before the failure rather than 0.
 *
 * @param db Database handle passed to each detector and to recordConcern.
 * @param scope Detector scope forwarded unchanged to each detector.
 * @returns `{ total, byDetector }` where `byDetector` maps detector name to
 *   its recorded-concern count.
 */
export function runDetectors(db, scope) {
    const byDetector = {};
    let total = 0;
    for (const detector of detectors) {
        let recorded = 0;
        try {
            for (const concern of detector.detect(db, scope)) {
                recordConcern(db, concern);
                recorded += 1;
            }
        }
        catch (err) {
            console.error(`[detector ${detector.name}] ${err?.message ?? err}`);
        }
        byDetector[detector.name] = recorded;
        total += recorded;
    }
    return { total, byDetector };
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { findRecurrenceCandidates } from "../../db/queries/recurrences.js";
|
|
2
|
+
import { formatAmount } from "../../currency.js";
|
|
3
|
+
/**
 * Recurrence detector entry point.
 *
 * Emits one "recurrence_candidate" concern per candidate from
 * findRecurrenceCandidates, attached to the last entry of the candidate's
 * `transactions` array. Candidates are skipped when their
 * `implied_frequency` is "irregular" or when that last transaction does not
 * belong to a file from this scan run.
 *
 * @param db Database handle forwarded to the query helpers.
 * @param scope Scan scope; only `scope.fileIds` is read.
 * @returns Array of concern inputs (possibly empty).
 */
function detect(db, scope) {
    if (scope.fileIds.length === 0) {
        return [];
    }
    const candidates = findRecurrenceCandidates(db);
    if (candidates.length === 0) {
        return [];
    }
    const scannedIds = transactionsInScope(db, scope.fileIds);
    return candidates
        .filter(candidate => candidate.implied_frequency !== "irregular")
        .map(candidate => ({
            candidate,
            latest: candidate.transactions[candidate.transactions.length - 1],
        }))
        .filter(({ latest }) => scannedIds.has(latest.id))
        .map(({ candidate, latest }) => ({
            file_id: null,
            transaction_id: latest.id,
            account_id: candidate.account_id,
            kind: "recurrence_candidate",
            prompt: buildPrompt(candidate, latest),
            options: ["Link as recurring", "Not recurring", "Skip"],
        }));
}
|
|
34
|
+
/**
 * Render the two-line prompt for a recurrence candidate: a summary line
 * (frequency, account name, amount, sighting count, median gap) followed by
 * the date and description of the latest transaction.
 *
 * @param candidate Recurrence candidate being surfaced.
 * @param latest Transaction the concern is attached to.
 * @returns Prompt string with the two lines joined by a newline.
 */
function buildPrompt(candidate, latest) {
    const formatted = formatAmount(candidate.amount, candidate.currency);
    const sightings = candidate.transactions.length;
    const summary = `Possible ${candidate.implied_frequency} recurrence on ${candidate.account_name}: ${formatted} (${sightings} sightings, median ${candidate.median_days_between} days apart).`;
    const latestLine = `Latest: ${latest.date} — ${latest.description}`;
    return `${summary}\n${latestLine}`;
}
|
|
42
|
+
/**
 * Look up the ids of all transactions that came from the given source files.
 *
 * One `?` placeholder is generated per entry in `fileIds`, and the ids are
 * bound as statement parameters.
 *
 * @param db Database handle exposing `prepare(sql).all(...params)`.
 * @param fileIds Source file ids to match.
 * @returns Set of matching transaction ids.
 */
function transactionsInScope(db, fileIds) {
    const slots = [];
    for (let i = 0; i < fileIds.length; i += 1) {
        slots.push("?");
    }
    const sql = `SELECT id FROM transactions WHERE source_file_id IN (${slots.join(",")})`;
    const matches = db.prepare(sql).all(...fileIds);
    return new Set(matches.map(match => match.id));
}
|
|
49
|
+
// Detector object pairing the "recurrences" name with this module's detect().
export const recurrencesDetector = {
    name: "recurrences",
    detect,
};
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { findSimilarAccounts } from "../../db/queries/account_balance.js";
|
|
2
|
+
/**
 * Similar-accounts detector entry point.
 *
 * Emits one "similar_accounts" concern for each pair reported by
 * findSimilarAccounts that loadAlreadyFlaggedAccountPairs does not already
 * know about. Returns nothing when the scan scope has no files. The concern
 * is attached to the pair's `a` account, and its options spell out both
 * merge directions by account id.
 *
 * @param db Database handle forwarded to the query helpers.
 * @param scope Scan scope; only `scope.fileIds.length` is read.
 * @returns Array of concern inputs (possibly empty).
 */
function detect(db, scope) {
    if (scope.fileIds.length === 0) {
        return [];
    }
    const candidatePairs = findSimilarAccounts(db);
    if (candidatePairs.length === 0) {
        return [];
    }
    const knownPairs = loadAlreadyFlaggedAccountPairs(db);
    return candidatePairs
        .filter(pair => !knownPairs.has(pairKey(pair.a.id, pair.b.id)))
        .map(pair => ({
            file_id: null,
            transaction_id: null,
            account_id: pair.a.id,
            kind: "similar_accounts",
            prompt: `These two accounts look like the same thing (similarity ${pair.similarity}):\n ${pair.a.id} — ${pair.a.name}\n ${pair.b.id} — ${pair.b.name}`,
            options: [
                `Merge ${pair.b.id} into ${pair.a.id}`,
                `Merge ${pair.a.id} into ${pair.b.id}`,
                "Keep separate",
                "Skip",
            ],
        }));
}
|
|
33
|
+
/**
 * `similar_accounts` concerns (open OR resolved) embed both account ids in
 * their options strings ("Merge X into Y"). Parse those out so a pair the
 * user has already seen is not flagged again. This includes pairs already
 * answered on a prior run, which would otherwise be re-flagged on every scan.
 *
 * Rows with missing or malformed `options_json`, or whose decoded value is
 * not an array, are skipped. Non-string entries inside an options array are
 * ignored individually, so they cannot hide a valid "Merge X into Y" entry
 * later in the same row.
 *
 * @param db Database handle exposing `prepare(sql).all()`.
 * @returns Set of order-independent pair keys as produced by pairKey.
 */
function loadAlreadyFlaggedAccountPairs(db) {
    const rows = db
        .prepare(`SELECT account_id, options_json FROM concerns
       WHERE kind = 'similar_accounts' AND account_id IS NOT NULL`)
        .all();
    const flagged = new Set();
    for (const row of rows) {
        if (!row.options_json) {
            continue;
        }
        let options;
        try {
            options = JSON.parse(row.options_json);
        }
        catch {
            // skip malformed options_json
            continue;
        }
        if (!Array.isArray(options)) {
            continue;
        }
        for (const opt of options) {
            if (typeof opt !== "string") {
                continue;
            }
            const match = opt.match(/Merge (\S+) into (\S+)/);
            if (match) {
                flagged.add(pairKey(match[1], match[2]));
            }
        }
    }
    return flagged;
}
|
|
61
|
+
/**
 * Build an order-independent key for a pair of account ids: the two values
 * are sorted with the default comparator and joined with "|".
 *
 * @param a First account id.
 * @param b Second account id.
 * @returns Key that is the same for (a, b) and (b, a).
 */
function pairKey(a, b) {
    const ordered = [a, b];
    ordered.sort();
    return ordered.join("|");
}
|
|
64
|
+
// Detector object pairing the "similar_accounts" name with this module's detect().
export const similarAccountsDetector = {
    name: "similar_accounts",
    detect,
};
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { findSimilarAccounts } from "../../db/queries/account-balance.js";
|
|
2
|
+
/**
 * Similar-accounts detector entry point.
 *
 * Runs only when the scan scope contains at least one file. Every pair
 * returned by findSimilarAccounts is checked against the pairs collected by
 * loadAlreadyFlaggedAccountPairs; pairs not seen before produce one
 * "similar_accounts" concern attached to the pair's `a` account, with both
 * merge directions offered as options.
 *
 * @param db Database handle forwarded to the query helpers.
 * @param scope Scan scope; only `scope.fileIds.length` is read.
 * @returns Array of concern inputs (possibly empty).
 */
function detect(db, scope) {
    if (scope.fileIds.length === 0) {
        return [];
    }
    const similarPairs = findSimilarAccounts(db);
    if (similarPairs.length === 0) {
        return [];
    }
    const seenPairs = loadAlreadyFlaggedAccountPairs(db);
    const concerns = [];
    for (const { a, b, similarity } of similarPairs) {
        if (seenPairs.has(pairKey(a.id, b.id))) {
            continue;
        }
        const mergeIntoA = `Merge ${b.id} into ${a.id}`;
        const mergeIntoB = `Merge ${a.id} into ${b.id}`;
        concerns.push({
            file_id: null,
            transaction_id: null,
            account_id: a.id,
            kind: "similar_accounts",
            prompt: `These two accounts look like the same thing (similarity ${similarity}):\n ${a.id} — ${a.name}\n ${b.id} — ${b.name}`,
            options: [mergeIntoA, mergeIntoB, "Keep separate", "Skip"],
        });
    }
    return concerns;
}
|
|
38
|
+
/**
 * `similar_accounts` concerns (open OR resolved) embed both account ids in
 * their options strings ("Merge X into Y"). Parse those out so we don't
 * re-flag a pair the user has already seen, including pairs they've already
 * answered "Keep separate" on a prior run.
 *
 * Rows with missing or malformed `options_json`, or whose decoded value is
 * not an array, are skipped. Non-string entries inside an options array are
 * ignored individually, so they cannot hide a valid "Merge X into Y" entry
 * later in the same row.
 *
 * @param db Database handle exposing `prepare(sql).all()`.
 * @returns Set of order-independent pair keys as produced by pairKey.
 */
function loadAlreadyFlaggedAccountPairs(db) {
    const rows = db
        .prepare(`SELECT account_id, options_json FROM concerns
       WHERE kind = 'similar_accounts' AND account_id IS NOT NULL`)
        .all();
    const flagged = new Set();
    for (const row of rows) {
        if (!row.options_json) {
            continue;
        }
        let options;
        try {
            options = JSON.parse(row.options_json);
        }
        catch {
            // skip malformed options_json
            continue;
        }
        if (!Array.isArray(options)) {
            continue;
        }
        for (const opt of options) {
            if (typeof opt !== "string") {
                continue;
            }
            const match = opt.match(/Merge (\S+) into (\S+)/);
            if (match) {
                flagged.add(pairKey(match[1], match[2]));
            }
        }
    }
    return flagged;
}
|
|
67
|
+
/**
 * Produce the same key regardless of argument order by sorting the two ids
 * with the default comparator before joining them with "|".
 *
 * @param a First account id.
 * @param b Second account id.
 * @returns Order-independent pair key.
 */
function pairKey(a, b) {
    const ids = [a, b];
    ids.sort();
    return ids.join("|");
}
|
|
70
|
+
// Detector object pairing the "similar_accounts" name with this module's detect().
export const similarAccountsDetector = { name: "similar_accounts", detect };
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import type { RecordConcernInput } from "../../db/queries/concerns.js";
|
|
3
|
+
/**
 * Scope handed to every detector by the scanner's Phase 5. A detector
 * surfaces concerns for transactions whose `source_file_id` appears in
 * `fileIds`, or for cross-pair findings where at least one side does.
 * The scope filters what is surfaced; it does not restrict what a detector
 * may read from the wider database for context.
 */
export interface DetectorScope {
    /** Ids of the source files the concerns should be limited to. */
    readonly fileIds: readonly string[];
}
|
|
13
|
+
/** A named check that turns database state into concern inputs. */
export interface Detector {
    /** Identifier for this detector. */
    readonly name: string;
    /** Produce the concern inputs for the given scope. */
    detect(db: Database.Database, scope: DetectorScope): RecordConcernInput[];
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { findCorrelatedTransactions } from "../../db/queries/transactions.js";
|
|
2
|
+
import { formatAmount } from "../../currency.js";
|
|
3
|
+
/**
 * Cross-account correlation inspector.
 *
 * Emits one "correlation" unknown per pair from findCorrelatedTransactions,
 * attached to whichever side has the later date (side `b` when the dates are
 * equal). Pairs with neither side in a file from this scan run are skipped.
 *
 * @param db Database handle forwarded to the query helpers.
 * @param scope Scan scope; only `scope.fileIds` is read.
 * @returns Array of unknown inputs (possibly empty).
 */
function inspect(db, scope) {
    if (scope.fileIds.length === 0) {
        return [];
    }
    const correlated = findCorrelatedTransactions(db);
    if (correlated.length === 0) {
        return [];
    }
    const scannedIds = transactionsInScope(db, scope.fileIds);
    const unknowns = [];
    for (const pair of correlated) {
        const touchesScan = scannedIds.has(pair.a.id) || scannedIds.has(pair.b.id);
        if (!touchesScan) {
            continue;
        }
        let older = pair.b;
        let newer = pair.a;
        if (pair.a.date <= pair.b.date) {
            older = pair.a;
            newer = pair.b;
        }
        unknowns.push({
            file_id: null,
            transaction_id: newer.id,
            account_id: null,
            kind: "correlation",
            prompt: buildPrompt(pair, older, newer),
            options: ["Merge into one transaction", "Keep separate (these are two real events)", "Skip"],
        });
    }
    return unknowns;
}
|
|
32
|
+
/**
 * Render the prompt for a correlation unknown: a heading with the amount and
 * day gap, then one line for the newer side and one for the older side.
 *
 * @param pair Correlated pair; reads `amount`, `currency` and `day_gap`.
 * @param older Side of the pair with the earlier date.
 * @param newer Side of the pair with the later date.
 * @returns Three-line prompt string.
 */
function buildPrompt(pair, older, newer) {
    const formatted = formatAmount(pair.amount, pair.currency);
    const describe = side => ` ${side.date} — ${side.description} — ${side.account_names.join(", ")}`;
    const heading = `Possible cross-account correlation (${formatted}, ${pair.day_gap} day(s) apart).`;
    return `${heading}\n${describe(newer)}\n${describe(older)}`;
}
|
|
40
|
+
/**
 * Return the ids of all transactions whose `source_file_id` is listed in
 * `fileIds`, using one bound `?` parameter per file id.
 *
 * @param db Database handle exposing `prepare(sql).all(...params)`.
 * @param fileIds Source file ids to match.
 * @returns Set of matching transaction ids.
 */
function transactionsInScope(db, fileIds) {
    const marks = Array.from(fileIds, () => "?").join(",");
    const query = db.prepare(`SELECT id FROM transactions WHERE source_file_id IN (${marks})`);
    const found = new Set();
    for (const { id } of query.all(...fileIds)) {
        found.add(id);
    }
    return found;
}
|
|
47
|
+
// Inspector object pairing the "correlations" name with this module's inspect().
export const correlationsInspector = {
    name: "correlations",
    inspect,
};
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { findDuplicateTransactions } from "../../db/queries/transactions.js";
|
|
2
|
+
import { formatAmount } from "../../currency.js";
|
|
3
|
+
/**
 * Duplicate inspector entry point.
 *
 * Emits one "duplicate" unknown per group returned by
 * findDuplicateTransactions, attached to the member with the latest date;
 * the remaining members are listed in the prompt via buildPrompt.
 *
 * Groups are pruned with dedupeSameFileSameDaySameMerchant first. A group is
 * skipped when fewer than two members survive, or when no surviving member
 * belongs to a file from this scan run.
 *
 * @param db Database handle forwarded to the query helpers.
 * @param scope Scan scope; only `scope.fileIds` is read.
 * @returns Array of unknown inputs (possibly empty).
 */
function inspect(db, scope) {
    if (scope.fileIds.length === 0) {
        return [];
    }
    const candidateGroups = findDuplicateTransactions(db);
    if (candidateGroups.length === 0) {
        return [];
    }
    const scannedIds = transactionsInScope(db, scope.fileIds);
    const unknowns = [];
    for (const candidateGroup of candidateGroups) {
        const survivors = dedupeSameFileSameDaySameMerchant(candidateGroup);
        if (survivors.length < 2) {
            continue;
        }
        const touchesScan = survivors.some(tx => scannedIds.has(tx.id));
        if (!touchesScan) {
            continue;
        }
        // Work on a copy so the query result is not reordered.
        const chronological = Array.from(survivors);
        chronological.sort((first, second) => first.date.localeCompare(second.date));
        const newest = chronological[chronological.length - 1];
        const earlier = chronological.slice(0, -1);
        unknowns.push({
            file_id: null,
            transaction_id: newest.id,
            account_id: null,
            kind: "duplicate",
            prompt: buildPrompt(newest, earlier),
            options: ["Delete this one", "Delete the older one", "Keep both", "Skip"],
        });
    }
    return unknowns;
}
|
|
42
|
+
/**
 * Collapse same-file, same-date, same-merchant transactions to a single
 * representative (the first one seen) so they don't trigger a "duplicate"
 * unknown between themselves. Members from different files or dates are all
 * kept.
 *
 * Transactions without a source file are never collapsed with file-backed
 * ones: they are tracked by id in a separate index. Sharing one key space
 * would let a file-less transaction whose id happens to equal a composite
 * "file|date|merchant" key replace or suppress an unrelated member.
 *
 * @param group Array of transactions; reads `id`, `source_file_id`, `date`
 *   and `merchant_id` from each.
 * @returns New array of the surviving transactions in first-seen order.
 */
function dedupeSameFileSameDaySameMerchant(group) {
    const kept = [];
    const fileKeys = new Set();
    const positionById = new Map();
    for (const tx of group) {
        if (tx.source_file_id == null) {
            // A repeated id overwrites the earlier entry in place.
            const position = positionById.get(tx.id);
            if (position === undefined) {
                positionById.set(tx.id, kept.length);
                kept.push(tx);
            }
            else {
                kept[position] = tx;
            }
            continue;
        }
        const key = `${tx.source_file_id}|${tx.date}|${tx.merchant_id ?? ""}`;
        if (!fileKeys.has(key)) {
            fileKeys.add(key);
            kept.push(tx);
        }
    }
    return kept;
}
|
|
60
|
+
/**
 * Render the prompt text for a duplicate unknown: a heading, a line for the
 * newest transaction (amount, date, description, account names), and one
 * "matches" line per earlier member.
 *
 * @param newest Transaction the unknown is attached to.
 * @param others Remaining members of the duplicate group.
 * @returns Multi-line prompt string.
 */
function buildPrompt(newest, others) {
    const formatted = formatAmount(newest.amount);
    const describeEarlier = tx => ` matches ${tx.date} (${tx.description}) — accounts: ${tx.account_names.join(", ")}`;
    const body = [
        `${formatted} on ${newest.date} (${newest.description}) — accounts: ${newest.account_names.join(", ")}`,
    ].concat(others.map(describeEarlier));
    return `Possible duplicate transaction.\n${body.join("\n")}`;
}
|
|
68
|
+
/**
 * Collect the ids of every transaction whose `source_file_id` is one of
 * `fileIds`. The file ids are bound as parameters, one `?` per id.
 *
 * @param db Database handle exposing `prepare(sql).all(...params)`.
 * @param fileIds Source file ids to match.
 * @returns Set of matching transaction ids.
 */
function transactionsInScope(db, fileIds) {
    const marks = new Array(fileIds.length).fill("?").join(",");
    const statement = db.prepare(`SELECT id FROM transactions WHERE source_file_id IN (${marks})`);
    const rows = statement.all(...fileIds);
    const ids = new Set();
    rows.forEach(row => {
        ids.add(row.id);
    });
    return ids;
}
|
|
75
|
+
// Inspector object pairing the "duplicates" name with this module's inspect().
export const duplicatesInspector = {
    name: "duplicates",
    inspect,
};
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
import type { Inspector, InspectorScope } from "./types.js";
|
|
3
|
+
export type { Inspector, InspectorScope } from "./types.js";
|
|
4
|
+
/**
|
|
5
|
+
* The ordered list of post-commit inspectors the scanner runs. Order matters
|
|
6
|
+
* only for the resolver's priority sweep, not for correctness — each inspector
|
|
7
|
+
* emits unknowns independently of the others.
|
|
8
|
+
*/
|
|
9
|
+
export declare const inspectors: readonly Inspector[];
|
|
10
|
+
/** Summary returned by runInspectors. */
export interface InspectionRunResult {
    /** Number of unknowns counted across all inspectors. */
    total: number;
    /** Unknown count per inspector, keyed by the inspector's `name`. */
    byInspector: Record<string, number>;
}
|
|
14
|
+
/**
|
|
15
|
+
* Run every inspector in order and insert any unknowns they produce. Returns
|
|
16
|
+
* counts so the CLI can report "X unknowns surfaced." Failure of one inspector
|
|
17
|
+
* never aborts the run — it logs and the others still execute.
|
|
18
|
+
*/
|
|
19
|
+
export declare function runInspectors(db: Database.Database, scope: InspectorScope): InspectionRunResult;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { recordUnknown } from "../../db/queries/unknowns.js";
|
|
2
|
+
import { duplicatesInspector } from "./duplicates.js";
|
|
3
|
+
import { correlationsInspector } from "./correlations.js";
|
|
4
|
+
import { recurrencesInspector } from "./recurrences.js";
|
|
5
|
+
import { similarAccountsInspector } from "./similarities.js";
|
|
6
|
+
/**
 * Post-commit inspectors in the order the scanner runs them. Per the original
 * note, ordering only affects the resolver's priority sweep; every inspector
 * produces its unknowns independently of the rest.
 */
export const inspectors = [
    duplicatesInspector,
    correlationsInspector,
    recurrencesInspector,
    similarAccountsInspector,
];
|
|
17
|
+
/**
 * Run every inspector in order and insert any unknowns they produce. Returns
 * counts so the CLI can report "X unknowns surfaced."
 *
 * Failure of one inspector never aborts the run: the error is logged and the
 * remaining inspectors still execute. Counts reflect unknowns that were
 * actually recorded, so an inspector that fails part-way through inserting is
 * reported with the number inserted before the failure rather than 0.
 *
 * @param db Database handle passed to each inspector and to recordUnknown.
 * @param scope Inspector scope forwarded unchanged to each inspector.
 * @returns `{ total, byInspector }` where `byInspector` maps inspector name
 *   to its recorded-unknown count.
 */
export function runInspectors(db, scope) {
    const byInspector = {};
    let total = 0;
    for (const inspector of inspectors) {
        let recorded = 0;
        try {
            for (const unknown of inspector.inspect(db, scope)) {
                recordUnknown(db, unknown);
                recorded += 1;
            }
        }
        catch (err) {
            console.error(`[inspector ${inspector.name}] ${err?.message ?? err}`);
        }
        byInspector[inspector.name] = recorded;
        total += recorded;
    }
    return { total, byInspector };
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { findRecurrenceCandidates } from "../../db/queries/recurrences.js";
|
|
2
|
+
import { formatAmount } from "../../currency.js";
|
|
3
|
+
/**
 * Recurrence inspector entry point.
 *
 * Emits one "recurrence_candidate" unknown per candidate from
 * findRecurrenceCandidates, attached to the last entry of the candidate's
 * `transactions` array. A candidate is skipped when its `implied_frequency`
 * is "irregular" or when that last transaction does not belong to a file
 * from this scan run.
 *
 * @param db Database handle forwarded to the query helpers.
 * @param scope Scan scope; only `scope.fileIds` is read.
 * @returns Array of unknown inputs (possibly empty).
 */
function inspect(db, scope) {
    if (scope.fileIds.length === 0) {
        return [];
    }
    const candidates = findRecurrenceCandidates(db);
    if (candidates.length === 0) {
        return [];
    }
    const scannedIds = transactionsInScope(db, scope.fileIds);
    const unknowns = [];
    for (const candidate of candidates) {
        if (candidate.implied_frequency === "irregular") {
            continue;
        }
        const sightings = candidate.transactions;
        const latest = sightings[sightings.length - 1];
        if (scannedIds.has(latest.id)) {
            unknowns.push({
                file_id: null,
                transaction_id: latest.id,
                account_id: candidate.account_id,
                kind: "recurrence_candidate",
                prompt: buildPrompt(candidate, latest),
                options: ["Link as recurring", "Not recurring", "Skip"],
            });
        }
    }
    return unknowns;
}
|
|
34
|
+
/**
 * Render the two-line prompt for a recurrence candidate: a summary line
 * (frequency, account name, amount, sighting count, median gap) and a line
 * with the date and description of the latest transaction.
 *
 * @param candidate Recurrence candidate being surfaced.
 * @param latest Transaction the unknown is attached to.
 * @returns Prompt string with the two lines joined by a newline.
 */
function buildPrompt(candidate, latest) {
    const formatted = formatAmount(candidate.amount, candidate.currency);
    const count = candidate.transactions.length;
    const parts = [];
    parts.push(`Possible ${candidate.implied_frequency} recurrence on ${candidate.account_name}: ${formatted} (${count} sightings, median ${candidate.median_days_between} days apart).`);
    parts.push(`Latest: ${latest.date} — ${latest.description}`);
    return parts.join("\n");
}
|
|
42
|
+
/**
 * Look up the ids of all transactions that came from the given source files.
 * One `?` placeholder is generated per file id and the ids are bound as
 * statement parameters.
 *
 * @param db Database handle exposing `prepare(sql).all(...params)`.
 * @param fileIds Source file ids to match.
 * @returns Set of matching transaction ids.
 */
function transactionsInScope(db, fileIds) {
    const marks = Array.from(fileIds, () => "?").join(",");
    const sql = `SELECT id FROM transactions WHERE source_file_id IN (${marks})`;
    const matched = db.prepare(sql).all(...fileIds);
    return new Set(matched.map(({ id }) => id));
}
|
|
49
|
+
// Inspector object pairing the "recurrences" name with this module's inspect().
export const recurrencesInspector = {
    name: "recurrences",
    inspect,
};
|