@rvanbaalen/ofxreader 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,88 @@
1
+ import type { Transaction } from "../model.ts";
2
+ import { normalizeDescriptor } from "./normalize.ts";
3
+
4
/** One fuzzy suggestion: a distinct normalized descriptor plus the evidence behind it. */
export type Candidate = {
  /** The normalized descriptor that the grouped transactions share. */
  normalized: string;
  /** Up to 3 raw descriptors that normalized to this value. */
  examples: string[];
  /** How many transactions share this normalized descriptor. */
  count: number;
  /** Dice similarity to the query (0..1, rounded to 2 decimals). */
  similarity: number;
};
10
+
11
/**
 * Build the raw descriptor used as the vendor identity.
 *
 * Concatenates `name`, `memo` and `payee` (in that order) with single spaces,
 * skipping any field that is null, undefined or the empty string.
 */
export function descriptorOf(t: Transaction): string {
  const fields = [t.name, t.memo, t.payee];
  const present = fields.filter((field) => !(field == null || field === ""));
  return present.join(" ");
}
15
+
16
/**
 * Decide whether a transaction is a confirmed match for a vendor.
 *
 * @param t - Transaction whose descriptor is normalized and searched.
 * @param signatures - Stored signatures; empty strings are ignored.
 * @returns `true` when any non-empty signature is a substring of the
 *   normalized descriptor, otherwise `false`.
 */
export function confirmedMatch(t: Transaction, signatures: string[]): boolean {
  // Nothing to match against: skip the normalization work entirely.
  if (signatures.length === 0) return false;

  const normalized = normalizeDescriptor(descriptorOf(t));
  for (const signature of signatures) {
    if (signature === "") continue;
    if (normalized.includes(signature)) return true;
  }
  return false;
}
22
+
23
/**
 * Sørensen–Dice similarity over character bigrams. No dependency.
 *
 * Bigrams are counted as a multiset, so repeated pairs contribute as often as
 * they occur in both strings.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns A score in 0..1. When either string is too short to yield a bigram,
 *   the score is 1 if both strings are equal after whitespace collapsing and
 *   trimming, otherwise 0.
 */
export function diceSimilarity(a: string, b: string): number {
  const gramsA = bigrams(a);
  const gramsB = bigrams(b);

  if (gramsA.size === 0 || gramsB.size === 0) {
    // Compare the whitespace-collapsed forms so that padding does not change
    // the verdict for short strings ("a" vs " a ") when it would not for
    // longer ones ("ab" vs " ab ").
    const squash = (s: string): string => s.replace(/\s+/g, " ").trim();
    return squash(a) === squash(b) ? 1 : 0;
  }

  let intersection = 0;
  for (const [gram, countA] of gramsA) {
    const countB = gramsB.get(gram);
    if (countB != null) intersection += Math.min(countA, countB);
  }
  return (2 * intersection) / (sum(gramsA) + sum(gramsB));
}
35
+
36
/**
 * Rank the distinct normalized descriptors found in `transactions` by how well
 * they match the query.
 *
 * @param transactions - Transactions to group by normalized descriptor;
 *   transactions whose descriptor normalizes to "" are skipped.
 * @param queryTerms - Terms to compare against; empty strings are ignored.
 * @param threshold - Minimum (unrounded) similarity a descriptor must reach.
 * @param limit - Maximum number of candidates returned (default 10).
 * @returns Candidates scored by their best Dice similarity to any term, sorted
 *   by rounded similarity and then by transaction count, both descending.
 */
export function rankCandidates(
  transactions: Transaction[],
  queryTerms: string[],
  threshold: number,
  limit = 10,
): Candidate[] {
  // Pass 1: bucket transactions by normalized descriptor.
  const buckets = new Map<string, { examples: Set<string>; count: number }>();
  for (const transaction of transactions) {
    const rawDescriptor = descriptorOf(transaction);
    const normalized = normalizeDescriptor(rawDescriptor);
    if (normalized === "") continue;

    let bucket = buckets.get(normalized);
    if (bucket === undefined) {
      bucket = { examples: new Set<string>(), count: 0 };
      buckets.set(normalized, bucket);
    }
    // Keep at most three distinct raw spellings as evidence.
    if (rawDescriptor !== "" && bucket.examples.size < 3) bucket.examples.add(rawDescriptor);
    bucket.count += 1;
  }

  // Pass 2: score every bucket against the non-empty query terms.
  const usableTerms = queryTerms.filter((term) => term !== "");
  const ranked: Candidate[] = [];
  buckets.forEach((bucket, normalized) => {
    const best = usableTerms.reduce(
      (highest, term) => Math.max(highest, diceSimilarity(normalized, term)),
      0,
    );
    if (best >= threshold) {
      ranked.push({
        normalized,
        examples: Array.from(bucket.examples),
        count: bucket.count,
        similarity: round2(best),
      });
    }
  });

  ranked.sort((left, right) => {
    const bySimilarity = right.similarity - left.similarity;
    return bySimilarity || right.count - left.count;
  });
  return ranked.slice(0, limit);
}
69
+
70
/**
 * Count the overlapping two-character windows of a string.
 *
 * Whitespace runs are collapsed to a single space and the ends are trimmed
 * before windows are taken, so spacing differences do not change the counts.
 *
 * @returns Map from each bigram to its number of occurrences; empty when the
 *   cleaned string has fewer than two characters.
 */
function bigrams(s: string): Map<string, number> {
  const text = s.replace(/\s+/g, " ").trim();
  const counts = new Map<string, number>();
  let start = 0;
  while (start + 1 < text.length) {
    const pair = text.substring(start, start + 2);
    counts.set(pair, (counts.get(pair) ?? 0) + 1);
    start += 1;
  }
  return counts;
}
79
+
80
/** Add up all the values of a count map (0 for an empty map). */
function sum(m: Map<string, number>): number {
  return Array.from(m.values()).reduce((total, value) => total + value, 0);
}
85
+
86
/** Round a number to two decimal places using `Math.round`. */
function round2(n: number): number {
  const hundredths = Math.round(n * 100);
  return hundredths / 100;
}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Normalize a raw OFX transaction descriptor into a stable, comparable core.
3
+ *
4
+ * "SQ *JASONS CARO 0123" -> "JASONS CARO"
5
+ * "TST* JASONSCAROUSEL" -> "JASONSCAROUSEL"
6
+ * "POS PURCHASE WHOLEFDS" -> "WHOLEFDS"
7
+ *
8
+ * Heuristic by design: confirmed signatures are exact substrings derived from real
9
+ * descriptors, so perfect normalization is not required — it mainly removes payment-
10
+ * processor noise so fuzzy candidate matching and substring matching behave sensibly.
11
+ */
12
+
13
// Leading payment-processor "XX *" token, e.g. "SQ *", "TST*", "PP*", "IZ *".
const STAR_PREFIX = /^[A-Z0-9]{2,5}\s*\*\s*/;

// Conservative leading noise words (kept short to avoid clipping real names).
const LEADING_NOISE = /^(POS|PURCHASE|DEBIT|CREDIT|CHECKCARD|RECURRING|ACH|WWW)\b[\s.]*/;

/**
 * Reduce a raw descriptor to an upper-case, letters-only core.
 *
 * Steps, in order: upper-case, drop one leading processor "XX *" token, drop
 * any run of leading noise words, replace every character that is not A-Z or
 * whitespace with a space, then collapse whitespace and trim.
 *
 * @param input - Raw descriptor; null and undefined are treated as "".
 * @returns The normalized core, or "" when nothing but noise remains.
 */
export function normalizeDescriptor(input: string | null | undefined): string {
  // Both patterns are anchored at the start of the string, so leading
  // whitespace must go first. Otherwise a padded descriptor such as
  // "  SQ *FOO" would keep its processor prefix and normalize to "SQ FOO".
  let s = (input ?? "").toUpperCase().trimStart();

  s = s.replace(STAR_PREFIX, " ").trimStart();

  // Noise words can be stacked ("POS PURCHASE ..."), so strip until stable.
  let prev = "";
  while (s !== prev) {
    prev = s;
    s = s.replace(LEADING_NOISE, " ").trimStart();
  }

  // Drop digits and punctuation (store numbers, "#123", locations like "CA"
  // survive as letters but rarely hurt substring matching).
  s = s.replace(/[^A-Z\s]/g, " ");

  return s.replace(/\s+/g, " ").trim();
}
@@ -0,0 +1,41 @@
1
+ import type { Statement } from "../model.ts";
2
+ import { filterTransactions } from "../query.ts";
3
+ import type { TransactionFilters, QueryResult } from "../query.ts";
4
+ import { confirmedMatch, rankCandidates } from "./match.ts";
5
+ import type { Candidate } from "./match.ts";
6
+ import { normalizeDescriptor } from "./normalize.ts";
7
+ import { findVendorKey } from "./store.ts";
8
+ import type { VendorStore } from "./store.ts";
9
+
10
/** Minimum Dice similarity a descriptor needs in order to be suggested as a vendor candidate. */
const SUGGEST_THRESHOLD = 0.6;
11
+
12
/** A query result extended with the outcome of the vendor lookup. */
export type VendorQueryResult = QueryResult & {
  /** Canonical name (resolved key, or the query verbatim). */
  vendor: string;
  /** Whether the vendor was found in the store. */
  resolved: boolean;
  /** Fuzzy, unconfirmed descriptors to propose. */
  vendorCandidates: Candidate[];
};
17
+
18
/**
 * Answer a vendor query against a set of statements.
 *
 * Transactions matching one of the vendor's stored signatures are confirmed
 * and passed through `filterTransactions` with `filters` to form the result.
 * Every other transaction is ranked as a fuzzy candidate against the
 * normalized query plus the stored signatures.
 *
 * @param store - Vendor store used to look up the canonical key and signatures.
 * @param statements - Statements whose transactions are searched.
 * @param vendor - Vendor name as typed by the caller.
 * @param filters - Filters applied to the confirmed transactions only.
 * @returns The filtered result plus `vendor`, `resolved` and `vendorCandidates`.
 */
export function resolveVendorQuery(
  store: VendorStore,
  statements: Statement[],
  vendor: string,
  filters: TransactionFilters,
): VendorQueryResult {
  const transactions = statements.flatMap((statement) => statement.transactions);
  const canonical = findVendorKey(store, vendor);
  const signatures = canonical != null ? (store.vendors[canonical]?.signatures ?? []) : [];

  // Split once into confirmed matches and everything else.
  const confirmed: typeof transactions = [];
  const remaining: typeof transactions = [];
  for (const transaction of transactions) {
    const bucket = confirmedMatch(transaction, signatures) ? confirmed : remaining;
    bucket.push(transaction);
  }

  const result = filterTransactions(confirmed, filters);

  const queryTerms = [normalizeDescriptor(vendor), ...signatures];
  const vendorCandidates = rankCandidates(remaining, queryTerms, SUGGEST_THRESHOLD);

  return {
    ...result,
    vendor: canonical ?? vendor,
    resolved: canonical != null,
    vendorCandidates,
  };
}
@@ -0,0 +1,93 @@
1
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
2
+ import { homedir } from "node:os";
3
+ import { dirname, join } from "node:path";
4
+ import { OfxError } from "../parser.ts";
5
+ import { normalizeDescriptor } from "./normalize.ts";
6
+
7
/** One vendor entry in the store. */
export type Vendor = {
  /** Normalized, confirmed cores (for deterministic matching). */
  signatures: string[];
  /** Original confirmed descriptors (provenance). */
  raw: string[];
  /** Date of the last update, formatted YYYY-MM-DD. */
  updatedAt?: string;
};
12
+
13
/** The persisted vendor store: a version number plus vendors keyed by canonical name. */
export type VendorStore = {
  /** Version number of the store. */
  version: number;
  /** Vendor entries, keyed by canonical vendor name. */
  vendors: Record<string, Vendor>;
};
17
+
18
/**
 * Resolve the path of the vendor store file.
 *
 * Precedence: `$OFXREADER_VENDORS` (used verbatim), else
 * `$XDG_CONFIG_HOME/ofxreader/vendors.json`, else
 * `~/.config/ofxreader/vendors.json`. A variable that is unset, empty or
 * whitespace-only is treated as absent.
 *
 * @returns The path of the vendor store file.
 */
export function storePath(): string {
  const override = process.env.OFXREADER_VENDORS;
  // Apply the same blank test as XDG_CONFIG_HOME below, so that a
  // whitespace-only override does not become a file literally named "   ".
  if (override != null && override.trim() !== "") return override;

  const xdg = process.env.XDG_CONFIG_HOME;
  const base = xdg != null && xdg.trim() !== "" ? xdg : join(homedir(), ".config");
  return join(base, "ofxreader", "vendors.json");
}
26
+
27
/**
 * Load the vendor store from disk.
 *
 * @param path - File to read; defaults to `storePath()`.
 * @returns The parsed store. A missing file yields an empty version-1 store;
 *   a missing `version` defaults to 1.
 * @throws OfxError with code `VENDOR_STORE_ERROR` when the file cannot be
 *   read, is not valid JSON, or does not contain a `vendors` object.
 */
export function load(path = storePath()): VendorStore {
  if (!existsSync(path)) return { version: 1, vendors: {} };

  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  let text: string;
  try {
    text = readFileSync(path, "utf8");
  } catch (err) {
    throw new OfxError("VENDOR_STORE_ERROR", `Could not read vendor store ${path}: ${describe(err)}`);
  }

  try {
    const data: unknown = JSON.parse(text);
    const vendors =
      data != null && typeof data === "object" ? (data as Partial<VendorStore>).vendors : undefined;
    // `typeof null` and `typeof []` are both "object", so those cases must be
    // rejected explicitly. A null map would crash later key lookups, and an
    // array cannot hold vendor entries that survive JSON serialization.
    if (vendors == null || typeof vendors !== "object" || Array.isArray(vendors)) {
      throw new Error("expected an object with a `vendors` map");
    }
    return { version: (data as Partial<VendorStore>).version ?? 1, vendors };
  } catch (err) {
    throw new OfxError("VENDOR_STORE_ERROR", `Vendor store ${path} is not valid: ${describe(err)}`);
  }
}
46
+
47
/**
 * Write the store to disk as 2-space-indented JSON followed by a newline.
 *
 * @param store - Store to serialize.
 * @param path - Destination file; defaults to `storePath()`. Missing parent
 *   directories are created first.
 */
export function save(store: VendorStore, path = storePath()): void {
  const parentDir = dirname(path);
  mkdirSync(parentDir, { recursive: true });

  const serialized = `${JSON.stringify(store, null, 2)}\n`;
  writeFileSync(path, serialized);
}
52
+
53
/**
 * Find the stored key that equals `name` when case is ignored.
 *
 * @param store - Store whose vendor keys are searched in insertion order.
 * @param name - Vendor name to look up.
 * @returns The key exactly as stored, or `null` when no key matches.
 */
export function findVendorKey(store: VendorStore, name: string): string | null {
  const wanted = name.toLowerCase();
  const match = Object.keys(store.vendors).find((candidate) => candidate.toLowerCase() === wanted);
  return match ?? null;
}
61
+
62
/**
 * Record confirmed raw descriptors for a vendor, creating the vendor if needed.
 *
 * Each descriptor is trimmed; blank ones are skipped. The trimmed text is
 * added to `raw`, and its normalized form (when non-empty) to `signatures`.
 * Existing entries are kept and duplicates are dropped.
 *
 * @param store - Store to update; mutated in place.
 * @param name - Vendor name; an existing key is matched case-insensitively,
 *   otherwise `name` itself becomes the key.
 * @param rawDescriptors - Confirmed descriptors to add.
 * @param today - Value stored as the entry's `updatedAt`.
 * @returns The new vendor entry that was written into the store.
 */
export function learn(
  store: VendorStore,
  name: string,
  rawDescriptors: string[],
  today: string,
): Vendor {
  const key = findVendorKey(store, name) ?? name;
  const previous = store.vendors[key] ?? { signatures: [], raw: [] };
  const signatures = new Set(previous.signatures);
  const raw = new Set(previous.raw);

  const cleaned = rawDescriptors
    .map((descriptor) => (descriptor ?? "").trim())
    .filter((descriptor) => descriptor !== "");

  for (const descriptor of cleaned) {
    raw.add(descriptor);
    const signature = normalizeDescriptor(descriptor);
    if (signature !== "") signatures.add(signature);
  }

  const vendor: Vendor = {
    signatures: Array.from(signatures),
    raw: Array.from(raw),
    updatedAt: today,
  };
  store.vendors[key] = vendor;
  return vendor;
}
89
+
90
/**
 * Current date as YYYY-MM-DD (for `updatedAt`).
 *
 * The value is the first ten characters of `Date#toISOString()`, so it is the
 * UTC calendar date rather than the local one.
 */
export function today(): string {
  const isoTimestamp = new Date().toISOString();
  return isoTimestamp.substring(0, 10);
}
package/src/version.ts ADDED
@@ -0,0 +1,9 @@
1
+ import { readFileSync } from "node:fs";
2
+ import { fileURLToPath } from "node:url";
3
+
4
/**
 * Read the package version from package.json (single source of truth).
 *
 * The file is located at `../package.json` relative to this module's URL.
 *
 * @returns The `version` field, or "0.0.0" when the field is absent.
 */
export function getVersion(): string {
  const manifestUrl = new URL("../package.json", import.meta.url);
  const manifestText = readFileSync(fileURLToPath(manifestUrl), "utf8");
  const manifest = JSON.parse(manifestText) as { version?: string };
  return manifest.version ?? "0.0.0";
}