@plan-fi/imports 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,763 @@
1
+ // csv.mjs — CSV exports → Canonical Financial Profile. The KEYLESS path: no
2
+ // aggregator credentials, just the files a user can download from any
3
+ // brokerage/bank today ("Download → CSV").
4
+ //
5
+ // Input contract (no provider API behind this one — the caller collects files):
6
+ // {
7
+ // files: [{ name?, kind?: 'accounts'|'holdings'|'transactions', content: string }],
8
+ // owner, // onboarding context, same as every adapter
9
+ // asOf, // ISO snapshot timestamp
10
+ // }
11
+ // `kind` is optional — when omitted the file's HEADER FINGERPRINT decides
12
+ // (positions dialects → holdings, date+amount → transactions, else accounts).
13
+ //
14
+ // Column mapping is dialect-driven: DIALECTS is a table keyed by header
15
+ // fingerprints for known broker exports (Fidelity positions, Schwab positions,
16
+ // Vanguard downloads), the consumer finance tools the FIRE audience actually
17
+ // uses (Monarch Money, YNAB, Empower/Personal Capital, Copilot Money), plus
18
+ // generic accounts/transactions layouts. A file whose
19
+ // headers match NO dialect still imports via best-effort generic mapping
20
+ // (name-ish column + money-ish column) with a CSV_UNMAPPED_COLUMNS warning
21
+ // naming every column that couldn't be mapped — balances are never silently
22
+ // dropped, and nothing is fabricated to make the file look richer than it is.
23
+ //
24
+ // Honesty rules (CSV carries less signal than any API):
25
+ // - A Type/Account Type column is classified via classify(), same as the
26
+ // API adapters. NO type column → the account NAME is used as a hint, and
27
+ // the result ALWAYS carries CLASSIFICATION_GUESSED (a name is a guess).
28
+ // No usable signal at all → low-confidence taxable + the same warning.
29
+ // - Money cells handle currency symbols, thousands commas, and
30
+ // accounting-style parenthesized negatives ("(1,850.00)" → -1850).
31
+ // - Positions dialects have no account-type column, so every account they
32
+ // produce is name-guessed (warned). Missing cost basis ("--") → the same
33
+ // NO_COST_BASIS info warning the API adapters emit.
34
+ // - Transactions files feed the shared contribution inference with the same
35
+ // growth-exclusion rules as the siblings (dividends/interest excluded,
36
+ // unlabeled deposits counted coarsely + COARSE_INFERENCE).
37
+ //
38
+ // Only CSV quirk-handling lives here; ALL Planfi domain logic stays in
39
+ // to-planfi.mjs, shared with every other adapter.
40
+ //
41
+ // @typedef {import('../canonical').CanonicalFinancialProfile} CFP
42
+ // @typedef {import('../canonical').SourceAdapter} SourceAdapter
43
+
44
+ import { classify } from '../classify.mjs';
45
+ import { contributionsByAccount } from '../contributions.mjs';
46
+ import { arr, defaultAsOf, warning } from '../util.mjs';
47
+
// Label vocabularies for the contribution split, mirroring the MX + Finicity
// adapters. A transaction's label is tested against these two patterns:
//   - `paycheck` is how Monarch/Copilot spell payroll income;
//   - `capital gain` covers Monarch's "Dividends & Capital Gains" category,
//     which is growth and must never be counted as a contribution.
const CSV_INFLOW = /transfer|deposit|contribution|payroll|paycheck|direct dep|buy/i;
const CSV_GROWTH = /dividend|interest|capital gain|reinvest/i;
53
+
54
+ // ── dependency-free CSV parsing ──────────────────────────────────────────────
55
+
/**
 * Turn CSV text into rows of string cells without any dependency.
 *
 * Understands quoted fields (including commas and line breaks inside the
 * quotes), `""` as an escaped quote, LF / CRLF / lone-CR row terminators, and
 * a leading byte-order mark. A quote left open at end of input simply closes
 * the field, so the function never throws. Rows whose every cell is blank
 * are removed from the result.
 * @param {string} text
 * @returns {string[][]}
 */
export function parseCsv(text) {
  const input = String(text ?? '').replace(/^\uFEFF/, '');
  const table = [];
  let cells = [];
  let buf = '';
  let quoted = false;

  const flushCell = () => {
    cells.push(buf);
    buf = '';
  };
  const flushRow = () => {
    flushCell();
    table.push(cells);
    cells = [];
  };

  let pos = 0;
  while (pos < input.length) {
    const ch = input[pos];
    const next = input[pos + 1];
    pos += 1;

    if (quoted) {
      if (ch !== '"') {
        buf += ch;
      } else if (next === '"') {
        // Doubled quote inside a quoted field is a literal quote.
        buf += '"';
        pos += 1;
      } else {
        quoted = false;
      }
      continue;
    }

    switch (ch) {
      case '"':
        quoted = true;
        break;
      case ',':
        flushCell();
        break;
      case '\n':
        flushRow();
        break;
      case '\r':
        // CRLF counts as one terminator; a lone CR terminates too.
        if (next === '\n') pos += 1;
        flushRow();
        break;
      default:
        buf += ch;
    }
  }

  // Final row without a trailing newline.
  if (buf !== '' || cells.length > 0) flushRow();

  // Blank lines and the trailing newline leave all-empty rows behind.
  const isBlank = (cell) => String(cell).trim() === '';
  return table.filter((r) => !r.every(isBlank));
}
88
+
// ── dialect table ────────────────────────────────────────────────────────────
// A dialect is a header fingerprint plus a column map:
//   columns  — canonical key → accepted header aliases (first alias present wins)
//   requires — keys that must ALL resolve through `columns` for the dialect
//              to match a header row
// Matching is first-hit, which is why brand dialects are listed before the
// generic ones. Aliases are compared against normalized headers (lowercased,
// trimmed, trailing parenthetical removed: "Qty (Quantity)" → "qty").
//
// Optional per-dialect switches:
//   labelKeys               — transactions: mapped columns joined with spaces
//                             to form the label tested against
//                             CSV_GROWTH/CSV_INFLOW (default ['label']).
//   amountSign: -1          — transactions: the source writes spending as
//                             POSITIVE and money-in as NEGATIVE (Copilot);
//                             amounts are flipped so inflows are positive.
//   transactionsOnly        — the TOOL structurally cannot export balances
//                             (YNAB); CSV_TRANSACTIONS_ONLY is emitted so
//                             callers pair the file with a balances source.
//   latestBalancePerAccount — accounts: the file is a balance HISTORY (one
//                             row per account per date, Monarch); only the
//                             newest row per account is kept, never a sum.

const DIALECTS = [
  {
    id: 'fidelity-positions',
    kind: 'holdings',
    institution: 'Fidelity',
    columns: {
      accountId: ['account number'],
      accountName: ['account name'],
      symbol: ['symbol'],
      name: ['description'],
      quantity: ['quantity'],
      lastPrice: ['last price'],
      value: ['current value'],
      costBasis: ['cost basis total', 'cost basis'],
    },
    requires: ['accountId', 'accountName', 'symbol', 'name', 'quantity', 'lastPrice', 'value'],
  },
  {
    id: 'vanguard-download',
    kind: 'holdings',
    institution: 'Vanguard',
    columns: {
      accountId: ['account number'],
      name: ['investment name'],
      symbol: ['symbol'],
      quantity: ['shares'],
      lastPrice: ['share price'],
      value: ['total value'],
      costBasis: ['cost basis', 'total cost'],
    },
    requires: ['accountId', 'name', 'symbol', 'quantity', 'lastPrice', 'value'],
  },
  {
    id: 'schwab-positions',
    kind: 'holdings',
    institution: 'Charles Schwab',
    columns: {
      symbol: ['symbol'],
      name: ['description'],
      quantity: ['qty', 'quantity'],
      lastPrice: ['price'],
      value: ['mkt val', 'market value'],
      costBasis: ['cost basis', 'cost basis total'],
    },
    requires: ['symbol', 'name', 'quantity', 'lastPrice', 'value'],
  },
  {
    // Monarch Money — "Download balances" (Accounts page). The export feature
    // is official; the column set (Date/Account/Account Type/Institution/
    // Balance) comes from real exports rather than published docs. The file
    // is a balance HISTORY, one row per account per date, so rows collapse to
    // the newest date per account — summing history would fabricate a
    // balance. "Account Type" values ("Brokerage", "Credit Card",
    // "Real Estate", …) go through csvKind()/classify() like any Type cell.
    id: 'monarch-balances',
    kind: 'accounts',
    latestBalancePerAccount: true,
    columns: {
      date: ['date'],
      name: ['account', 'account name'],
      type: ['account type', 'type'],
      institution: ['institution'],
      balance: ['balance'],
    },
    requires: ['date', 'name', 'type', 'balance'],
  },
  {
    // Monarch Money — "Download transactions" (official export). Signs are
    // Mint-style: spending negative, money-in positive, so a transfer INTO a
    // brokerage is a positive Amount. The Category vocabulary carries the
    // growth signal: "Dividends & Capital Gains" and "Interest" match
    // CSV_GROWTH and are left out of contribution inference, while
    // "Transfer"/"Buy"/"Paycheck" match CSV_INFLOW.
    id: 'monarch-transactions',
    kind: 'transactions',
    columns: {
      date: ['date'],
      merchant: ['merchant'],
      category: ['category'],
      account: ['account'],
      statement: ['original statement'],
      notes: ['notes'],
      amount: ['amount'],
      tags: ['tags'],
    },
    requires: ['date', 'merchant', 'category', 'account', 'amount'],
    labelKeys: ['category', 'merchant', 'statement', 'notes'],
  },
  {
    // YNAB — register export ("Register.csv" in the official export zip):
    // Account/Flag/Date/Payee/Category Group/Category/Memo/Outflow/Inflow/
    // Cleared. Transactions-shaped ONLY: YNAB structurally exports no account
    // balances or holdings (transactionsOnly → CSV_TRANSACTIONS_ONLY). Money
    // arrives as an Outflow/Inflow column PAIR (both non-negative), so
    // amount = inflow − outflow. The contribution signal sits in Payee
    // ("Transfer : Checking") and Category ("Inflow: Ready to Assign" is
    // income, not a contribution). Tracking-account "Reconciliation Balance
    // Adjustment" entries (how YNAB users record market growth) match neither
    // inflow nor growth words and are therefore excluded.
    id: 'ynab-register',
    kind: 'transactions',
    transactionsOnly: true,
    columns: {
      account: ['account'],
      flag: ['flag'],
      date: ['date'],
      payee: ['payee'],
      category: ['category group/category', 'category'],
      categoryGroup: ['category group'],
      memo: ['memo'],
      outflow: ['outflow'],
      inflow: ['inflow'],
      cleared: ['cleared'],
    },
    requires: ['account', 'date', 'payee', 'outflow', 'inflow'],
    labelKeys: ['category', 'payee', 'memo'],
  },
  {
    // Empower (né Personal Capital) — Holdings-page CSV export. The feature
    // is official; the column set (Ticker/Name/Shares/Price/Change/1 Day %/
    // 1 Day $/Value, plus Account when viewing all accounts) is
    // community-documented from real exports. The stock export has no Cost
    // Basis column, so every holding carries the NO_COST_BASIS info warning
    // (the alias stays in case Empower adds one). Empower has no official
    // all-accounts BALANCES csv; a hand-assembled one falls through to the
    // generic-accounts dialect below, which is the intended path.
    id: 'empower-holdings',
    kind: 'holdings',
    institution: 'Empower',
    columns: {
      accountName: ['account', 'account name'],
      symbol: ['ticker'],
      name: ['name', 'security name', 'description'],
      quantity: ['shares'],
      lastPrice: ['price'],
      value: ['value', 'market value'],
      costBasis: ['cost basis'],
    },
    requires: ['symbol', 'name', 'quantity', 'lastPrice', 'value'],
  },
  {
    // Copilot Money — transactions export. LOW-CONFIDENCE dialect: Copilot
    // publishes no format docs; both the column set (date/name/amount/status/
    // category/parent category/excluded/tags/type/account/account mask/note/
    // recurring) and the INVERTED sign convention (spending POSITIVE,
    // money-in NEGATIVE → amountSign: -1) are community-documented. If the
    // headers drift, the file falls to generic-transactions or best-effort
    // mapping; if the sign convention is ever wrong, inflows read as outflows
    // and are EXCLUDED (conservative: no contributions fabricated).
    // "parent category" is the fingerprint column no other tool exports.
    // The `type` vocabulary ("internal transfer"/"income"/"regular") plus
    // category feed the growth/inflow split; the budgeting-only `excluded`
    // flag is deliberately ignored (an excluded transfer is still real
    // money). Copilot's accounts export (name/type/balance shaped, also
    // community-documented) intentionally has NO dedicated dialect —
    // generic-accounts already fingerprints it.
    id: 'copilot-transactions',
    kind: 'transactions',
    amountSign: -1,
    columns: {
      date: ['date'],
      merchant: ['name'],
      amount: ['amount'],
      status: ['status'],
      category: ['category'],
      parentCategory: ['parent category'],
      excluded: ['excluded'],
      tags: ['tags'],
      type: ['type'],
      account: ['account'],
      accountMask: ['account mask'],
      note: ['note'],
      recurring: ['recurring'],
    },
    requires: ['date', 'amount', 'category', 'parentCategory', 'account'],
    labelKeys: ['type', 'category', 'parentCategory', 'merchant', 'note'],
  },
  {
    id: 'generic-transactions',
    kind: 'transactions',
    columns: {
      account: ['account', 'account name', 'account number', 'account id'],
      date: ['date', 'posted date', 'transaction date', 'run date', 'trade date'],
      amount: ['amount', 'amount ($)', 'total amount'],
      label: ['description', 'action', 'type', 'category', 'memo', 'transaction type'],
    },
    requires: ['date', 'amount'],
  },
  {
    id: 'generic-accounts',
    kind: 'accounts',
    columns: {
      accountId: ['account number', 'account #', 'account id', 'number'],
      name: ['account name', 'name', 'account'],
      type: ['type', 'account type', 'category'],
      balance: ['balance', 'current balance', 'value', 'current value', 'amount', 'total'],
      rate: ['interest rate', 'apr', 'rate'],
      minPayment: ['minimum payment', 'min payment', 'monthly payment'],
      institution: ['institution', 'bank', 'custodian'],
      owner: ['owner', 'owner index'],
    },
    requires: ['name', 'balance'],
  },
];
299
+
/** @implements {SourceAdapter} */
export const csvAdapter = {
  source: 'csv',
  /**
   * Normalize a bundle of CSV exports into one canonical profile.
   *
   * Each file is parsed, fingerprinted against the dialect table, and routed
   * to the holdings / transactions / accounts mapper (or to the best-effort
   * mapper when no dialect matches). Transaction rows are collected across
   * every file first and matched to investment accounts only afterwards, so
   * a transactions file may precede the file that defines its accounts.
   *
   * Non-object payloads (null, primitives) yield an empty profile.
   *
   * @param {object} raw - { files: [{name?, kind?, content}], owner, asOf }
   * @returns {CFP}
   */
  normalize(raw) {
    // Total function: null/primitive payloads normalize to an empty profile
    // (a default parameter only covers `undefined` — the contract harness
    // caught the null case throwing).
    raw = raw && typeof raw === 'object' ? raw : {};
    const warnings = [];
    const unmapped = [];
    const accounts = [];
    const txnRows = []; // { ref, amount, date, label } collected across files
    const usedIds = new Set();
    // Hands out `want`, or `want:2`, `want:3`, … when the id is already taken.
    const uniqueId = (want) => {
      let id = want;
      for (let n = 2; usedIds.has(id); n++) id = `${want}:${n}`;
      usedIds.add(id);
      return id;
    };

    arr(raw.files).forEach((f, fileIdx) => {
      const fname = str(f?.name) || `file ${fileIdx + 1}`;
      const rows = parseCsv(f?.content);
      if (!rows.length) {
        warnings.push(warning('CSV_UNMAPPED_COLUMNS', 'warn',
          `CSV file "${fname}" is empty or unparseable — nothing imported from it.`));
        unmapped.push({ file: fname, reason: 'empty or unparseable' });
        return;
      }
      // An unrecognized `kind` is treated as absent → fingerprint decides.
      const wantKind = ['accounts', 'holdings', 'transactions'].includes(f?.kind) ? f.kind : undefined;
      const hit = detectDialect(rows, wantKind);

      if (!hit) {
        // No dialect fingerprint matched → best-effort generic mapping: the
        // left-most texty column is the name, the money-densest column is the
        // balance. Balances still import; the guess is warned, never hidden.
        bestEffortAccounts(rows, fname, fileIdx, { accounts, warnings, unmapped, uniqueId });
        return;
      }

      const { dialect, map, headerIdx, headers } = hit;
      const data = rows.slice(headerIdx + 1);
      if (dialect.kind === 'holdings') {
        mapHoldingsFile(data, map, dialect, fname, fileIdx, { accounts, warnings, uniqueId });
      } else if (dialect.kind === 'transactions') {
        for (const r of data) {
          const cell = (k) => (map[k] != null ? r[map[k]] : undefined);
          let amount;
          if (map.amount != null) {
            amount = moneyCell(cell('amount'));
          } else {
            // Outflow/Inflow column pair (YNAB): both non-negative, amount is
            // the net. Both cells absent/junk → no amount, row is skipped.
            const inflow = moneyCell(cell('inflow'));
            const outflow = moneyCell(cell('outflow'));
            amount = inflow == null && outflow == null ? undefined : (inflow ?? 0) - (outflow ?? 0);
          }
          // Copilot writes spending positive / money-in negative — flip so a
          // positive amount always means money INTO the account.
          if (amount != null && dialect.amountSign === -1) amount = -amount;
          // The label is what CSV_GROWTH/CSV_INFLOW are tested against —
          // dialects with a category vocabulary compose it from several
          // columns so the growth signal is never missed.
          const label = (dialect.labelKeys ?? ['label'])
            .map((k) => str(cell(k))).filter(Boolean).join(' ');
          txnRows.push({ ref: str(cell('account')), amount, date: str(cell('date')), label });
        }
        if (dialect.transactionsOnly) {
          // The TOOL (not just this file) cannot export balances — say so
          // once per file, or the import looks mysteriously account-less.
          warnings.push(warning('CSV_TRANSACTIONS_ONLY', 'warn',
            `"${fname}" is a ${dialect.id} export — it carries transactions ONLY, never account balances or holdings. Its deposits feed contribution inference; import balances from another file (a brokerage positions/accounts CSV) or enter them manually.`));
        }
        warnUnmappedColumns(dialect, map, headers, fname, { warnings, unmapped });
      } else {
        // Balance-history files (Monarch) carry one row per account per DATE —
        // keep only the newest row per account before mapping.
        const acctRows = dialect.latestBalancePerAccount ? latestRowPerAccount(data, map) : data;
        mapAccountsFile(acctRows, map, fname, fileIdx, { accounts, warnings, uniqueId });
        warnUnmappedColumns(dialect, map, headers, fname, { warnings, unmapped });
      }
    });

    // ── contribution inference from transactions files ──────────────────────
    // Same rules as the API adapters: only deposits INTO investment accounts,
    // growth (dividends/interest/reinvest) excluded, unlabeled deposits
    // counted but flagged once as coarse.
    const invAccounts = accounts.filter((a) => a.class === 'investment');
    // Case-insensitive lookup: a transaction may reference an account by id
    // or by display name.
    const byRef = new Map();
    for (const a of invAccounts) {
      byRef.set(low(a.id), a.id);
      if (a.name) byRef.set(low(a.name), a.id);
    }
    let sawUnlabeledDeposit = false;
    // Account references already reported as unmatched. Without this, every
    // single row of e.g. a checking account would add another identical
    // record to `unmapped`, burying the entries that matter.
    const reportedRefs = new Set();
    const normTxns = [];
    for (const t of txnRows) {
      const id = byRef.get(low(t.ref));
      if (!id) {
        const refKey = low(t.ref);
        if (t.ref && !reportedRefs.has(refKey)) {
          reportedRefs.add(refKey);
          unmapped.push({ transactionAccount: t.ref, reason: 'no matching investment account' });
        }
        continue;
      }
      if (!(Number(t.amount) > 0)) continue;             // outflows/junk are not contributions
      if (!t.label) { sawUnlabeledDeposit = true; }
      else if (CSV_GROWTH.test(t.label)) continue;       // dividends/interest = growth
      else if (!CSV_INFLOW.test(t.label)) continue;      // labeled but neither → exclude
      normTxns.push({ account_id: id, subtype: 'contribution', amount: -Math.abs(t.amount), date: t.date });
    }
    if (sawUnlabeledDeposit) {
      warnings.push(warning('COARSE_INFERENCE', 'warn',
        'CSV contribution inference is coarse: some investment-account deposits carry no description, so ALL such unlabeled deposits were counted as contributions (may include dividends or rollovers). Verify inferred contribution rates.'));
    }
    const contribByAccount = contributionsByAccount(normTxns);
    for (const a of accounts) {
      if (contribByAccount[a.id]) a.estMonthlyContribution = contribByAccount[a.id];
    }

    return {
      source: 'csv',
      // Default snapshot time is NOW (not the 1970 epoch — see util.mjs).
      asOf: raw.asOf || defaultAsOf(),
      owner: { ...(raw.owner ?? {}) },
      accounts,
      meta: { warnings, unmapped },
    };
  },
};
427
+
428
+ // ── dialect detection ────────────────────────────────────────────────────────
429
+
/**
 * Canonical form of a header cell, used for alias matching: leading BOM
 * removed, trimmed, lowercased, one trailing "(…)" group dropped, and runs
 * of whitespace collapsed to a single space.
 */
const normHeader = (h) => {
  const withoutBom = String(h ?? '').replace(/^\uFEFF/, '');
  const lowered = withoutBom.trim().toLowerCase();
  const withoutSuffix = lowered.replace(/\s*\([^)]*\)\s*$/, '');
  return withoutSuffix.replace(/\s+/g, ' ');
};
437
+
/**
 * Match a dialect's alias lists against already-normalized headers.
 * For each key the aliases are tried in their listed order and the first one
 * present in `headers` decides the column index; keys with no matching alias
 * are left out of the result.
 * @returns {Object<string, number>} key → column index
 */
function resolveColumns(headers, columns) {
  const resolved = {};
  Object.keys(columns).forEach((key) => {
    const positions = columns[key].map((alias) => headers.indexOf(alias));
    const firstHit = positions.find((pos) => pos >= 0);
    if (firstHit !== undefined) resolved[key] = firstHit;
  });
  return resolved;
}
449
+
/**
 * Locate the header row and the dialect it belongs to.
 *
 * Broker exports often start with preamble lines (timestamps, blank rows),
 * so each of the first 10 rows is tried as a header in turn. For every
 * candidate row the dialects are checked in table order, and the first
 * dialect whose `requires` keys all resolve is returned. An explicit
 * `wantKind` (the file's `kind`) limits the search to dialects of that kind.
 *
 * @returns {{dialect: object, map: object, headerIdx: number, headers: string[]}|null}
 *   the match, or null when none of the scanned rows fits any dialect
 */
function detectDialect(rows, wantKind) {
  const pool = wantKind ? DIALECTS.filter((d) => d.kind === wantKind) : DIALECTS;
  const candidateRows = rows.slice(0, 10);
  for (const [headerIdx, rawRow] of candidateRows.entries()) {
    const headers = rawRow.map(normHeader);
    for (const dialect of pool) {
      const map = resolveColumns(headers, dialect.columns);
      const missingRequired = dialect.requires.some((k) => map[k] == null);
      if (!missingRequired) {
        return { dialect, map, headerIdx, headers };
      }
    }
  }
  return null;
}
468
+
/**
 * Report header columns the matched dialect did not consume.
 * When at least one non-empty header is not referenced by `map`, one
 * CSV_UNMAPPED_COLUMNS warning is pushed onto `ctx.warnings` and one record
 * onto `ctx.unmapped`; otherwise nothing happens.
 */
function warnUnmappedColumns(dialect, map, headers, fname, ctx) {
  const mappedIdx = Object.values(map);
  const missed = [];
  headers.forEach((header, idx) => {
    if (header && !mappedIdx.includes(idx)) missed.push(header);
  });
  if (missed.length === 0) return;

  const quoted = missed.map((c) => `"${c}"`).join(', ');
  const message = `CSV file "${fname}": column(s) ${quoted} did not match the ${dialect.id} mapping and were ignored — rename them to a recognized header if they carry balances.`;
  ctx.warnings.push(warning('CSV_UNMAPPED_COLUMNS', 'warn', message));
  ctx.unmapped.push({ file: fname, unmappedColumns: missed });
}
478
+
/**
 * Reduce a balance-HISTORY file (one row per account per date, e.g. a
 * Monarch balances download) to a single row per account — the newest one.
 *
 * Accounts are keyed by their case-insensitive name. Ties on date go to the
 * later row. A row with an unparseable date never displaces a row whose date
 * parsed, while among unparseable rows the last one wins. Rows without an
 * account name are dropped. Output follows first-appearance order of the
 * accounts.
 */
function latestRowPerAccount(data, map) {
  const latest = new Map(); // low(account name) → { row, when }
  const pick = (row, idx) => str(idx != null ? row[idx] : undefined);

  for (const row of data) {
    const acctName = pick(row, map.name);
    if (acctName === '') continue; // blank/footer rows can't identify an account

    const key = low(acctName);
    const stamp = Date.parse(pick(row, map.date));
    const held = latest.get(key);
    // The stored row survives only if it has a real date and the incoming
    // row does not match or beat it.
    const keepHeld = held !== undefined
      && Number.isFinite(held.when)
      && !(Number.isFinite(stamp) && stamp >= held.when);
    if (!keepHeld) latest.set(key, { row, when: stamp });
  }

  return Array.from(latest.values(), (entry) => entry.row);
}
499
+
500
+ // ── file mappers ─────────────────────────────────────────────────────────────
501
+
/**
 * Positions export → one investment account per account-number/name group.
 *
 * Rows are grouped by account number (or, lacking one, account name; lacking
 * both, the file name). Each surviving row becomes a holding; each group
 * becomes one account whose balance is the sum of its holdings' values.
 *
 * The account's final id is reserved through `ctx.uniqueId` at the moment the
 * group is first seen, so every warning emitted for the group — including the
 * per-holding NO_COST_BASIS ones — carries the same id as the account that is
 * eventually pushed, even when the raw id collided with an earlier account.
 *
 * @param {string[][]} data - rows below the header row
 * @param {Object<string, number>} map - canonical key → column index
 * @param {object} dialect - matched dialect (its `institution` is used)
 * @param {string} fname - display name of the file, used in messages
 * @param {number} fileIdx - position of the file in the input, used in ids
 * @param {{accounts: object[], warnings: object[], uniqueId: function}} ctx
 */
function mapHoldingsFile(data, map, dialect, fname, fileIdx, ctx) {
  // Broker exports append synthetic rows — never model them as holdings.
  const syntheticRow = /pending activity|account total|^total$|grand total|^cash & cash investments$/i;
  const groups = new Map(); // key → { id, name, holdings }
  for (const r of data) {
    const cell = (k) => (map[k] != null ? r[map[k]] : undefined);
    const symbol = str(cell('symbol'));
    const name = str(cell('name'));
    if (!symbol && !name) continue;                     // blank/short disclaimer rows
    if (syntheticRow.test(symbol || name)) continue;
    const value = moneyCell(cell('value'));
    const quantity = moneyCell(cell('quantity'));
    if (value == null && quantity == null) continue;    // footer/disclaimer text rows
    const acctId = str(cell('accountId'));
    const acctName = str(cell('accountName'))
      || (acctId ? `${dialect.institution} ${acctId}` : fname.replace(/\.csv$/i, ''));
    const key = acctId || acctName;
    let g = groups.get(key);
    if (!g) {
      // Reserve the deduplicated id now; groups are created in the same order
      // they are later emitted, so ids come out exactly as before.
      g = { id: ctx.uniqueId(acctId || `csv:${fileIdx}:${slug(acctName)}`), name: acctName, holdings: [] };
      groups.set(key, g);
    }
    const costBasis = moneyCell(cell('costBasis'));
    if (costBasis == null) {
      ctx.warnings.push(warning('NO_COST_BASIS', 'info',
        `Holding ${symbol || name} has no cost basis in "${fname}" (cell empty or "--").`, g.id));
    }
    g.holdings.push({
      ticker: symbol ? symbol.replace(/\*+$/, '') : undefined, // Fidelity core positions end in **
      name: name || undefined,
      quantity: quantity ?? undefined,
      value: value ?? undefined,
      costBasis: costBasis ?? undefined,
      assetType: csvAssetType(symbol, name),
    });
  }
  for (const g of groups.values()) {
    const { id } = g;
    // Positions exports carry NO type column — the account name is the only
    // typing signal, so the classification is ALWAYS surfaced as a guess.
    const hint = csvKind(g.name);
    const { accountClass, taxTreatment } = hint
      ? classify(hint[0], hint[1])
      : { accountClass: 'investment', taxTreatment: 'taxable' };
    ctx.warnings.push(warning('CLASSIFICATION_GUESSED', 'warn',
      `CSV positions file "${fname}" has no account-type column — "${g.name}" typed from its name → ${accountClass}/${taxTreatment}. Reclassify if wrong.`, id));
    ctx.accounts.push({
      id,
      institution: dialect.institution,
      name: g.name,
      class: 'investment', // positions exports are investment accounts by construction
      subtype: hint ? String(hint[1] ?? '').toLowerCase() : '',
      // A name that classifies as something other than an investment account
      // cannot supply a tax treatment for one — fall back to taxable.
      taxTreatment: accountClass === 'investment' ? taxTreatment : 'taxable',
      balance: g.holdings.reduce((n, h) => n + (Number.isFinite(h.value) ? h.value : 0), 0),
      currency: 'USD',
      ownerIndex: 0,
      holdings: g.holdings,
    });
  }
}
559
+
/**
 * Accounts file → one canonical account per data row.
 *
 * Typing uses the Type cell when csvKind() recognizes it and otherwise falls
 * back to the account name; a name-derived, low-confidence, or absent
 * classification adds one CLASSIFICATION_GUESSED warning for that row. Rows
 * with neither a name nor a parseable balance are skipped. Loan/credit
 * accounts store the balance as a magnitude and gain a `liability` record.
 */
function mapAccountsFile(data, map, fname, fileIdx, ctx) {
  const warnGuess = (message, id) => {
    ctx.warnings.push(warning('CLASSIFICATION_GUESSED', 'warn', message, id));
  };

  data.forEach((row, rowIdx) => {
    const read = (key) => (map[key] == null ? undefined : row[map[key]]);

    const name = str(read('name'));
    const rawBalance = moneyCell(read('balance'));
    if (name === '' && rawBalance == null) return; // fully blank / footer row

    const id = ctx.uniqueId(str(read('accountId')) || `csv:${fileIdx}:${rowIdx}`);
    const shown = name || id;

    // Prefer the explicit Type cell; the name is only a fallback hint.
    const typeStr = str(read('type'));
    const typeHint = csvKind(typeStr);
    const hint = typeHint ?? csvKind(name);

    let cls;
    let guessMessage = null;
    if (!hint) {
      // No recognizable type signal at all: import the balance honestly as a
      // low-confidence taxable investment — never fabricate a richer story.
      cls = { accountClass: 'investment', taxTreatment: 'taxable', confidence: 'low' };
      guessMessage = `CSV account "${shown}" in "${fname}" has no recognizable type — imported as a taxable investment at low confidence. Set a Type column (e.g. "401k", "Checking", "Mortgage") to classify it.`;
    } else {
      cls = classifyHint(hint);
      if (!typeHint) {
        guessMessage = `CSV account "${shown}" in "${fname}" has no Type value — typed from its name → ${cls.accountClass}/${cls.taxTreatment}. Add a Type column to remove the guess.`;
      } else if (cls.confidence === 'low') {
        guessMessage = `CSV account "${shown}" (type "${typeStr}") classification guessed → ${cls.accountClass}/${cls.taxTreatment}.`;
      }
    }
    if (guessMessage !== null) warnGuess(guessMessage, id);

    const owesMoney = ['loan', 'credit'].includes(cls.accountClass);
    const subtype = hint ? String(hint[1] ?? '').toLowerCase() : '';

    const account = {
      id,
      institution: str(read('institution')) || undefined,
      name: name || undefined,
      class: cls.accountClass,
      subtype,
      taxTreatment: cls.taxTreatment,
      // Accounting-style negatives ("(1,850.00)") are how spreadsheets mark
      // owed balances; a debt's outstanding principal is |x| either way.
      balance: owesMoney ? Math.abs(rawBalance ?? 0) : (rawBalance ?? 0),
      currency: 'USD',
      ownerIndex: intOr0(read('owner')),
    };

    if (owesMoney) {
      const rate = rateCell(read('rate'));
      const minPayment = moneyCell(read('minPayment'));
      const liability = {
        rate,
        minPayment: minPayment != null ? Math.abs(minPayment) : undefined,
      };
      // Home-secured debt names the asset it is attached to.
      if (subtype === 'mortgage' || subtype === 'home equity') {
        liability.assetName = name || 'Home';
      }
      account.liability = liability;
    }

    ctx.accounts.push(account);
  });
}
618
+
/**
 * No dialect matched at all → best-effort generic mapping.
 *
 * Row 0 is taken as the header. The balance column is the one whose data
 * cells most often parse as money (the right-most column wins a tie); the
 * name column is the left-most other column holding at least one non-empty,
 * non-money cell. Everything else is named in a CSV_UNMAPPED_COLUMNS
 * warning. With no money-ish column (or no data rows) nothing is imported
 * and the warning says so instead of inventing zeros.
 *
 * Every imported row also gets a CLASSIFICATION_GUESSED warning, because the
 * only typing signal available here is the account name.
 *
 * @param {string[][]} rows - all parsed rows, header first
 * @param {string} fname - display name of the file, used in messages
 * @param {number} fileIdx - position of the file in the input, used in ids
 * @param {{accounts: object[], warnings: object[], unmapped: object[], uniqueId: function}} ctx
 */
function bestEffortAccounts(rows, fname, fileIdx, ctx) {
  const headers = rows[0].map(normHeader);
  const data = rows.slice(1);
  // Folded instead of spread into Math.max(): spreading one argument per row
  // can exceed the engine's argument limit on very large files and throw.
  const width = data.reduce((w, r) => Math.max(w, r.length), headers.length);

  // Scan right-to-left with a strict ">" so the right-most of equally
  // money-dense columns is kept.
  let balanceIdx = -1;
  let bestScore = 0;
  for (let c = width - 1; c >= 0; c--) {
    const score = data.reduce((n, r) => n + (moneyCell(r[c]) != null ? 1 : 0), 0);
    if (score > bestScore) { bestScore = score; balanceIdx = c; }
  }

  let nameIdx = -1;
  for (let c = 0; c < width; c++) {
    if (c === balanceIdx) continue;
    const texty = data.reduce((n, r) => n + (str(r[c]) && moneyCell(r[c]) == null ? 1 : 0), 0);
    if (texty > 0) { nameIdx = c; break; }
  }

  if (balanceIdx < 0 || !data.length) {
    ctx.warnings.push(warning('CSV_UNMAPPED_COLUMNS', 'warn',
      `CSV file "${fname}" matched no known dialect and no column parses as money — nothing imported. Columns seen: ${headers.filter(Boolean).map((c) => `"${c}"`).join(', ') || '(none)'}.`));
    ctx.unmapped.push({ file: fname, unmappedColumns: headers, reason: 'no dialect, no money column' });
    return;
  }

  // Header text when there is one, otherwise a 1-based position.
  const columnLabel = (idx) => headers[idx] || `column ${idx + 1}`;
  // A file may have no usable name column at all; say that plainly rather
  // than pointing at a non-existent "column 0".
  const nameNote = nameIdx >= 0
    ? `used "${columnLabel(nameIdx)}" as the name and`
    : 'found no name column and used';
  const missed = headers.filter((h, i) => h && i !== balanceIdx && i !== nameIdx);
  ctx.warnings.push(warning('CSV_UNMAPPED_COLUMNS', 'warn',
    `CSV file "${fname}" matched no known dialect — best-effort import ${nameNote} "${columnLabel(balanceIdx)}" as the balance.${missed.length ? ` Unmapped column(s): ${missed.map((c) => `"${c}"`).join(', ')}.` : ''}`));
  if (missed.length) ctx.unmapped.push({ file: fname, unmappedColumns: missed });

  data.forEach((r, rowIdx) => {
    const balance = moneyCell(r[balanceIdx]);
    if (balance == null) return; // no balance in this row → nothing to import
    const name = nameIdx >= 0 ? str(r[nameIdx]) : '';
    const id = ctx.uniqueId(`csv:${fileIdx}:${rowIdx}`);
    const hint = csvKind(name);
    const cls = hint ? classifyHint(hint) : { accountClass: 'investment', taxTreatment: 'taxable' };
    ctx.warnings.push(warning('CLASSIFICATION_GUESSED', 'warn',
      `CSV account "${name || id}" in "${fname}" ${hint ? 'typed from its name' : 'has no recognizable type — imported as a taxable investment'} → ${cls.accountClass}/${cls.taxTreatment} (best-effort file mapping).`, id));
    const isDebt = cls.accountClass === 'loan' || cls.accountClass === 'credit';
    ctx.accounts.push({
      id,
      name: name || undefined,
      class: cls.accountClass,
      subtype: hint ? String(hint[1] ?? '').toLowerCase() : '',
      taxTreatment: cls.taxTreatment,
      // Debts are stored as a magnitude regardless of the sign in the file.
      balance: isDebt ? Math.abs(balance) : balance,
      currency: 'USD',
      ownerIndex: 0,
      ...(isDebt ? { liability: { rate: undefined } } : {}),
    });
  });
}
675
+
676
+ // ── cell helpers ─────────────────────────────────────────────────────────────
677
/** Trimmed string form of a cell; null/undefined become ''. */
const str = (x) => {
  const text = x ?? '';
  return String(text).trim();
};

/** Trimmed, lower-cased string form of a cell; null/undefined become ''. */
const low = (x) => {
  const text = x ?? '';
  return String(text).trim().toLowerCase();
};

/**
 * Lower-case the value, collapse every run of non-alphanumerics into one '-',
 * and drop a leading/trailing '-'. An empty result becomes 'acct'.
 */
const slug = (x) => {
  const dashed = low(x).replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, '');
  return dashed || 'acct';
};

/** Integer value of a non-empty cell that converts to an integer, otherwise 0. */
const intOr0 = (x) => {
  const text = str(x);
  if (text === '') return 0;
  const n = Number(text);
  return Number.isInteger(n) ? n : 0;
};
681
+
682
/**
 * Turn a money-like cell into a number; anything else yields undefined.
 *
 * Accepted shapes include "$1,234.56", " 1234 ", "(1,850.00)" (parentheses
 * mean negative), "-500", "+42" and "5.5%". Empty cells, "--" and "n/a"/"na"
 * count as absent. The result is always a finite number or undefined.
 *
 * @param {*} v raw cell value
 * @returns {number|undefined}
 */
export function moneyCell(v) {
  if (v == null) return undefined;

  const raw = String(v).trim();
  if (raw === '' || raw === '--' || /^n\/?a$/i.test(raw)) return undefined;

  // Accounting style: a fully parenthesised value is negative.
  const wrapped = raw.match(/^\((.*)\)$/);
  let negative = wrapped !== null;
  const inner = wrapped ? wrapped[1] : raw;

  // Strip currency sign, whitespace and thousands separators, then one trailing '%'.
  let digits = inner.replace(/[$\s,]/g, '').replace(/%$/, '');
  if (digits.startsWith('-')) {
    negative = true;
    digits = digits.slice(1);
  }
  if (digits.startsWith('+')) {
    digits = digits.slice(1);
  }

  // Only plain decimals survive: optional integer part, optional '.', then digits.
  if (!/^\d*\.?\d+$/.test(digits)) return undefined;

  const magnitude = Number(digits);
  if (!Number.isFinite(magnitude)) return undefined;
  return negative ? -magnitude : magnitude;
}
702
+
703
/**
 * Read a percentage cell ("5.25%" or "5.25") as a fraction: the parsed value
 * divided by 100. Cells that moneyCell() rejects yield undefined.
 */
function rateCell(v) {
  const percent = moneyCell(v);
  if (percent == null) {
    return undefined;
  }
  return percent / 100;
}
708
+
709
/**
 * Map a free-text type/name → generic [type, subtype] that classify()
 * consumes (the CSV analogue of FIN_TYPE/MX_TYPE, regex-based because CSV
 * cells are human-typed). Returns undefined when nothing is recognizable.
 *
 * Rules are tried top to bottom and the first match wins, so ORDER MATTERS:
 * a more specific product must be tested before any generic rule whose
 * keyword it contains (e.g. "health savings" before the depository "savings").
 *
 * @param {*} s type cell or account name
 * @returns {[string, (string|undefined)]|undefined}
 */
function csvKind(s) {
  const t = low(s).replace(/[()]/g, ''); // "401(k)" → "401k"
  if (!t) return undefined;
  if (/home equity|heloc/.test(t)) return ['loan', 'home equity'];
  if (/mortgage/.test(t)) return ['loan', 'mortgage'];
  // "Student Checking" / "Student Savings" are bank products, not debt. Only
  // treat "student" as a loan when the text also says loan/debt or does not
  // name a deposit product — otherwise a bank balance becomes a liability.
  if (/student/.test(t) && (/loan|debt/.test(t) || !/checking|savings/.test(t))) return ['loan', 'student'];
  if (/(auto|car|vehicle).*(loan|note)|loan.*(auto|car)/.test(t)) return ['loan', 'auto'];
  if (/credit card|creditcard|visa|mastercard|amex|discover card/.test(t)) return ['credit', 'credit card'];
  if (/line of credit/.test(t)) return ['credit', 'line of credit'];
  if (/\bloan\b/.test(t)) return ['loan', undefined];
  // Monarch account types include physical assets ("Real Estate", "Vehicle",
  // "Valuables") — model them as property, not as a fake investment balance.
  if (/real estate|\bproperty\b|primary home|family home/.test(t)) return ['property', 'real estate'];
  if (/vehicle|valuables/.test(t)) return ['property', t];
  // HSA must be tested BEFORE the depository rule: "Health Savings Account"
  // contains "savings" and would otherwise be returned as a bank account.
  if (/hsa|health savings/.test(t)) return ['investment', 'hsa'];
  if (/checking|savings|money market|certificate|\bcd\b|cash management/.test(t)) return ['depository', t];
  if (/529|coverdell|education/.test(t)) return ['investment', '529'];
  if (/annuity/.test(t)) return ['investment', 'tax-deferred']; // pre-tax wrapper, flavor unknown → low confidence in classify()
  if (/roth/.test(t)) return ['investment', t]; // roth ira / roth 401k — classify() reads the word
  if (/401|403b|457b|\bira\b|sep|simple|keogh|tsp|pension|retirement|rollover/.test(t)) return ['investment', t];
  if (/brokerage|invest|taxable|mutual fund|stock|etf|crypto/.test(t)) return ['investment', t];
  return undefined;
}
737
+
738
/**
 * Resolve a [type, subtype] hint to a classification record.
 *
 * A `property` hint is answered here with a fixed property / 'na' / high
 * confidence result and never reaches classify(); every other hint is passed
 * to classify(type, subtype) and its result returned as-is.
 */
function classifyHint(hint) {
  if (hint[0] !== 'property') {
    return classify(hint[0], hint[1]);
  }
  return { accountClass: 'property', taxTreatment: 'na', confidence: 'high' };
}
749
+
750
/**
 * Infer an assetType for a positions row from keywords in its lower-cased
 * ticker and description. Rules are checked in order and the first hit wins;
 * a row matching none of them is reported as 'equity'.
 */
function csvAssetType(symbol, name) {
  const text = [low(symbol), low(name)].join(' ');
  const rules = [
    [/bitcoin|ethereum|crypto|\bbtc\b|\beth\b/, 'crypto'],
    [/money market|spaxx|fdrxx|swvxx|vmfxx|\bcash\b/, 'cash'],
    [/\betf\b|ishares|spdr/, 'etf'],
    [/bond|treasury|fixed income/, 'bond'],
    [/fund|index|admiral|instl/, 'mutual_fund'],
  ];
  const hit = rules.find((rule) => rule[0].test(text));
  return hit ? hit[1] : 'equity';
}