verifyhash 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +883 -0
  3. package/cli/abi/ContributionRegistry.json +881 -0
  4. package/cli/agent.js +2173 -0
  5. package/cli/anchor-artifact.js +853 -0
  6. package/cli/anchor.js +400 -0
  7. package/cli/claim.js +881 -0
  8. package/cli/core/agent-commit.js +448 -0
  9. package/cli/core/agent-session.js +598 -0
  10. package/cli/core/anchor-binding.js +663 -0
  11. package/cli/core/attestation.js +580 -0
  12. package/cli/core/evidence-plans.js +495 -0
  13. package/cli/core/fixtures/evidence-plans/baseline.json +19 -0
  14. package/cli/core/fulfill-intake.js +1082 -0
  15. package/cli/core/go-live-preflight.js +481 -0
  16. package/cli/core/license.js +534 -0
  17. package/cli/core/manifest.js +243 -0
  18. package/cli/core/packetseal.js +591 -0
  19. package/cli/core/registryArtifact.js +49 -0
  20. package/cli/core/revocation.js +539 -0
  21. package/cli/core/rfc3161.js +389 -0
  22. package/cli/core/timestamp.js +482 -0
  23. package/cli/core/trust-asof.js +479 -0
  24. package/cli/dataset.js +2950 -0
  25. package/cli/evidence.js +2227 -0
  26. package/cli/fulfill-webhook-http.js +438 -0
  27. package/cli/git.js +220 -0
  28. package/cli/hash.js +550 -0
  29. package/cli/identity.js +1072 -0
  30. package/cli/journal-cli.js +1110 -0
  31. package/cli/journal-log.js +454 -0
  32. package/cli/journal.js +334 -0
  33. package/cli/lineage.js +447 -0
  34. package/cli/list.js +287 -0
  35. package/cli/parcel.js +1509 -0
  36. package/cli/proof.js +578 -0
  37. package/cli/prove.js +300 -0
  38. package/cli/receipt.js +631 -0
  39. package/cli/registry.js +331 -0
  40. package/cli/reputation.js +344 -0
  41. package/cli/revocation.js +495 -0
  42. package/cli/serve-verify-http.js +298 -0
  43. package/cli/serve-verify.js +333 -0
  44. package/cli/show.js +339 -0
  45. package/cli/verify.js +383 -0
  46. package/cli/vh.js +3927 -0
  47. package/docs/ADOPT.md +183 -0
  48. package/docs/ADOPTION.json +11 -0
  49. package/docs/AGENTTRACE.md +247 -0
  50. package/docs/ANCHORING.md +167 -0
  51. package/docs/AUDIT.md +55 -0
  52. package/docs/CONFORMANCE.md +107 -0
  53. package/docs/DATALEDGER.md +638 -0
  54. package/docs/DECIDE.md +47 -0
  55. package/docs/DECISIONS-PENDING.md +27 -0
  56. package/docs/DEPLOY-PUBLIC-SITE.md +301 -0
  57. package/docs/ENGINE-LEDGER.json +12 -0
  58. package/docs/EVIDENCE.md +519 -0
  59. package/docs/GO-LIVE.md +66 -0
  60. package/docs/IDENTITY.md +123 -0
  61. package/docs/INDEPENDENT-VERIFICATION.md +377 -0
  62. package/docs/INTEGRITY-JOURNAL.md +337 -0
  63. package/docs/KEY-LIFECYCLE.md +179 -0
  64. package/docs/LICENSING.md +46 -0
  65. package/docs/LINEAGE.md +307 -0
  66. package/docs/LOOP-AUDIT-2026-07-03.json +580 -0
  67. package/docs/LOOP-HARDENING-PLAN.md +44 -0
  68. package/docs/MERKLE-LEAVES.md +113 -0
  69. package/docs/METRICS.jsonl +31 -0
  70. package/docs/MORNING.md +204 -0
  71. package/docs/PILOT.md +444 -0
  72. package/docs/PROOFPARCEL.md +227 -0
  73. package/docs/PROOFS.md +262 -0
  74. package/docs/RECEIPTS.md +341 -0
  75. package/docs/REPUTATION.md +158 -0
  76. package/docs/SDK.md +301 -0
  77. package/docs/STRATEGY-ARCHIVE.md +5055 -0
  78. package/docs/SUPERVISOR-RUNBOOK.md +52 -0
  79. package/docs/TRUST-BOUNDARIES.md +335 -0
  80. package/docs/TRUSTLEDGER.md +1976 -0
  81. package/docs/USAGE-BUDGET.json +121 -0
  82. package/docs/VERIFY-SERVICE.md +168 -0
  83. package/index.js +160 -0
  84. package/package.json +41 -0
  85. package/trustledger/build-standalone.js +796 -0
  86. package/trustledger/cli.js +3179 -0
  87. package/trustledger/close.js +391 -0
  88. package/trustledger/corpus.js +159 -0
  89. package/trustledger/dist/BUILD-PROVENANCE.json +99 -0
  90. package/trustledger/dist/trustledger-standalone.html +6197 -0
  91. package/trustledger/dist/trustledger-standalone.html.sha256 +1 -0
  92. package/trustledger/door-core.js +442 -0
  93. package/trustledger/fixtures/bank.csv +7 -0
  94. package/trustledger/fixtures/bank.malformed.csv +3 -0
  95. package/trustledger/fixtures/bank.noalias.csv +5 -0
  96. package/trustledger/fixtures/bank.ofx +34 -0
  97. package/trustledger/fixtures/bank.real.csv +5 -0
  98. package/trustledger/fixtures/corpus/_shared/prior-close.json +22 -0
  99. package/trustledger/fixtures/corpus/bank-book-mismatch--benign-twin/inputs.json +14 -0
  100. package/trustledger/fixtures/corpus/bank-book-mismatch--benign-twin/meta.json +7 -0
  101. package/trustledger/fixtures/corpus/bank-book-mismatch--out-of-trust/inputs.json +14 -0
  102. package/trustledger/fixtures/corpus/bank-book-mismatch--out-of-trust/meta.json +7 -0
  103. package/trustledger/fixtures/corpus/continuity-break--benign-twin/inputs.json +15 -0
  104. package/trustledger/fixtures/corpus/continuity-break--benign-twin/meta.json +7 -0
  105. package/trustledger/fixtures/corpus/continuity-break--out-of-trust/inputs.json +15 -0
  106. package/trustledger/fixtures/corpus/continuity-break--out-of-trust/meta.json +7 -0
  107. package/trustledger/fixtures/corpus/negative-tenant-ledger--benign-twin/inputs.json +13 -0
  108. package/trustledger/fixtures/corpus/negative-tenant-ledger--benign-twin/meta.json +7 -0
  109. package/trustledger/fixtures/corpus/negative-tenant-ledger--out-of-trust/inputs.json +13 -0
  110. package/trustledger/fixtures/corpus/negative-tenant-ledger--out-of-trust/meta.json +7 -0
  111. package/trustledger/fixtures/corpus/owner-overdraw--benign-twin/inputs.json +15 -0
  112. package/trustledger/fixtures/corpus/owner-overdraw--benign-twin/meta.json +7 -0
  113. package/trustledger/fixtures/corpus/owner-overdraw--out-of-trust/inputs.json +15 -0
  114. package/trustledger/fixtures/corpus/owner-overdraw--out-of-trust/meta.json +7 -0
  115. package/trustledger/fixtures/corpus/security-deposit-segregation--benign-twin/inputs.json +16 -0
  116. package/trustledger/fixtures/corpus/security-deposit-segregation--benign-twin/meta.json +7 -0
  117. package/trustledger/fixtures/corpus/security-deposit-segregation--out-of-trust/inputs.json +13 -0
  118. package/trustledger/fixtures/corpus/security-deposit-segregation--out-of-trust/meta.json +7 -0
  119. package/trustledger/fixtures/corpus/subledger-out-of-balance--benign-twin/inputs.json +13 -0
  120. package/trustledger/fixtures/corpus/subledger-out-of-balance--benign-twin/meta.json +7 -0
  121. package/trustledger/fixtures/corpus/subledger-out-of-balance--out-of-trust/inputs.json +13 -0
  122. package/trustledger/fixtures/corpus/subledger-out-of-balance--out-of-trust/meta.json +7 -0
  123. package/trustledger/fixtures/e2e/bank.aliased.csv +4 -0
  124. package/trustledger/fixtures/e2e/bank.csv +4 -0
  125. package/trustledger/fixtures/e2e/bank.nsf.csv +4 -0
  126. package/trustledger/fixtures/e2e/quickbooks.csv +6 -0
  127. package/trustledger/fixtures/e2e/quickbooks.nsf.csv +8 -0
  128. package/trustledger/fixtures/e2e/rentroll.csv +6 -0
  129. package/trustledger/fixtures/e2e/rentroll.nsf.csv +8 -0
  130. package/trustledger/fixtures/e2e/rentroll.short.csv +5 -0
  131. package/trustledger/fixtures/plans/baseline.json +25 -0
  132. package/trustledger/fixtures/plans/price-binding.example.json +27 -0
  133. package/trustledger/fixtures/policy/ambiguous-deposit-example.json +12 -0
  134. package/trustledger/fixtures/policy/baseline.json +19 -0
  135. package/trustledger/fixtures/policy/ca-example.json +12 -0
  136. package/trustledger/fixtures/policy/negative-tenant-ledger-example.json +12 -0
  137. package/trustledger/fixtures/policy/owner-overdraw-example.json +12 -0
  138. package/trustledger/fixtures/quickbooks.csv +7 -0
  139. package/trustledger/fixtures/quickbooks.real.csv +5 -0
  140. package/trustledger/fixtures/rentroll.csv +6 -0
  141. package/trustledger/fixtures/rentroll.real.csv +4 -0
  142. package/trustledger/ingest.js +1163 -0
  143. package/trustledger/lib/policy-bundled-loader.js +44 -0
  144. package/trustledger/lib/sha256-vendored.js +227 -0
  145. package/trustledger/license.js +563 -0
  146. package/trustledger/match.js +551 -0
  147. package/trustledger/plans.js +551 -0
  148. package/trustledger/policy.js +398 -0
  149. package/trustledger/public/index.html +512 -0
  150. package/trustledger/reconcile.js +1486 -0
  151. package/trustledger/report.js +887 -0
  152. package/trustledger/seal.js +854 -0
  153. package/trustledger/server.js +391 -0
  154. package/trustledger/valueproof.js +350 -0
@@ -0,0 +1,1163 @@
1
+ "use strict";
2
+
3
+ // TrustLedger — ingest.js
4
+ //
5
+ // T-22.1: parse + NORMALIZE the three monthly inputs a small property-management
6
+ // trust-account reconciliation needs into ONE transaction model:
7
+ //
8
+ // (a) a BANK STATEMENT — CSV or OFX/QFX
9
+ // (b) a QUICKBOOKS trust ledger — CSV export
10
+ // (c) a RENT-ROLL / tenant — CSV sub-ledger
11
+ //
12
+ // Every parser is a PURE function: (text, [opts]) -> NormalizedRecord[].
13
+ // No I/O, no clock, no globals — the same input always yields the same output,
14
+ // which is what makes the downstream matcher/reconciler deterministic and
15
+ // audit-defensible.
16
+ //
17
+ // Normalized record shape (every field always present):
18
+ // {
19
+ // date: "YYYY-MM-DD", // ISO calendar date
20
+ // amount: <integer cents>, // SIGNED: + = money INTO the trust account,
21
+ // // - = money OUT. Never a float.
22
+ // memo: <string>, // free-text description (trimmed)
23
+ // kind: <Kind>, // coarse transaction class (see KIND)
24
+ // party: <string>, // tenant / payee / counterparty ("" if unknown)
25
+ // source: <Source>, // which input this row came from
26
+ // }
27
+ //
28
+ // Amounts are INTEGER CENTS throughout. Dollar strings are parsed by exact
29
+ // digit manipulation (never `parseFloat`), so "1234.99" -> 123499 with zero
30
+ // binary-float drift, and a value like "10.005" is REJECTED, not rounded.
31
+ //
32
+ // "Strict" is the whole point: a malformed row raises an IngestError naming the
33
+ // row number and the problem, rather than being silently dropped. A trust
34
+ // reconciliation that quietly skips a row is worse than useless — it hides the
35
+ // exact discrepancy a broker is legally on the hook to find.
36
+
37
+ // ---------------------------------------------------------------------------
38
+ // Enums
39
+ // ---------------------------------------------------------------------------
40
+
41
// Which of the three monthly inputs a normalized record came from. The values
// are the strings stored in a record's `source` field. Frozen so the table
// cannot be mutated at runtime.
const SOURCE = Object.freeze({
  BANK: "bank",
  QUICKBOOKS: "quickbooks",
  RENT_ROLL: "rent_roll",
});

// Coarse transaction classes. The values are the strings stored in a record's
// `kind` field. Frozen so the table cannot be mutated at runtime.
const KIND = Object.freeze({
  DEPOSIT: "deposit", // money in (rent received, owner contribution)
  CHECK: "check", // money out by check (owner draw, vendor, refund)
  TRANSFER: "transfer", // money moved between accounts
  FEE: "fee", // bank/service fee out
  NSF: "nsf", // returned/bounced item reversal
  ADJUSTMENT: "adjustment", // manual correction
  OTHER: "other", // classified but uncategorized
});

// Set of every KIND value, for O(1) membership checks on an explicit kind string.
const VALID_KINDS = new Set(Object.values(KIND));
58
+
59
+ // ---------------------------------------------------------------------------
60
+ // Errors
61
+ // ---------------------------------------------------------------------------
62
+
63
// Error raised for any malformed input. Carries the location of the problem so
// callers can report it without parsing the message text.
class IngestError extends Error {
  // `row` is the 1-based data-row number (header excluded) when known, else
  // null. `source` is the SOURCE value being parsed, else null. The location
  // suffix is appended to the message only when a row is known; the source is
  // shown inside that suffix when present.
  constructor(message, { row = null, source = null } = {}) {
    let suffix = "";
    if (row != null) {
      const sourcePart = source ? `, ${source}` : "";
      suffix = ` (row ${row}${sourcePart})`;
    }
    super(`${message}${suffix}`);
    this.name = "IngestError";
    this.row = row;
    this.source = source;
  }
}
75
+
76
+ // ---------------------------------------------------------------------------
77
+ // Amount parsing — exact integer cents, no float
78
+ // ---------------------------------------------------------------------------
79
+
80
// Parse a human dollar string into SIGNED integer cents, exactly.
//
// Accepts:
//   "1,234.56"  "1234.56"  "1234"  ".5"  "0.05"  "$1,234.56"
//   a leading "-" or "+", written either after the currency symbol ("$-5.00")
//   or before it ("-$5.00"), and accounting-style parentheses
//   "(1,234.56)" => negative.
// Rejects (throws IngestError):
//   null/empty, non-numeric, > 2 decimal places, a second sign or second
//   currency symbol, malformed thousands grouping, values outside the
//   safe-integer range.
//
// A zero amount is always returned as +0, never -0, so "-0.00" and "(0)"
// compare equal to 0 under Object.is / strict deep equality.
//
// `field` and `loc` ({row, source}) only flavor the error message.
function parseCents(raw, field = "amount", loc = {}) {
  if (raw == null) {
    throw new IngestError(`missing ${field}`, loc);
  }
  let s = String(raw).trim();
  if (s === "") {
    throw new IngestError(`empty ${field}`, loc);
  }

  // Accounting negatives: (1,234.56) == -1234.56
  let negative = false;
  if (/^\(.*\)$/.test(s)) {
    negative = true;
    s = s.slice(1, -1).trim();
  }

  // Strip a single currency symbol if present before the sign.
  let sawCurrency = false;
  if (s.startsWith("$")) {
    sawCurrency = true;
    s = s.slice(1).trim();
  }

  // Leading sign.
  const signMatch = s.match(/^([+-])/);
  if (signMatch) {
    if (signMatch[1] === "-") negative = !negative;
    s = s.slice(1).trim();
    // "-$5.00" is as common in bank exports as "$-5.00"; allow the symbol
    // after the sign too, but never two symbols.
    if (!sawCurrency && s.startsWith("$")) {
      s = s.slice(1).trim();
    }
  }
  if (s === "") {
    throw new IngestError(`malformed ${field}: "${raw}"`, loc);
  }

  // No further signs allowed anywhere.
  if (/[+-]/.test(s)) {
    throw new IngestError(`malformed ${field}: "${raw}"`, loc);
  }

  // Remove thousands separators ONLY when they group digits correctly.
  // (We do not try to be clever about locale; commas are grouping, period is
  // the decimal point — the US convention these inputs use.)
  if (s.includes(",")) {
    const parts = s.split(",");
    // First group: 1..3 digits; every later group: exactly 3 digits.
    // The last group may carry the decimal portion.
    for (let i = 0; i < parts.length; i++) {
      const seg = i === parts.length - 1 ? parts[i].split(".")[0] : parts[i];
      const ok = i === 0 ? /^\d{1,3}$/.test(seg) : /^\d{3}$/.test(seg);
      if (!ok) {
        throw new IngestError(`malformed ${field}: "${raw}"`, loc);
      }
    }
    s = s.replace(/,/g, "");
  }

  // Now s must be digits with at most one dot and <=2 fractional digits.
  const m = s.match(/^(\d*)(?:\.(\d{0,2}))?$/);
  if (!m || (m[1] === "" && (m[2] === undefined || m[2] === ""))) {
    throw new IngestError(`malformed ${field}: "${raw}"`, loc);
  }
  const whole = m[1] === "" ? "0" : m[1];
  const frac = (m[2] || "").padEnd(2, "0");

  // Build cents via integer math; both steps are range-checked so a value that
  // cannot be represented exactly is rejected instead of silently rounded.
  const dollars = Number(whole);
  const cents = Number(frac);
  if (!Number.isSafeInteger(dollars)) {
    throw new IngestError(`amount out of range: "${raw}"`, loc);
  }
  const total = dollars * 100 + cents;
  if (!Number.isSafeInteger(total)) {
    throw new IngestError(`amount out of range: "${raw}"`, loc);
  }
  // Negating 0 would produce -0; keep zero unsigned.
  return negative && total !== 0 ? -total : total;
}
162
+
163
+ // ---------------------------------------------------------------------------
164
+ // Date parsing — normalize to YYYY-MM-DD
165
+ // ---------------------------------------------------------------------------
166
+
167
// Month-name -> 1..12 lookup, keyed by lower-cased name. Each inner list holds
// the accepted spellings for one month (3-letter abbreviation, "sept", and the
// full name); its position in the outer list gives the month number. Built
// once and frozen — no locale or Date() involvement, so a given textual month
// always maps to the same number.
const MONTH_NAMES = Object.freeze(
  [
    ["jan", "january"],
    ["feb", "february"],
    ["mar", "march"],
    ["apr", "april"],
    ["may"],
    ["jun", "june"],
    ["jul", "july"],
    ["aug", "august"],
    ["sep", "sept", "september"],
    ["oct", "october"],
    ["nov", "november"],
    ["dec", "december"],
  ].reduce((table, spellings, index) => {
    for (const spelling of spellings) {
      table[spelling] = index + 1;
    }
    return table;
  }, {})
);
185
+
186
// Normalize a textual date to a strict ISO "YYYY-MM-DD" string.
//
// Accepts: YYYY-MM-DD, MM/DD/YYYY, M/D/YY, YYYYMMDD (OFX style), and the common
// textual forms — "Mon DD, YYYY" ("Jan 5, 2024") and "DD-Mon-YYYY"
// ("5-Jan-2024"). Two-digit years are read as 20YY.
//
// The calendar is validated (no 02/30, leap-year aware) without Date(), so the
// same input always yields the same output. The year is zero-padded to four
// digits so the result is always a well-formed ISO date.
//
// Throws IngestError (flavored with `loc` = {row, source}) on a missing, empty,
// unrecognized, or calendar-invalid date.
function parseDate(raw, loc = {}) {
  if (raw == null) throw new IngestError("missing date", loc);
  const s = String(raw).trim();
  if (s === "") throw new IngestError("empty date", loc);

  // Resolve a month word through MONTH_NAMES using an OWN-property check, so
  // inherited keys such as "constructor" are rejected instead of slipping
  // through as a non-numeric month.
  const monthFromName = (name) => {
    const key = name.toLowerCase();
    if (!Object.prototype.hasOwnProperty.call(MONTH_NAMES, key)) {
      throw new IngestError(`unrecognized month in date: "${raw}"`, loc);
    }
    return String(MONTH_NAMES[key]);
  };

  let y;
  let mo;
  let d;

  let m;
  if ((m = s.match(/^(\d{4})-(\d{2})-(\d{2})$/))) {
    [, y, mo, d] = m;
  } else if ((m = s.match(/^(\d{1,2})\/(\d{1,2})\/(\d{2}|\d{4})$/))) {
    mo = m[1];
    d = m[2];
    y = m[3].length === 2 ? `20${m[3]}` : m[3];
  } else if ((m = s.match(/^(\d{4})(\d{2})(\d{2})$/))) {
    // OFX/QFX YYYYMMDD (optionally followed by HHMMSS we ignore upstream).
    [, y, mo, d] = m;
  } else if ((m = s.match(/^([A-Za-z]+)\.?\s+(\d{1,2}),?\s+(\d{4})$/))) {
    // "Mon DD, YYYY" — e.g. "Jan 5, 2024", "January 5 2024", "Sept. 5, 2024".
    mo = monthFromName(m[1]);
    d = m[2];
    y = m[3];
  } else if ((m = s.match(/^(\d{1,2})-([A-Za-z]+)\.?-(\d{2}|\d{4})$/))) {
    // "DD-Mon-YYYY" — e.g. "5-Jan-2024", "05-Jan-24".
    mo = monthFromName(m[2]);
    d = m[1];
    y = m[3].length === 2 ? `20${m[3]}` : m[3];
  } else {
    throw new IngestError(`unrecognized date: "${raw}"`, loc);
  }

  const yi = Number(y);
  const mi = Number(mo);
  const di = Number(d);
  if (mi < 1 || mi > 12) throw new IngestError(`invalid month in date: "${raw}"`, loc);
  const daysInMonth = [
    31,
    // leap-year aware February
    (yi % 4 === 0 && yi % 100 !== 0) || yi % 400 === 0 ? 29 : 28,
    31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
  ];
  if (di < 1 || di > daysInMonth[mi - 1]) {
    throw new IngestError(`invalid day in date: "${raw}"`, loc);
  }
  const pad = (n, width) => String(n).padStart(width, "0");
  return `${pad(yi, 4)}-${pad(mi, 2)}-${pad(di, 2)}`;
}
243
+
244
+ // ---------------------------------------------------------------------------
245
+ // CSV parsing — RFC-4180-ish: quotes, embedded commas/newlines, "" escape
246
+ // ---------------------------------------------------------------------------
247
+
248
// Parse CSV text into an array of rows (each a string[]). Handles quoted fields
// containing commas, newlines, and doubled quotes ("" => "). CRLF and lone CR
// are treated as LF. Fully blank lines are dropped.
//
// Throws IngestError when the text ends inside a quoted field. A stray opening
// quote would otherwise swallow every following line into a single cell and
// silently drop those rows — the one failure mode a strict ingest must not have.
function parseCSV(text) {
  const rows = [];
  let field = "";
  let row = [];
  let inQuotes = false;
  let sawAny = false;

  const pushField = () => {
    row.push(field);
    field = "";
  };
  const pushRow = () => {
    pushField();
    // Drop fully-blank lines (single empty field, nothing else).
    const blank = row.length === 1 && row[0].trim() === "";
    if (!blank) rows.push(row);
    row = [];
  };

  // Normalize CRLF/CR to LF for a single state machine.
  const s = text.replace(/\r\n?/g, "\n");

  for (let i = 0; i < s.length; i++) {
    const c = s[i];
    sawAny = true;
    if (inQuotes) {
      if (c === '"') {
        if (s[i + 1] === '"') {
          // Doubled quote inside a quoted field is a literal quote.
          field += '"';
          i++;
        } else {
          inQuotes = false;
        }
      } else {
        field += c;
      }
    } else if (c === '"') {
      inQuotes = true;
    } else if (c === ",") {
      pushField();
    } else if (c === "\n") {
      pushRow();
    } else {
      field += c;
    }
  }

  // Reaching end-of-text while still inside quotes means a quote was never
  // closed; everything after it was absorbed into one field.
  if (inQuotes) {
    throw new IngestError("unterminated quoted field in CSV");
  }

  // Flush trailing field/row if the text didn't end with a newline.
  if (field !== "" || row.length > 0 || (sawAny && rows.length === 0)) {
    pushRow();
  }
  return rows;
}
302
+
303
// Resolve each logical column of `schema` to a 0-based index into `header`.
// Header cells are compared trimmed and case-insensitively; for each logical
// column the aliases are tried in order and the first one present wins. A
// logical column with no matching header resolves to -1.
//
// `columnMap` (T-25.3) is an OPTIONAL `{ <logical>: <headerName> }` override.
// When given it is first validated by validateColumnMap (which throws on an
// unknown logical key or a header not in the file); every logical field it
// names is bound to that header's index and skips alias detection. Fields it
// does not name still go through alias detection. Without a columnMap only
// alias detection runs.
//
// Returns { <logical>: <index> } with one entry per schema key.
function indexHeader(header, schema, source, columnMap = null) {
  const lowered = header.map((name) => String(name).trim().toLowerCase());
  const forced = columnMap
    ? validateColumnMap(columnMap, header, schema, source)
    : null;

  const resolved = {};
  Object.entries(schema).forEach(([logical, aliases]) => {
    if (forced && Object.prototype.hasOwnProperty.call(forced, logical)) {
      resolved[logical] = forced[logical];
      return;
    }
    const firstHit = aliases
      .map((alias) => lowered.indexOf(alias.toLowerCase()))
      .find((position) => position !== -1);
    resolved[logical] = firstHit === undefined ? -1 : firstHit;
  });
  return resolved;
}
334
+
335
// Check a caller-supplied `columnMap` against the file's header and the source
// schema, resolving each entry to a 0-based column index. Header names are
// compared trimmed and case-insensitively. Throws an IngestError when:
//   * a key is not a logical field of `schema`,
//   * a mapped value is null/undefined/blank, or
//   * the named header is not present in the file.
// The unknown-field and missing-header messages list the available options.
// Returns { <logical>: <index> } for exactly the entries in `columnMap`.
function validateColumnMap(columnMap, header, schema, source) {
  const lowered = header.map((name) => String(name).trim().toLowerCase());
  const knownFields = Object.keys(schema);
  const resolved = {};

  Object.keys(columnMap).forEach((logical) => {
    const wantHeader = columnMap[logical];

    const isKnown = Object.prototype.hasOwnProperty.call(schema, logical);
    if (!isKnown) {
      throw new IngestError(
        `unknown logical field "${logical}" in column map for ${source} ` +
          `(available fields: ${knownFields.join(", ")})`,
        { source }
      );
    }

    const isBlank = wantHeader == null || String(wantHeader).trim() === "";
    if (isBlank) {
      throw new IngestError(
        `column map for "${logical}" must name a header (got empty value)`,
        { source }
      );
    }

    const position = lowered.indexOf(String(wantHeader).trim().toLowerCase());
    if (position === -1) {
      throw new IngestError(
        `column map for "${logical}" names header "${wantHeader}" which is not ` +
          `in the file (available headers: ${header.join(", ")})`,
        { source }
      );
    }

    resolved[logical] = position;
  });

  return resolved;
}
372
+
373
// Assert that every logical column in `names` was resolved in `cols` (i.e. is
// neither -1 nor undefined). Throws an IngestError naming the first missing
// column, in `names` order; returns nothing when all are present.
function requireCols(cols, names, source) {
  const unresolved = names.filter(
    (name) => cols[name] === -1 || cols[name] === undefined
  );
  if (unresolved.length > 0) {
    const n = unresolved[0];
    throw new IngestError(
      `missing required column "${n}" in header`,
      { source }
    );
  }
}
383
+
384
// Read column `idx` from a parsed row. An unresolved column (-1 or undefined)
// yields undefined, as does an index past the end of a short row.
function cell(arr, idx) {
  const unresolved = idx === -1 || idx === undefined;
  return unresolved ? undefined : arr[idx];
}
389
+
390
+ // ---------------------------------------------------------------------------
391
+ // Kind classification helpers
392
+ // ---------------------------------------------------------------------------
393
+
394
// Infer a coarse kind from free text (memo/type) when the source gives no
// explicit one. The text is lower-cased and tested against an ordered list of
// keyword patterns; the first match decides. With no keyword match the sign of
// `amountCents` decides: positive => deposit, negative => check, otherwise
// (zero or not a number) => other.
function classifyKind(text, amountCents) {
  const haystack = String(text || "").toLowerCase();

  // Order matters: earlier rules take precedence over later ones.
  const rules = [
    [/\bnsf\b|returned|bounced|insufficient|reversal|reverse/, KIND.NSF],
    [/\bfee\b|service charge|svc chg|charge\b/, KIND.FEE],
    [/transfer|xfer|ach out|ach in/, KIND.TRANSFER],
    [/\bcheck\b|chk #|chk#|ck#|draw|disbursement|payee/, KIND.CHECK],
    [/deposit|rent|payment received|received from/, KIND.DEPOSIT],
    [/adjust|correction|void/, KIND.ADJUSTMENT],
  ];
  for (const [pattern, kind] of rules) {
    if (pattern.test(haystack)) {
      return kind;
    }
  }

  // Sign-based fallback.
  if (amountCents > 0) {
    return KIND.DEPOSIT;
  }
  if (amountCents < 0) {
    return KIND.CHECK;
  }
  return KIND.OTHER;
}
415
+
416
// Normalize / validate an explicitly-supplied kind string into a KIND value.
//
//   raw          the explicit type/kind cell (may be null/blank)
//   fallbackText free text (type + memo) used for keyword classification
//   amountCents  signed amount, used only for the sign-based fallback
//   loc          accepted for signature parity with the other helpers; unused
//
// Resolution order:
//   1. a returned-item keyword in `fallbackText` => NSF, regardless of `raw`;
//   2. blank `raw` => classifyKind(fallbackText, amountCents);
//   3. `raw` already a KIND value => that value;
//   4. `raw` a known alias => the aliased KIND;
//   5. otherwise classifyKind on `raw` + `fallbackText`.
function coerceKind(raw, fallbackText, amountCents, loc) {
  // An NSF / returned-item is the single most important exception a trust
  // reconciliation must surface, and accounting exports routinely file the
  // reversal under a generic "Deposit"/"Check" type with "NSF" only in the
  // memo. So a returned-item keyword ALWAYS wins over the explicit type — we
  // would rather over-flag than silently fold a reversal into a clean deposit.
  if (/\bnsf\b|returned|bounced|insufficient|reversal/i.test(fallbackText)) {
    return KIND.NSF;
  }
  if (raw == null || String(raw).trim() === "") {
    return classifyKind(fallbackText, amountCents);
  }
  const k = String(raw).trim().toLowerCase();
  if (VALID_KINDS.has(k)) return k;
  // Common aliases.
  const aliases = {
    dep: KIND.DEPOSIT,
    chk: KIND.CHECK,
    cheque: KIND.CHECK,
    payment: KIND.DEPOSIT,
    "service charge": KIND.FEE,
    xfer: KIND.TRANSFER,
    returned: KIND.NSF,
    bounce: KIND.NSF,
    adj: KIND.ADJUSTMENT,
  };
  // Own-property check: a plain `aliases[k]` lookup would also resolve
  // inherited keys ("constructor", "__proto__") and return a non-string kind.
  if (Object.prototype.hasOwnProperty.call(aliases, k)) return aliases[k];
  // Unknown kind word is not fatal — classify from text/sign but keep going.
  return classifyKind(`${raw} ${fallbackText}`, amountCents);
}
447
+
448
// Assemble a normalized record with its six fields in canonical order. `memo`
// and `party` are coerced to trimmed strings (null/undefined become ""); the
// other fields are passed through unchanged.
function makeRecord({ date, amount, memo, kind, party, source }) {
  const toTrimmedText = (value) => String(value == null ? "" : value).trim();
  const cleanMemo = toTrimmedText(memo);
  const cleanParty = toTrimmedText(party);
  return { date, amount, memo: cleanMemo, kind, party: cleanParty, source };
}
458
+
459
+ // ---------------------------------------------------------------------------
460
+ // (a) BANK STATEMENT — CSV or OFX/QFX
461
+ // ---------------------------------------------------------------------------
462
+
463
// Header aliases for a bank-statement CSV: logical column -> accepted header
// names, listed in priority order (the first alias found in the file wins).
//
// Bank CSVs vary wildly; we support BOTH common shapes:
//   * a single signed Amount column, OR
//   * separate Debit / Credit columns (debit => money out => negative).
const BANK_SCHEMA = {
  date: ["date", "posted", "posting date", "transaction date", "trans date"],
  amount: ["amount", "amt"],
  debit: [
    "debit",
    "withdrawal",
    "withdrawals",
    "money out",
    // real bank exports (Chase/BofA/Wells/QB CSV) — money OUT columns
    "withdrawal amt",
    "withdrawal amount",
    "debit amt",
    "debit amount",
  ],
  credit: [
    "credit",
    "deposit",
    "deposits",
    "money in",
    // real bank exports — money IN columns
    "deposit amt",
    "deposit amount",
    "credit amt",
    "credit amount",
  ],
  // Free-text description; check-number columns are accepted as a last resort.
  memo: ["description", "memo", "details", "name", "payee", "check number", "check #", "check no"],
  // Optional explicit transaction type, fed to coerceKind by the row builder.
  type: ["type", "transaction type"],
};
494
+
495
// Build ONE normalized bank record from a parsed CSV row.
//
//   arr        the row's cells
//   cols       logical-column -> index map from indexHeader
//   hasSigned  true  => read the single signed amount column
//              false => read split debit/credit columns; exactly one of the
//                       two must be non-blank, debit is stored negative and
//                       credit positive regardless of the sign in the cell
//   loc        {row, source} attached to any IngestError
//
// PURE: throws an IngestError on any bad cell. A zero amount is always stored
// as +0 (never -0), so zero-value rows compare equal under strict equality.
// Returns a normalized record with source = bank and an empty party.
function buildBankRecord(arr, cols, hasSigned, loc) {
  const date = parseDate(cell(arr, cols.date), loc);

  let amount;
  if (hasSigned) {
    amount = parseCents(cell(arr, cols.amount), "amount", loc);
  } else {
    const dRaw = cell(arr, cols.debit);
    const cRaw = cell(arr, cols.credit);
    const dHas = dRaw != null && String(dRaw).trim() !== "";
    const cHas = cRaw != null && String(cRaw).trim() !== "";
    if (dHas && cHas) {
      throw new IngestError("row has BOTH a debit and a credit value", loc);
    }
    if (!dHas && !cHas) {
      throw new IngestError("row has neither debit nor credit value", loc);
    }
    if (dHas) {
      const v = parseCents(dRaw, "debit", loc);
      amount = -Math.abs(v);
    } else {
      const v = parseCents(cRaw, "credit", loc);
      amount = Math.abs(v);
    }
  }
  // -Math.abs(0) is -0; collapse any signed zero to plain 0.
  if (amount === 0) amount = 0;

  const memo = cell(arr, cols.memo) || "";
  const typeText = cell(arr, cols.type) || "";
  const kind = coerceKind(typeText, `${typeText} ${memo}`, amount, loc);
  return makeRecord({
    date,
    amount,
    memo,
    kind,
    party: "",
    source: SOURCE.BANK,
  });
}
538
+
539
// Strict bank-statement CSV parser: (text, [opts]) -> NormalizedRecord[].
// The first non-blank row is the header. A date column is required, plus
// either a signed amount column or at least one of debit/credit; when an
// amount column exists it is used and debit/credit are ignored.
// `opts.columnMap` is forwarded to indexHeader as a header override.
// Throws an IngestError on empty input, a missing required column, or the
// first bad data row (rows are numbered from 1, header excluded).
function parseBankCSV(text, opts = {}) {
  const [header, ...dataRows] = parseCSV(text);
  if (header === undefined) {
    throw new IngestError("empty bank statement", { source: SOURCE.BANK });
  }

  const cols = indexHeader(header, BANK_SCHEMA, SOURCE.BANK, opts.columnMap);
  requireCols(cols, ["date"], SOURCE.BANK);

  const hasSigned = cols.amount !== -1;
  const hasSplit = cols.debit !== -1 || cols.credit !== -1;
  if (!(hasSigned || hasSplit)) {
    throw new IngestError(
      'bank statement needs an "amount" column or debit/credit columns',
      { source: SOURCE.BANK }
    );
  }

  return dataRows.map((cells, index) =>
    buildBankRecord(cells, cols, hasSigned, { row: index + 1, source: SOURCE.BANK })
  );
}
562
+
563
// Read the value of the first `<tag>` in an OFX block, matching the tag name
// case-insensitively. The value is everything after the tag up to the next
// '<' or line break, trimmed — so SGML-style unclosed tags work. Returns
// undefined when the tag does not occur.
function ofxTagVal(block, tag) {
  const pattern = new RegExp(`<${tag}>([^<\\r\\n]*)`, "i");
  const found = block.match(pattern);
  if (!found) {
    return undefined;
  }
  return found[1].trim();
}
569
+
570
// Split an OFX/QFX document into its complete <STMTTRN>...</STMTTRN>
// transaction blocks (tag names matched case-insensitively).
//
// Throws an IngestError when:
//   * the text is plainly not an OFX document (no <OFX> and no <STMTTRN>), so
//     a misrouted CSV is a clear error rather than a silent empty result; or
//   * there are more <STMTTRN> open tags than complete blocks — i.e. a
//     transaction is unclosed or the file is truncated. Returning only the
//     complete blocks would silently drop that transaction.
//
// Returns the matched block strings, in document order (possibly empty for an
// OFX document with no transactions).
function ofxBlocks(text) {
  const blocks = text.match(/<STMTTRN>[\s\S]*?<\/STMTTRN>/gi) || [];
  if (blocks.length === 0 && !/<OFX>|<STMTTRN>/i.test(text)) {
    throw new IngestError("not an OFX/QFX document", { source: SOURCE.BANK });
  }
  const openTags = (text.match(/<STMTTRN>/gi) || []).length;
  if (openTags !== blocks.length) {
    throw new IngestError(
      `OFX has ${openTags} <STMTTRN> open tag(s) but only ${blocks.length} ` +
        "complete transaction block(s) — unclosed or truncated transaction",
      { source: SOURCE.BANK }
    );
  }
  return blocks;
}
580
+
581
// Build ONE normalized bank record from a single OFX <STMTTRN> block.
// Reads DTPOSTED (only its first 8 characters, YYYYMMDD — any time/zone suffix
// is ignored), TRNAMT, MEMO (falling back to NAME when MEMO is absent or
// empty), and TRNTYPE. PURE; throws an IngestError carrying `loc` when
// DTPOSTED is absent or when the date or amount fails to parse.
function buildOFXRecord(block, loc) {
  const postedRaw = ofxTagVal(block, "DTPOSTED");
  if (postedRaw == null) {
    throw new IngestError("OFX txn missing DTPOSTED", loc);
  }
  const date = parseDate(postedRaw.slice(0, 8), loc);

  const amountRaw = ofxTagVal(block, "TRNAMT");
  const amount = parseCents(amountRaw, "TRNAMT", loc);

  const memo = ofxTagVal(block, "MEMO") || ofxTagVal(block, "NAME") || "";
  const trntype = ofxTagVal(block, "TRNTYPE") || "";
  const kind = coerceKind(trntype, `${trntype} ${memo}`, amount, loc);

  return makeRecord({
    date,
    amount,
    memo,
    kind,
    party: "",
    source: SOURCE.BANK,
  });
}
595
+
596
// Strict OFX/QFX reader: one normalized record per <STMTTRN> block, in
// document order. Each block is handed to buildOFXRecord with a 1-based `row`
// so errors can name the offending transaction. The first bad block throws and
// aborts the whole parse.
function parseOFX(text) {
  return ofxBlocks(text).map((block, index) =>
    buildOFXRecord(block, { row: index + 1, source: SOURCE.BANK })
  );
}
606
+
607
// Strict bank-statement entry point. An explicit `format` ("csv"|"ofx") wins;
// otherwise the content is sniffed for OFX markers. `columnMap` is forwarded to
// the CSV parser only, since OFX has no header row to map. Throws an
// IngestError when `text` is null or undefined.
function parseBankStatement(text, { format, columnMap } = {}) {
  if (text == null) {
    throw new IngestError("no bank input", { source: SOURCE.BANK });
  }

  let fmt = format;
  if (!fmt) {
    fmt = /<OFX>|<STMTTRN>|OFXHEADER/i.test(text) ? "ofx" : "csv";
  }

  return fmt === "ofx" ? parseOFX(text) : parseBankCSV(text, { columnMap });
}
615
+
616
+ // ---------------------------------------------------------------------------
617
+ // (b) QUICKBOOKS trust-ledger CSV
618
+ // ---------------------------------------------------------------------------
619
+
620
// Header aliases for a QuickBooks account "transaction detail" export, keyed by
// logical field. The export usually has separate Debit and Credit columns plus
// Type, Name, Memo and Date; a single signed Amount column is accepted too.
// The row builder signs Credit as + and Debit as -, the same convention the
// bank records use, so the two sources can be reconciled directly.
const QB_SCHEMA = {
  date: ["date", "trans date", "transaction date"],
  type: ["type", "transaction type"],
  // The last two entries are "transaction detail" report columns.
  party: ["name", "payee", "customer", "vendor", "received from", "paid to", "split", "account"],
  // "Num" (check/reference number) and "Clr" (cleared flag) headers are
  // accepted as memo aliases alongside the usual memo/description names.
  memo: ["memo", "description", "memo/description", "num", "clr"],
  debit: ["debit", "payment", "decrease"],
  credit: ["credit", "deposit", "increase"],
  amount: ["amount", "amt"],
};
645
+
646
// Normalize one parsed QuickBooks row into a record.
//   arr       - the row's cells
//   cols      - logical field -> column index map
//   hasSigned - true when the file has a single signed amount column
//   loc       - { row, source } attached to any IngestError
// In split mode exactly one of debit/credit must be filled: a debit becomes a
// negative amount, a credit a positive one. Used by both the strict and the
// diagnostic QuickBooks paths.
function buildQuickBooksRecord(arr, cols, hasSigned, loc) {
  const isFilled = (value) => value != null && String(value).trim() !== "";

  const date = parseDate(cell(arr, cols.date), loc);

  let amount;
  if (hasSigned) {
    amount = parseCents(cell(arr, cols.amount), "amount", loc);
  } else {
    const debitCell = cell(arr, cols.debit);
    const creditCell = cell(arr, cols.credit);
    const debitGiven = isFilled(debitCell);
    const creditGiven = isFilled(creditCell);

    // Both filled or both empty: the row has no single unambiguous amount.
    if (debitGiven === creditGiven) {
      throw new IngestError(
        debitGiven
          ? "row has BOTH debit and credit values"
          : "row has neither debit nor credit value",
        loc
      );
    }

    if (debitGiven) {
      amount = -Math.abs(parseCents(debitCell, "debit", loc));
    } else {
      amount = Math.abs(parseCents(creditCell, "credit", loc));
    }
  }

  const memo = cell(arr, cols.memo) || "";
  const party = cell(arr, cols.party) || "";
  const typeText = cell(arr, cols.type) || "";

  return makeRecord({
    date,
    amount,
    memo,
    kind: coerceKind(typeText, `${typeText} ${memo}`, amount, loc),
    party,
    source: SOURCE.QUICKBOOKS,
  });
}
683
+
684
// Strict QuickBooks CSV parser. Row 0 is the header; every later row becomes
// one normalized record via buildQuickBooksRecord, with `row` set to its
// 1-based data-row number. `opts.columnMap` is passed to indexHeader. Throws an
// IngestError for null input, an empty export, a missing date column, or a
// file with neither an amount column nor debit/credit columns. Any row error
// aborts the whole parse.
function parseQuickBooksCSV(text, opts = {}) {
  if (text == null) {
    throw new IngestError("no QuickBooks input", { source: SOURCE.QUICKBOOKS });
  }

  const rows = parseCSV(text);
  if (rows.length === 0) {
    throw new IngestError("empty QuickBooks export", {
      source: SOURCE.QUICKBOOKS,
    });
  }

  const cols = indexHeader(rows[0], QB_SCHEMA, SOURCE.QUICKBOOKS, opts.columnMap);
  requireCols(cols, ["date"], SOURCE.QUICKBOOKS);

  const hasSigned = cols.amount !== -1;
  const hasSplit = !(cols.debit === -1 && cols.credit === -1);
  if (!(hasSigned || hasSplit)) {
    throw new IngestError(
      'QuickBooks export needs an "amount" column or debit/credit columns',
      { source: SOURCE.QUICKBOOKS }
    );
  }

  const records = [];
  for (const [rowNumber, row] of rows.entries()) {
    if (rowNumber === 0) continue; // header
    records.push(
      buildQuickBooksRecord(row, cols, hasSigned, {
        row: rowNumber,
        source: SOURCE.QUICKBOOKS,
      })
    );
  }
  return records;
}
712
+
713
+ // ---------------------------------------------------------------------------
714
+ // (c) RENT-ROLL / tenant sub-ledger CSV
715
+ // ---------------------------------------------------------------------------
716
+
717
// Header aliases for a rent-roll / tenant sub-ledger CSV, keyed by logical
// field. Each row is a charge or a payment against a tenant.
//
// Two amount layouts are accepted:
//   * a single signed Amount column: the row builder hints a deposit for
//     amounts >= 0 and a check for negative amounts; or
//   * separate Payment / Charge columns: a payment is recorded as a positive
//     amount (money into the trust account), a charge as a negative amount
//     (what the tenant owes) with an adjustment hint.
// An explicit Type cell or a keyword in the memo can override the hint.
const RENT_SCHEMA = {
  date: ["date", "posted", "transaction date"],
  tenant: ["tenant", "name", "resident", "lessee", "party", "lease"],
  unit: ["unit", "apt", "apartment", "property", "door"],
  memo: ["memo", "description", "note", "charge type", "details"],
  amount: ["amount", "amt"],
  payment: ["payment", "paid", "received", "credit", "amount paid"],
  charge: ["charge", "owed", "assessment", "debit", "amount due"],
  type: ["type", "transaction type"],
};
738
+
739
// Normalize one parsed rent-roll row into a record.
//   arr       - the row's cells
//   cols      - logical field -> column index map
//   hasSigned - true when the file has a single signed amount column
//   loc       - { row, source } attached to any IngestError
// Throws an IngestError on a missing tenant, or (in split mode) when the row
// has both or neither of payment/charge. Used by both the strict and the
// diagnostic rent-roll paths.
//
// The party is "Tenant (Unit)" when a unit is present, else just "Tenant". A
// whitespace-only unit cell counts as absent, so it no longer renders as
// "Tenant ()".
function buildRentRollRecord(arr, cols, hasSigned, loc) {
  const isBlank = (value) => value == null || String(value).trim() === "";

  const date = parseDate(cell(arr, cols.date), loc);
  const tenant = cell(arr, cols.tenant);
  if (isBlank(tenant)) {
    throw new IngestError("rent-roll row missing tenant", loc);
  }
  const unit = cell(arr, cols.unit);

  let amount;
  let kindHint;
  if (hasSigned) {
    amount = parseCents(cell(arr, cols.amount), "amount", loc);
    kindHint = amount >= 0 ? KIND.DEPOSIT : KIND.CHECK;
  } else {
    const pRaw = cell(arr, cols.payment);
    const cRaw = cell(arr, cols.charge);
    const pHas = !isBlank(pRaw);
    const cHas = !isBlank(cRaw);
    if (pHas && cHas) {
      throw new IngestError(
        "rent-roll row has BOTH a payment and a charge",
        loc
      );
    }
    if (!pHas && !cHas) {
      throw new IngestError(
        "rent-roll row has neither payment nor charge",
        loc
      );
    }
    if (pHas) {
      amount = Math.abs(parseCents(pRaw, "payment", loc));
      kindHint = KIND.DEPOSIT;
    } else {
      // A charge is an accrual, not a cash movement: record it with a negative
      // sign reflecting what the tenant owes the trust ledger.
      amount = -Math.abs(parseCents(cRaw, "charge", loc));
      kindHint = KIND.ADJUSTMENT;
    }
  }

  const memoRaw = cell(arr, cols.memo) || "";
  const typeText = cell(arr, cols.type) || "";

  // coerceKind always runs, so its validation still applies; the hint only
  // replaces its answer when there is no explicit type and the memo carries no
  // overriding keyword (e.g. "NSF").
  let kind = coerceKind(typeText, `${typeText} ${memoRaw}`, amount, loc);
  const hasOverrideKeyword = /nsf|returned|fee|transfer|adjust|void/i.test(memoRaw);
  if (String(typeText).trim() === "" && !hasOverrideKeyword) {
    kind = kindHint;
  }

  const tenantName = String(tenant).trim();
  const unitName = isBlank(unit) ? "" : String(unit).trim();
  const party = unitName === "" ? tenantName : `${tenantName} (${unitName})`;

  return makeRecord({
    date,
    amount,
    memo: memoRaw,
    kind,
    party,
    source: SOURCE.RENT_ROLL,
  });
}
805
+
806
// Strict rent-roll CSV parser. Row 0 is the header; every later row becomes one
// normalized record via buildRentRollRecord, with `row` set to its 1-based
// data-row number. `opts.columnMap` is passed to indexHeader. Throws an
// IngestError for null input, an empty file, a missing date or tenant column,
// or a file with neither an amount column nor payment/charge columns. Any row
// error aborts the whole parse.
function parseRentRollCSV(text, opts = {}) {
  if (text == null) {
    throw new IngestError("no rent-roll input", { source: SOURCE.RENT_ROLL });
  }

  const rows = parseCSV(text);
  if (rows.length === 0) {
    throw new IngestError("empty rent roll", { source: SOURCE.RENT_ROLL });
  }

  const cols = indexHeader(rows[0], RENT_SCHEMA, SOURCE.RENT_ROLL, opts.columnMap);
  requireCols(cols, ["date", "tenant"], SOURCE.RENT_ROLL);

  const hasSigned = cols.amount !== -1;
  const hasSplit = !(cols.payment === -1 && cols.charge === -1);
  if (!(hasSigned || hasSplit)) {
    throw new IngestError(
      'rent roll needs an "amount" column or payment/charge columns',
      { source: SOURCE.RENT_ROLL }
    );
  }

  const records = [];
  for (const [rowNumber, row] of rows.entries()) {
    if (rowNumber === 0) continue; // header
    records.push(
      buildRentRollRecord(row, cols, hasSigned, {
        row: rowNumber,
        source: SOURCE.RENT_ROLL,
      })
    );
  }
  return records;
}
832
+
833
+ // ---------------------------------------------------------------------------
834
+ // Diagnostic ingest core (T-25.1) — parse-WITH-report, never fail-closed
835
+ // ---------------------------------------------------------------------------
836
+ //
837
+ // The strict parsers above (parseBankStatement / parseQuickBooksCSV /
838
+ // parseRentRollCSV) fail CLOSED: the first malformed row aborts the whole file.
839
+ // That is correct for the reconcile path — a trust reconciliation must NEVER
840
+ // silently partial-parse, because a dropped row hides the exact discrepancy the
841
+ // broker is legally on the hook to find.
842
+ //
843
+ // But ONBOARDING needs the opposite: when a broker first feeds the tool a real
844
+ // export, they need to SEE what happened — which header columns mapped to which
845
+ // logical field, how many rows normalized, and EVERY row that failed (not just
846
+ // the first) — so they can fix the file or supply a column map. That is what the
847
+ // `diagnose*` family provides.
848
+ //
849
+ // CRITICAL INVARIANT: the diagnostic path REUSES the exact same per-row builders
850
+ // (buildBankRecord / buildQuickBooksRecord / buildRentRollRecord) and the same
851
+ // primitives (parseCSV / indexHeader / parseDate / parseCents / coerceKind) that
852
+ // the strict parsers use. It re-implements NONE of the parse logic. It differs
853
+ // from the strict parsers in EXACTLY two ways:
854
+ // (1) it wraps each per-row build in try/catch and ACCUMULATES IngestErrors
855
+ // instead of throwing on the first, and
856
+ // (2) it returns the detected header + the logical->header column map.
857
+ // A missing REQUIRED column is reported in `requiredMissing` (still a hard
858
+ // problem, surfaced to the caller) rather than collapsing the whole file.
859
+ //
860
+ // `diagnose*` is PURE and side-effect-free: no I/O, no clock, no globals. Given
861
+ // the same (text, opts) it returns a byte-identical report.
862
+
863
// Diagnostic settings, one entry per source. Every entry carries:
//   schema             - the alias table used to index the header
//   required           - logical columns that must be mapped
//   amountGroups       - alternative amount layouts; a file is usable when at
//                        least one listed column is mapped
//   amountGroupMessage - the file-level error reported when none is mapped
//   build              - the per-row record builder
// Schemas, required sets, messages and builders are the ones the strict parsers
// use, which keeps the strict and diagnostic paths in lock-step. Only the outer
// object is frozen.
const DIAGNOSE_CONFIG = Object.freeze({
  [SOURCE.BANK]: {
    schema: BANK_SCHEMA,
    required: ["date"],
    amountGroups: [["amount"], ["debit", "credit"]],
    amountGroupMessage: 'bank statement needs an "amount" column or debit/credit columns',
    build: buildBankRecord,
  },

  [SOURCE.QUICKBOOKS]: {
    schema: QB_SCHEMA,
    required: ["date"],
    amountGroups: [["amount"], ["debit", "credit"]],
    amountGroupMessage: 'QuickBooks export needs an "amount" column or debit/credit columns',
    build: buildQuickBooksRecord,
  },

  [SOURCE.RENT_ROLL]: {
    schema: RENT_SCHEMA,
    required: ["date", "tenant"],
    amountGroups: [["amount"], ["payment", "charge"]],
    amountGroupMessage: 'rent roll needs an "amount" column or payment/charge columns',
    build: buildRentRollRecord,
  },
});
894
+
895
// Diagnose an OFX/QFX bank file: the same parse-WITH-report contract as the CSV
// diagnostic, but OFX has no header row or column map, only a stream of
// <STMTTRN> blocks. buildOFXRecord (the strict OFX builder) is reused verbatim
// and per-transaction IngestErrors are accumulated instead of aborting.
//
//   text       - raw OFX/QFX text; null/undefined yields a file-level error
//                report instead of a TypeError
//   sampleSize - how many ok records to echo in `sample`; defaults to 5 when
//                omitted or null, so a direct call to this exported function
//                gets a usable sample
//
// Returns a report with the same shape as the CSV diagnostic: `format` is
// "ofx" and `header`/`mapped` name the OFX tags read rather than CSV columns.
// File-level problems are reported with `row: null`. Errors that are not
// IngestErrors are rethrown.
function diagnoseOFX(text, sampleSize) {
  const sampleLimit = sampleSize == null ? 5 : sampleSize;

  const report = {
    source: SOURCE.BANK,
    format: "ofx",
    // No CSV header row exists; surface the OFX tags we read so the human view
    // still has a "what columns did you see" line.
    header: ["DTPOSTED", "TRNAMT", "TRNTYPE", "NAME/MEMO"],
    mapped: {
      date: "DTPOSTED",
      amount: "TRNAMT",
      type: "TRNTYPE",
      memo: "NAME/MEMO",
    },
    requiredMissing: [],
    rowCount: 0,
    okCount: 0,
    records: [],
    errors: [],
    sample: [],
  };

  // A missing file is a whole-file problem; report it rather than crash.
  if (text == null) {
    report.errors.push({ row: null, message: "no bank input" });
    return report;
  }

  let blocks;
  try {
    blocks = ofxBlocks(text);
  } catch (err) {
    if (err instanceof IngestError) {
      report.errors.push({ row: null, message: err.message });
      return report;
    }
    throw err;
  }
  if (blocks.length === 0) {
    report.errors.push({
      row: null,
      message: "OFX document has no <STMTTRN> transactions",
    });
    return report;
  }

  blocks.forEach((block, i) => {
    report.rowCount += 1;
    const loc = { row: i + 1, source: SOURCE.BANK };
    try {
      const rec = buildOFXRecord(block, loc);
      report.records.push(rec);
      report.okCount += 1;
      if (report.sample.length < sampleLimit) report.sample.push(rec);
    } catch (err) {
      if (err instanceof IngestError) {
        report.errors.push({ row: i + 1, message: err.message });
      } else {
        throw err; // a non-ingest bug is real — do not swallow it
      }
    }
  });
  return report;
}
958
+
959
// The single diagnostic driver behind the diagnose* family.
//   source          - selects the DIAGNOSE_CONFIG entry; unknown -> IngestError
//   text            - raw file contents
//   opts.sampleSize - how many ok rows to echo in `sample` (default 5)
//   opts.format     - bank only: "csv"|"ofx" forces the format; otherwise it is
//                     sniffed with the same predicate parseBankStatement uses
//   opts.columnMap  - passed to indexHeader exactly as the strict parse would
// Returns a report { source, format, header, mapped, requiredMissing, rowCount,
// okCount, records, errors, sample }. File-level problems are reported with
// `row: null`; row-level IngestErrors are accumulated with their row number.
// Errors that are not IngestErrors are rethrown.
function diagnoseSource(source, text, opts = {}) {
  const cfg = DIAGNOSE_CONFIG[source];
  if (!cfg) {
    throw new IngestError(`unknown source "${source}" for diagnose`);
  }
  const sampleSize = opts.sampleSize == null ? 5 : opts.sampleSize;

  // Bank files may be OFX/QFX: route those to the OFX diagnostic so inspect
  // never gives a worse answer than the real pipeline.
  if (source === SOURCE.BANK && text != null) {
    let fmt = opts.format;
    if (!fmt) {
      fmt = /<OFX>|<STMTTRN>|OFXHEADER/i.test(text) ? "ofx" : "csv";
    }
    if (fmt === "ofx") {
      return diagnoseOFX(text, sampleSize);
    }
  }

  const report = {
    source,
    format: "csv",
    header: [],
    mapped: {},
    requiredMissing: [],
    rowCount: 0,
    okCount: 0,
    records: [],
    errors: [],
    sample: [],
  };
  // Record a whole-file problem and hand back the report for an early return.
  const fileError = (message) => {
    report.errors.push({ row: null, message });
    return report;
  };

  // A null or empty file is a whole-file problem, reported instead of thrown
  // so the inspect command can surface it.
  if (text == null) {
    return fileError(`no ${source} input`);
  }
  const rows = parseCSV(text);
  if (rows.length === 0) {
    return fileError(`empty ${source} file`);
  }
  const dataRowCount = Math.max(rows.length - 1, 0);

  const header = rows[0].map((h) => String(h));
  report.header = [...header];

  // Index the header with the same indexHeader call the strict parse makes. A
  // bad columnMap surfaces as a file-level error with indexHeader's message.
  let cols;
  try {
    cols = indexHeader(header, cfg.schema, source, opts.columnMap);
  } catch (err) {
    if (!(err instanceof IngestError)) {
      throw err;
    }
    report.rowCount = dataRowCount;
    return fileError(err.message);
  }
  const isMapped = (key) => cols[key] !== -1 && cols[key] !== undefined;

  // Translate each index back to the ORIGINAL header text (null = unmatched).
  for (const key of Object.keys(cfg.schema)) {
    report.mapped[key] = isMapped(key) ? header[cols[key]] : null;
  }

  // Missing REQUIRED columns are listed but do not discard the header and the
  // partial map, so the broker can see what to add or remap.
  for (const name of cfg.required) {
    if (!isMapped(name)) {
      report.requiredMissing.push(name);
    }
  }

  // At least one column from some amount group must be mapped.
  const groupPresent = cfg.amountGroups.some((group) =>
    group.some((key) => isMapped(key))
  );
  const hasSigned = isMapped("amount");

  // With a structural problem every row would fail identically; report it once
  // and stop so the caller fixes the header before looking at row errors.
  const structurallyBroken = report.requiredMissing.length > 0 || !groupPresent;
  if (structurallyBroken) {
    if (!groupPresent) {
      report.errors.push({ row: null, message: cfg.amountGroupMessage });
    }
    report.rowCount = dataRowCount;
    return report;
  }

  for (const [r, row] of rows.entries()) {
    if (r === 0) continue; // header
    report.rowCount += 1;
    try {
      const rec = cfg.build(row, cols, hasSigned, { row: r, source });
      report.records.push(rec);
      report.okCount += 1;
      if (report.sample.length < sampleSize) {
        report.sample.push(rec);
      }
    } catch (err) {
      if (!(err instanceof IngestError)) {
        throw err; // a non-ingest bug is real — do not swallow it
      }
      report.errors.push({ row: r, message: err.message });
    }
  }

  return report;
}
1081
+
1082
// Pre-flight a columnMap against a file's header row without touching any data
// rows. The header comes from parseCSV and the check is validateColumnMap run
// against the source's schema, so a bad map is rejected here with an
// IngestError before any row work. That lets the CLI treat it as a usage error
// rather than a data error.
//
// Nothing is checked (plain return) when:
//   * the map is absent or has no entries;
//   * the text is null, or parses to zero rows (reported later as a file error);
//   * the source is the bank and the file is OFX/QFX, which has no header to
//     validate against. `opts.format` ("csv"|"ofx") forces the bank format,
//     otherwise it is sniffed with the same predicate parseBankStatement uses.
// An unknown `source` throws an IngestError.
function validateColumnMapForSource(source, text, columnMap, opts = {}) {
  const hasEntries = columnMap && Object.keys(columnMap).length > 0;
  if (!hasEntries) {
    return;
  }

  const cfg = DIAGNOSE_CONFIG[source];
  if (!cfg) {
    throw new IngestError(`unknown source "${source}" for column-map validation`);
  }

  if (text == null) {
    return;
  }

  if (source === SOURCE.BANK) {
    let fmt = opts.format;
    if (!fmt) {
      fmt = /<OFX>|<STMTTRN>|OFXHEADER/i.test(text) ? "ofx" : "csv";
    }
    if (fmt === "ofx") {
      return;
    }
  }

  const rows = parseCSV(text);
  if (rows.length === 0) {
    return;
  }

  // Throws an IngestError on a bad entry.
  validateColumnMap(columnMap, rows[0], cfg.schema, source);
}
1114
+
1115
// List the header aliases accepted for one logical field of a source. The list
// is read from the schema in DIAGNOSE_CONFIG, so an onboarding hint ("add a
// column named one of [...]") built from it matches what the parsers accept.
// Returns a fresh copy of the alias array, or [] when the field is not in the
// schema. Throws an IngestError for an unknown source.
function aliasesFor(source, logical) {
  const cfg = DIAGNOSE_CONFIG[source];
  if (!cfg) {
    throw new IngestError(`unknown source "${source}" for aliasesFor`);
  }
  const aliases = cfg.schema[logical];
  if (!Array.isArray(aliases)) {
    return [];
  }
  return [...aliases];
}
1126
+
1127
// Per-source convenience wrappers: the `diagnose{Bank,QuickBooks,RentRoll}`
// family. Each one only fixes the `source` argument of diagnoseSource.

// Diagnose a bank statement; `opts` is forwarded to diagnoseSource untouched.
function diagnoseBank(text, opts) {
  const report = diagnoseSource(SOURCE.BANK, text, opts);
  return report;
}
1132
// Diagnose a QuickBooks export; `opts` is forwarded to diagnoseSource untouched.
function diagnoseQuickBooks(text, opts) {
  const report = diagnoseSource(SOURCE.QUICKBOOKS, text, opts);
  return report;
}
1135
// Diagnose a rent roll; `opts` is forwarded to diagnoseSource untouched.
function diagnoseRentRoll(text, opts) {
  const report = diagnoseSource(SOURCE.RENT_ROLL, text, opts);
  return report;
}
1138
+
1139
+ module.exports = {
1140
+ SOURCE,
1141
+ KIND,
1142
+ IngestError,
1143
+ // primitives (exported for focused tests / reuse)
1144
+ parseCents,
1145
+ parseDate,
1146
+ parseCSV,
1147
+ classifyKind,
1148
+ // the three normalizers
1149
+ validateColumnMap,
1150
+ parseBankStatement,
1151
+ parseBankCSV,
1152
+ parseOFX,
1153
+ diagnoseOFX,
1154
+ parseQuickBooksCSV,
1155
+ parseRentRollCSV,
1156
+ validateColumnMapForSource,
1157
+ // diagnostic ingest core (T-25.1) — parse-with-report, never fail-closed
1158
+ diagnoseSource,
1159
+ diagnoseBank,
1160
+ diagnoseQuickBooks,
1161
+ diagnoseRentRoll,
1162
+ aliasesFor,
1163
+ };