verifyhash 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +883 -0
- package/cli/abi/ContributionRegistry.json +881 -0
- package/cli/agent.js +2173 -0
- package/cli/anchor-artifact.js +853 -0
- package/cli/anchor.js +400 -0
- package/cli/claim.js +881 -0
- package/cli/core/agent-commit.js +448 -0
- package/cli/core/agent-session.js +598 -0
- package/cli/core/anchor-binding.js +663 -0
- package/cli/core/attestation.js +580 -0
- package/cli/core/evidence-plans.js +495 -0
- package/cli/core/fixtures/evidence-plans/baseline.json +19 -0
- package/cli/core/fulfill-intake.js +1082 -0
- package/cli/core/go-live-preflight.js +481 -0
- package/cli/core/license.js +534 -0
- package/cli/core/manifest.js +243 -0
- package/cli/core/packetseal.js +591 -0
- package/cli/core/registryArtifact.js +49 -0
- package/cli/core/revocation.js +539 -0
- package/cli/core/rfc3161.js +389 -0
- package/cli/core/timestamp.js +482 -0
- package/cli/core/trust-asof.js +479 -0
- package/cli/dataset.js +2950 -0
- package/cli/evidence.js +2227 -0
- package/cli/fulfill-webhook-http.js +438 -0
- package/cli/git.js +220 -0
- package/cli/hash.js +550 -0
- package/cli/identity.js +1072 -0
- package/cli/journal-cli.js +1110 -0
- package/cli/journal-log.js +454 -0
- package/cli/journal.js +334 -0
- package/cli/lineage.js +447 -0
- package/cli/list.js +287 -0
- package/cli/parcel.js +1509 -0
- package/cli/proof.js +578 -0
- package/cli/prove.js +300 -0
- package/cli/receipt.js +631 -0
- package/cli/registry.js +331 -0
- package/cli/reputation.js +344 -0
- package/cli/revocation.js +495 -0
- package/cli/serve-verify-http.js +298 -0
- package/cli/serve-verify.js +333 -0
- package/cli/show.js +339 -0
- package/cli/verify.js +383 -0
- package/cli/vh.js +3927 -0
- package/docs/ADOPT.md +183 -0
- package/docs/ADOPTION.json +11 -0
- package/docs/AGENTTRACE.md +247 -0
- package/docs/ANCHORING.md +167 -0
- package/docs/AUDIT.md +55 -0
- package/docs/CONFORMANCE.md +107 -0
- package/docs/DATALEDGER.md +638 -0
- package/docs/DECIDE.md +47 -0
- package/docs/DECISIONS-PENDING.md +27 -0
- package/docs/DEPLOY-PUBLIC-SITE.md +301 -0
- package/docs/ENGINE-LEDGER.json +12 -0
- package/docs/EVIDENCE.md +519 -0
- package/docs/GO-LIVE.md +66 -0
- package/docs/IDENTITY.md +123 -0
- package/docs/INDEPENDENT-VERIFICATION.md +377 -0
- package/docs/INTEGRITY-JOURNAL.md +337 -0
- package/docs/KEY-LIFECYCLE.md +179 -0
- package/docs/LICENSING.md +46 -0
- package/docs/LINEAGE.md +307 -0
- package/docs/LOOP-AUDIT-2026-07-03.json +580 -0
- package/docs/LOOP-HARDENING-PLAN.md +44 -0
- package/docs/MERKLE-LEAVES.md +113 -0
- package/docs/METRICS.jsonl +31 -0
- package/docs/MORNING.md +204 -0
- package/docs/PILOT.md +444 -0
- package/docs/PROOFPARCEL.md +227 -0
- package/docs/PROOFS.md +262 -0
- package/docs/RECEIPTS.md +341 -0
- package/docs/REPUTATION.md +158 -0
- package/docs/SDK.md +301 -0
- package/docs/STRATEGY-ARCHIVE.md +5055 -0
- package/docs/SUPERVISOR-RUNBOOK.md +52 -0
- package/docs/TRUST-BOUNDARIES.md +335 -0
- package/docs/TRUSTLEDGER.md +1976 -0
- package/docs/USAGE-BUDGET.json +121 -0
- package/docs/VERIFY-SERVICE.md +168 -0
- package/index.js +160 -0
- package/package.json +41 -0
- package/trustledger/build-standalone.js +796 -0
- package/trustledger/cli.js +3179 -0
- package/trustledger/close.js +391 -0
- package/trustledger/corpus.js +159 -0
- package/trustledger/dist/BUILD-PROVENANCE.json +99 -0
- package/trustledger/dist/trustledger-standalone.html +6197 -0
- package/trustledger/dist/trustledger-standalone.html.sha256 +1 -0
- package/trustledger/door-core.js +442 -0
- package/trustledger/fixtures/bank.csv +7 -0
- package/trustledger/fixtures/bank.malformed.csv +3 -0
- package/trustledger/fixtures/bank.noalias.csv +5 -0
- package/trustledger/fixtures/bank.ofx +34 -0
- package/trustledger/fixtures/bank.real.csv +5 -0
- package/trustledger/fixtures/corpus/_shared/prior-close.json +22 -0
- package/trustledger/fixtures/corpus/bank-book-mismatch--benign-twin/inputs.json +14 -0
- package/trustledger/fixtures/corpus/bank-book-mismatch--benign-twin/meta.json +7 -0
- package/trustledger/fixtures/corpus/bank-book-mismatch--out-of-trust/inputs.json +14 -0
- package/trustledger/fixtures/corpus/bank-book-mismatch--out-of-trust/meta.json +7 -0
- package/trustledger/fixtures/corpus/continuity-break--benign-twin/inputs.json +15 -0
- package/trustledger/fixtures/corpus/continuity-break--benign-twin/meta.json +7 -0
- package/trustledger/fixtures/corpus/continuity-break--out-of-trust/inputs.json +15 -0
- package/trustledger/fixtures/corpus/continuity-break--out-of-trust/meta.json +7 -0
- package/trustledger/fixtures/corpus/negative-tenant-ledger--benign-twin/inputs.json +13 -0
- package/trustledger/fixtures/corpus/negative-tenant-ledger--benign-twin/meta.json +7 -0
- package/trustledger/fixtures/corpus/negative-tenant-ledger--out-of-trust/inputs.json +13 -0
- package/trustledger/fixtures/corpus/negative-tenant-ledger--out-of-trust/meta.json +7 -0
- package/trustledger/fixtures/corpus/owner-overdraw--benign-twin/inputs.json +15 -0
- package/trustledger/fixtures/corpus/owner-overdraw--benign-twin/meta.json +7 -0
- package/trustledger/fixtures/corpus/owner-overdraw--out-of-trust/inputs.json +15 -0
- package/trustledger/fixtures/corpus/owner-overdraw--out-of-trust/meta.json +7 -0
- package/trustledger/fixtures/corpus/security-deposit-segregation--benign-twin/inputs.json +16 -0
- package/trustledger/fixtures/corpus/security-deposit-segregation--benign-twin/meta.json +7 -0
- package/trustledger/fixtures/corpus/security-deposit-segregation--out-of-trust/inputs.json +13 -0
- package/trustledger/fixtures/corpus/security-deposit-segregation--out-of-trust/meta.json +7 -0
- package/trustledger/fixtures/corpus/subledger-out-of-balance--benign-twin/inputs.json +13 -0
- package/trustledger/fixtures/corpus/subledger-out-of-balance--benign-twin/meta.json +7 -0
- package/trustledger/fixtures/corpus/subledger-out-of-balance--out-of-trust/inputs.json +13 -0
- package/trustledger/fixtures/corpus/subledger-out-of-balance--out-of-trust/meta.json +7 -0
- package/trustledger/fixtures/e2e/bank.aliased.csv +4 -0
- package/trustledger/fixtures/e2e/bank.csv +4 -0
- package/trustledger/fixtures/e2e/bank.nsf.csv +4 -0
- package/trustledger/fixtures/e2e/quickbooks.csv +6 -0
- package/trustledger/fixtures/e2e/quickbooks.nsf.csv +8 -0
- package/trustledger/fixtures/e2e/rentroll.csv +6 -0
- package/trustledger/fixtures/e2e/rentroll.nsf.csv +8 -0
- package/trustledger/fixtures/e2e/rentroll.short.csv +5 -0
- package/trustledger/fixtures/plans/baseline.json +25 -0
- package/trustledger/fixtures/plans/price-binding.example.json +27 -0
- package/trustledger/fixtures/policy/ambiguous-deposit-example.json +12 -0
- package/trustledger/fixtures/policy/baseline.json +19 -0
- package/trustledger/fixtures/policy/ca-example.json +12 -0
- package/trustledger/fixtures/policy/negative-tenant-ledger-example.json +12 -0
- package/trustledger/fixtures/policy/owner-overdraw-example.json +12 -0
- package/trustledger/fixtures/quickbooks.csv +7 -0
- package/trustledger/fixtures/quickbooks.real.csv +5 -0
- package/trustledger/fixtures/rentroll.csv +6 -0
- package/trustledger/fixtures/rentroll.real.csv +4 -0
- package/trustledger/ingest.js +1163 -0
- package/trustledger/lib/policy-bundled-loader.js +44 -0
- package/trustledger/lib/sha256-vendored.js +227 -0
- package/trustledger/license.js +563 -0
- package/trustledger/match.js +551 -0
- package/trustledger/plans.js +551 -0
- package/trustledger/policy.js +398 -0
- package/trustledger/public/index.html +512 -0
- package/trustledger/reconcile.js +1486 -0
- package/trustledger/report.js +887 -0
- package/trustledger/seal.js +854 -0
- package/trustledger/server.js +391 -0
- package/trustledger/valueproof.js +350 -0
|
@@ -0,0 +1,1163 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
// TrustLedger — ingest.js
|
|
4
|
+
//
|
|
5
|
+
// T-22.1: parse + NORMALIZE the three monthly inputs a small property-management
|
|
6
|
+
// trust-account reconciliation needs into ONE transaction model:
|
|
7
|
+
//
|
|
8
|
+
// (a) a BANK STATEMENT — CSV or OFX/QFX
|
|
9
|
+
// (b) a QUICKBOOKS trust ledger — CSV export
|
|
10
|
+
// (c) a RENT-ROLL / tenant — CSV sub-ledger
|
|
11
|
+
//
|
|
12
|
+
// Every parser is a PURE function: (text, [opts]) -> NormalizedRecord[].
|
|
13
|
+
// No I/O, no clock, no globals — the same input always yields the same output,
|
|
14
|
+
// which is what makes the downstream matcher/reconciler deterministic and
|
|
15
|
+
// audit-defensible.
|
|
16
|
+
//
|
|
17
|
+
// Normalized record shape (every field always present):
|
|
18
|
+
// {
|
|
19
|
+
// date: "YYYY-MM-DD", // ISO calendar date
|
|
20
|
+
// amount: <integer cents>, // SIGNED: + = money INTO the trust account,
|
|
21
|
+
// // - = money OUT. Never a float.
|
|
22
|
+
// memo: <string>, // free-text description (trimmed)
|
|
23
|
+
// kind: <Kind>, // coarse transaction class (see KIND)
|
|
24
|
+
// party: <string>, // tenant / payee / counterparty ("" if unknown)
|
|
25
|
+
// source: <Source>, // which input this row came from
|
|
26
|
+
// }
|
|
27
|
+
//
|
|
28
|
+
// Amounts are INTEGER CENTS throughout. Dollar strings are parsed by exact
|
|
29
|
+
// digit manipulation (never `parseFloat`), so "1234.99" -> 123499 with zero
|
|
30
|
+
// binary-float drift, and a value like "10.005" is REJECTED, not rounded.
|
|
31
|
+
//
|
|
32
|
+
// "Strict" is the whole point: a malformed row raises an IngestError naming the
|
|
33
|
+
// row number and the problem, rather than being silently dropped. A trust
|
|
34
|
+
// reconciliation that quietly skips a row is worse than useless — it hides the
|
|
35
|
+
// exact discrepancy a broker is legally on the hook to find.
|
|
36
|
+
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
// Enums
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
// Which of the three monthly inputs a normalized record came from.
const SOURCE = Object.freeze({
  BANK: "bank",
  QUICKBOOKS: "quickbooks",
  RENT_ROLL: "rent_roll",
});

// Coarse transaction classes carried in every record's `kind` field.
const KIND = Object.freeze({
  // money in (rent received, owner contribution)
  DEPOSIT: "deposit",
  // money out by check (owner draw, vendor, refund)
  CHECK: "check",
  // money moved between accounts
  TRANSFER: "transfer",
  // bank/service fee out
  FEE: "fee",
  // returned/bounced item reversal
  NSF: "nsf",
  // manual correction
  ADJUSTMENT: "adjustment",
  // classified but uncategorized
  OTHER: "other",
});

// Every kind string, as a set, for membership checks on supplied kinds.
const VALID_KINDS = new Set(Object.keys(KIND).map((name) => KIND[name]));
|
|
58
|
+
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
// Errors
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
|
|
63
|
+
class IngestError extends Error {
  // The final message is `message` plus, when `row` is known, a " (row N)" or
  // " (row N, <source>)" suffix. `row` is a 1-based line number within the
  // data (1 = first data row, header excluded), or null when unknown.
  // `source` is the SOURCE being parsed, or null. Both are kept on the
  // instance so callers can read them without parsing the message.
  constructor(message, { row = null, source = null } = {}) {
    let suffix = "";
    if (row != null) {
      const origin = source ? `, ${source}` : "";
      suffix = ` (row ${row}${origin})`;
    }
    super(`${message}${suffix}`);
    this.name = "IngestError";
    this.row = row;
    this.source = source;
  }
}
|
|
75
|
+
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
// Amount parsing — exact integer cents, no float
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
// Parse a human dollar string into SIGNED integer cents, exactly.
//
// Accepts:
//   "1,234.56"  "1234.56"  "1234"  ".5"  "0.05"  "$1,234.56"
//   a leading "-" or "+", and accounting-style parentheses "(1,234.56)" =>
//   negative. A "-" inside parentheses toggles the sign back to positive.
// Rejects (throws IngestError):
//   null/undefined, empty, non-numeric, > 2 decimal places, multiple signs,
//   malformed grouping, and magnitudes outside the safe-integer range.
//
// A zero amount is always returned as +0: "-0.00" and "(0.00)" yield 0, never
// -0, so Object.is / strict deep-equality checks cannot tell two zero amounts
// apart.
//
// `field` and `loc` ({row, source}) only flavor the error message.
function parseCents(raw, field = "amount", loc = {}) {
  if (raw == null) {
    throw new IngestError(`missing ${field}`, loc);
  }
  let s = String(raw).trim();
  if (s === "") {
    throw new IngestError(`empty ${field}`, loc);
  }
  const malformed = () => new IngestError(`malformed ${field}: "${raw}"`, loc);

  // Accounting negatives: (1,234.56) == -1234.56
  let negative = false;
  if (/^\(.*\)$/.test(s)) {
    negative = true;
    s = s.slice(1, -1).trim();
  }

  // Strip a single currency symbol if present.
  s = s.replace(/^\$/, "").trim();

  // Leading sign.
  const signMatch = s.match(/^([+-])/);
  if (signMatch) {
    if (signMatch[1] === "-") negative = !negative;
    s = s.slice(1).trim();
  }
  if (s === "") {
    throw malformed();
  }

  // No further signs allowed anywhere.
  if (/[+-]/.test(s)) {
    throw malformed();
  }

  // Remove thousands separators ONLY when they group digits correctly.
  // (We do not try to be clever about locale; commas are grouping, period is
  // the decimal point — the US convention these inputs use.)
  if (s.includes(",")) {
    const parts = s.split(",");
    // First group: 1..3 digits; every later group: exactly 3 digits.
    // The last group may carry the decimal portion.
    for (let i = 0; i < parts.length; i++) {
      const seg = i === parts.length - 1 ? parts[i].split(".")[0] : parts[i];
      const ok = i === 0 ? /^\d{1,3}$/.test(seg) : /^\d{3}$/.test(seg);
      if (!ok) {
        throw malformed();
      }
    }
    s = s.replace(/,/g, "");
  }

  // Now s must be digits with at most one dot and <=2 fractional digits.
  const m = s.match(/^(\d*)(?:\.(\d{0,2}))?$/);
  if (!m || (m[1] === "" && (m[2] === undefined || m[2] === ""))) {
    throw malformed();
  }
  const whole = m[1] === "" ? "0" : m[1];
  const frac = (m[2] || "").padEnd(2, "0");

  // Build cents via integer math; both the dollar part and the combined total
  // must stay inside the safe-integer range or the value is rejected.
  const dollars = Number(whole);
  const cents = Number(frac);
  if (!Number.isSafeInteger(dollars)) {
    throw new IngestError(`amount out of range: "${raw}"`, loc);
  }
  let total = dollars * 100 + cents;
  if (!Number.isSafeInteger(total)) {
    throw new IngestError(`amount out of range: "${raw}"`, loc);
  }
  // Negating a zero total would produce -0; leave zero untouched.
  if (negative && total !== 0) total = -total;
  return total;
}
|
|
162
|
+
|
|
163
|
+
// ---------------------------------------------------------------------------
|
|
164
|
+
// Date parsing — normalize to YYYY-MM-DD
|
|
165
|
+
// ---------------------------------------------------------------------------
|
|
166
|
+
|
|
167
|
+
// Deterministic month-name -> 1..12 table, built from one row of accepted
// spellings per month (abbreviation first, then the full name; September also
// accepts "sept"). Keys are lower-case, so callers lower-case the text before
// looking it up. NO locale/Date() dependency, so the same textual date always
// parses to the same ISO string.
const MONTH_NAMES = Object.freeze(
  [
    ["jan", "january"],
    ["feb", "february"],
    ["mar", "march"],
    ["apr", "april"],
    ["may"],
    ["jun", "june"],
    ["jul", "july"],
    ["aug", "august"],
    ["sep", "sept", "september"],
    ["oct", "october"],
    ["nov", "november"],
    ["dec", "december"],
  ].reduce((table, spellings, position) => {
    for (const spelling of spellings) {
      table[spelling] = position + 1;
    }
    return table;
  }, {})
);
|
|
185
|
+
|
|
186
|
+
// Accepts: YYYY-MM-DD, MM/DD/YYYY, M/D/YY, YYYYMMDD (OFX style), and the common
// textual forms QuickBooks exports use — "Mon DD, YYYY" ("Jan 5, 2024") and
// "DD-Mon-YYYY" ("5-Jan-2024"). Two-digit years are read as 20YY. Returns a
// strict ISO date string (year zero-padded to 4 digits, month and day to 2),
// validating the calendar (no 02/30) with a deterministic month-name table
// (no Date()).
//
// Throws IngestError for a null/undefined or empty value, an unrecognized
// layout, an unrecognized month word, or an out-of-range month or day. `loc`
// ({row, source}) only flavors the error message.
function parseDate(raw, loc = {}) {
  if (raw == null) throw new IngestError("missing date", loc);
  const s = String(raw).trim();
  if (s === "") throw new IngestError("empty date", loc);

  // Resolve a month word through MONTH_NAMES. Only OWN keys count: a plain
  // property read would also find inherited members such as "constructor",
  // which would slip past a null check and end up as a NaN month.
  const monthFromWord = (word) => {
    const key = word.toLowerCase();
    if (!Object.prototype.hasOwnProperty.call(MONTH_NAMES, key)) {
      throw new IngestError(`unrecognized month in date: "${raw}"`, loc);
    }
    return String(MONTH_NAMES[key]);
  };

  let y;
  let mo;
  let d;

  let m;
  if ((m = s.match(/^(\d{4})-(\d{2})-(\d{2})$/))) {
    [, y, mo, d] = m;
  } else if ((m = s.match(/^(\d{1,2})\/(\d{1,2})\/(\d{2}|\d{4})$/))) {
    mo = m[1];
    d = m[2];
    y = m[3].length === 2 ? `20${m[3]}` : m[3];
  } else if ((m = s.match(/^(\d{4})(\d{2})(\d{2})$/))) {
    // OFX/QFX YYYYMMDD (optionally followed by HHMMSS we ignore upstream).
    [, y, mo, d] = m;
  } else if ((m = s.match(/^([A-Za-z]+)\.?\s+(\d{1,2}),?\s+(\d{4})$/))) {
    // "Mon DD, YYYY" — e.g. "Jan 5, 2024", "January 5 2024", "Sept. 5, 2024".
    mo = monthFromWord(m[1]);
    d = m[2];
    y = m[3];
  } else if ((m = s.match(/^(\d{1,2})-([A-Za-z]+)\.?-(\d{2}|\d{4})$/))) {
    // "DD-Mon-YYYY" — e.g. "5-Jan-2024", "05-Jan-24".
    mo = monthFromWord(m[2]);
    d = m[1];
    y = m[3].length === 2 ? `20${m[3]}` : m[3];
  } else {
    throw new IngestError(`unrecognized date: "${raw}"`, loc);
  }

  const yi = Number(y);
  const mi = Number(mo);
  const di = Number(d);
  // The integer check also rejects NaN, which the range comparisons alone
  // would let through (every comparison with NaN is false).
  if (!Number.isInteger(mi) || mi < 1 || mi > 12) {
    throw new IngestError(`invalid month in date: "${raw}"`, loc);
  }
  const daysInMonth = [
    31,
    // leap-year aware February
    (yi % 4 === 0 && yi % 100 !== 0) || yi % 400 === 0 ? 29 : 28,
    31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
  ];
  if (!Number.isInteger(di) || di < 1 || di > daysInMonth[mi - 1]) {
    throw new IngestError(`invalid day in date: "${raw}"`, loc);
  }
  const pad = (n, width) => String(n).padStart(width, "0");
  // Pad the year too, so a year below 1000 still yields a 4-digit YYYY.
  return `${pad(yi, 4)}-${pad(mi, 2)}-${pad(di, 2)}`;
}
|
|
243
|
+
|
|
244
|
+
// ---------------------------------------------------------------------------
|
|
245
|
+
// CSV parsing — RFC-4180-ish: quotes, embedded commas/newlines, "" escape
|
|
246
|
+
// ---------------------------------------------------------------------------
|
|
247
|
+
|
|
248
|
+
// Parse CSV text into an array of rows (each a string[]). Handles quoted fields
// containing commas, newlines, and doubled quotes. Blank lines are dropped.
//
// Throws an IngestError when the text ends while a quoted field is still open.
// Without that check an unbalanced quote would fold every following line into
// one cell, so whole rows would vanish from the result without any error.
function parseCSV(text) {
  const rows = [];
  let field = "";
  let row = [];
  let inQuotes = false;
  let sawAny = false;
  // Physical line being scanned (after newline normalization) and the line on
  // which the currently open quote started; used only to word the error.
  let lineNo = 1;
  let quoteOpenedAt = 0;

  const pushField = () => {
    row.push(field);
    field = "";
  };
  const pushRow = () => {
    pushField();
    // Drop fully-blank lines (single empty field, nothing else).
    const blank = row.length === 1 && row[0].trim() === "";
    if (!blank) rows.push(row);
    row = [];
  };

  // Normalize CRLF/CR to LF for a single state machine.
  const s = text.replace(/\r\n?/g, "\n");

  for (let i = 0; i < s.length; i++) {
    const c = s[i];
    sawAny = true;
    if (c === "\n") lineNo++;
    if (inQuotes) {
      if (c === '"') {
        if (s[i + 1] === '"') {
          // Doubled quote inside a quoted field is one literal quote.
          field += '"';
          i++;
        } else {
          inQuotes = false;
        }
      } else {
        field += c;
      }
    } else if (c === '"') {
      inQuotes = true;
      quoteOpenedAt = lineNo;
    } else if (c === ",") {
      pushField();
    } else if (c === "\n") {
      pushRow();
    } else {
      field += c;
    }
  }

  if (inQuotes) {
    throw new IngestError(
      `unterminated quoted field in CSV (quote opened on line ${quoteOpenedAt})`
    );
  }

  // Flush trailing field/row if the text didn't end with a newline.
  if (field !== "" || row.length > 0 || (sawAny && rows.length === 0)) {
    pushRow();
  }
  return rows;
}
|
|
302
|
+
|
|
303
|
+
// Resolve every logical column of `schema` to its 0-based position in `header`
// (-1 when no alias matches). Header cells and aliases are compared trimmed /
// lower-cased, and aliases are tried in their listed order. Returns
// { name -> index }.
//
// `columnMap` (T-25.3) is an OPTIONAL pure `{ <logical>: <exactHeaderName> }`
// escape hatch for a file whose headers no alias matches. It is VALIDATED
// first by validateColumnMap (an unknown logical key, or a header not present
// in the file, hard-errors naming the available options). Each logical field
// it names is bound to the header the caller specified, OVERRIDING alias
// auto-detect; fields it does not name fall through to the normal alias
// detect, so a partial map only overrides what it touches. With no columnMap,
// behaviour is unchanged.
function indexHeader(header, schema, source, columnMap = null) {
  const lowered = header.map((h) => String(h).trim().toLowerCase());
  const pinned = columnMap
    ? validateColumnMap(columnMap, header, schema, source)
    : null;

  // Position of the first alias present in the header, or -1.
  const firstAliasHit = (aliases) => {
    for (const alias of aliases) {
      const at = lowered.indexOf(alias.toLowerCase());
      if (at !== -1) return at;
    }
    return -1;
  };

  const positions = {};
  for (const logical of Object.keys(schema)) {
    const isPinned =
      pinned && Object.prototype.hasOwnProperty.call(pinned, logical);
    positions[logical] = isPinned
      ? pinned[logical]
      : firstAliasHit(schema[logical]);
  }
  return positions;
}
|
|
334
|
+
|
|
335
|
+
// Check a caller-supplied `columnMap` against the file's actual header and the
// source schema, resolving each entry to a 0-based column index. PURE. Header
// names are matched trimmed and case-insensitively. Throws an IngestError
// (carrying `source`) when:
//   * a key is not a logical field of this source's schema,
//   * a mapped value is null/undefined or blank, or
//   * the mapped-to header is not present in the file.
// The unknown-field and missing-header messages NAME the available options so
// a broker can self-correct without reading source. Returns
// { <logical>: <index> } for the validated entries only.
function validateColumnMap(columnMap, header, schema, source) {
  const lowered = header.map((h) => String(h).trim().toLowerCase());
  const fieldNames = Object.keys(schema);
  const resolved = {};

  Object.entries(columnMap).forEach(([logical, wantHeader]) => {
    const knownField = Object.prototype.hasOwnProperty.call(schema, logical);
    if (!knownField) {
      throw new IngestError(
        `unknown logical field "${logical}" in column map for ${source} ` +
          `(available fields: ${fieldNames.join(", ")})`,
        { source }
      );
    }

    const wanted = wantHeader == null ? "" : String(wantHeader).trim();
    if (wanted === "") {
      throw new IngestError(
        `column map for "${logical}" must name a header (got empty value)`,
        { source }
      );
    }

    const position = lowered.indexOf(wanted.toLowerCase());
    if (position === -1) {
      throw new IngestError(
        `column map for "${logical}" names header "${wantHeader}" which is not ` +
          `in the file (available headers: ${header.join(", ")})`,
        { source }
      );
    }
    resolved[logical] = position;
  });

  return resolved;
}
|
|
372
|
+
|
|
373
|
+
// Assert that each logical column in `names` resolved to a real header
// position in `cols`. A value of -1 or undefined counts as missing; the first
// missing name raises an IngestError carrying `source`.
function requireCols(cols, names, source) {
  for (const wanted of names) {
    const position = cols[wanted];
    const found = position !== -1 && position !== undefined;
    if (found) continue;
    throw new IngestError(
      `missing required column "${wanted}" in header`,
      { source }
    );
  }
}
|
|
383
|
+
|
|
384
|
+
// Read the cell at column `idx` of a parsed row. An unresolved column (-1 or
// undefined) yields undefined, as does a row too short to reach `idx`.
function cell(arr, idx) {
  const unresolved = idx === -1 || idx === undefined;
  return unresolved ? undefined : arr[idx];
}
|
|
389
|
+
|
|
390
|
+
// ---------------------------------------------------------------------------
|
|
391
|
+
// Kind classification helpers
|
|
392
|
+
// ---------------------------------------------------------------------------
|
|
393
|
+
|
|
394
|
+
// Infer a coarse kind from a free-text memo/type when the source doesn't give
// an explicit one. The lower-cased text is checked against an ordered keyword
// table and the FIRST matching rule wins; when no keyword matches, the sign of
// `amountCents` decides (positive => deposit, negative => check, otherwise
// other). Deterministic: no state, no locale.
function classifyKind(text, amountCents) {
  const haystack = String(text || "").toLowerCase();

  // Order is significant — earlier rules take precedence over later ones.
  const rules = [
    [/\bnsf\b|returned|bounced|insufficient|reversal|reverse/, KIND.NSF],
    [/\bfee\b|service charge|svc chg|charge\b/, KIND.FEE],
    [/transfer|xfer|ach out|ach in/, KIND.TRANSFER],
    [/\bcheck\b|chk #|chk#|ck#|draw|disbursement|payee/, KIND.CHECK],
    [/deposit|rent|payment received|received from/, KIND.DEPOSIT],
    [/adjust|correction|void/, KIND.ADJUSTMENT],
  ];
  for (const [pattern, kind] of rules) {
    if (pattern.test(haystack)) return kind;
  }

  // Sign-based fallback.
  if (amountCents > 0) return KIND.DEPOSIT;
  return amountCents < 0 ? KIND.CHECK : KIND.OTHER;
}
|
|
415
|
+
|
|
416
|
+
// Normalize / validate an explicitly-supplied kind string, always returning
// one of the KIND strings.
//
//   raw          - the type/kind cell as supplied (may be null/empty)
//   fallbackText - free text (type + memo) used for keyword classification
//   amountCents  - signed amount, used by classifyKind's sign fallback
//   loc          - accepted for signature parity with the parsers; not used
//
// Resolution order: a returned-item keyword in `fallbackText` (=> nsf), then
// an empty `raw` (=> classifyKind), then an exact KIND value, then a known
// alias, and finally classifyKind over `raw` + `fallbackText`.
function coerceKind(raw, fallbackText, amountCents, loc) {
  // An NSF / returned-item is the single most important exception a trust
  // reconciliation must surface, and accounting exports routinely file the
  // reversal under a generic "Deposit"/"Check" type with "NSF" only in the
  // memo. So a returned-item keyword ALWAYS wins over the explicit type — we
  // would rather over-flag than silently fold a reversal into a clean deposit.
  if (/\bnsf\b|returned|bounced|insufficient|reversal/i.test(fallbackText)) {
    return KIND.NSF;
  }
  if (raw == null || String(raw).trim() === "") {
    return classifyKind(fallbackText, amountCents);
  }
  const k = String(raw).trim().toLowerCase();
  if (VALID_KINDS.has(k)) return k;

  // Common aliases. A Map is used so only the listed words can match: a plain
  // object lookup would also resolve inherited members ("constructor",
  // "__proto__") and hand back a function/object instead of a kind string.
  const aliases = new Map([
    ["dep", KIND.DEPOSIT],
    ["chk", KIND.CHECK],
    ["cheque", KIND.CHECK],
    ["payment", KIND.DEPOSIT],
    ["service charge", KIND.FEE],
    ["xfer", KIND.TRANSFER],
    ["returned", KIND.NSF],
    ["bounce", KIND.NSF],
    ["adj", KIND.ADJUSTMENT],
  ]);
  const aliased = aliases.get(k);
  if (aliased) return aliased;

  // Unknown kind word is not fatal — classify from text/sign but keep going.
  return classifyKind(`${raw} ${fallbackText}`, amountCents);
}
|
|
447
|
+
|
|
448
|
+
// Assemble one normalized record with its six fields in a fixed order. `memo`
// and `party` are coerced to trimmed strings (null/undefined become ""); the
// other fields are passed through untouched.
function makeRecord({ date, amount, memo, kind, party, source }) {
  const asTrimmedText = (value) => String(value == null ? "" : value).trim();
  const cleanMemo = asTrimmedText(memo);
  const cleanParty = asTrimmedText(party);
  return { date, amount, memo: cleanMemo, kind, party: cleanParty, source };
}
|
|
458
|
+
|
|
459
|
+
// ---------------------------------------------------------------------------
|
|
460
|
+
// (a) BANK STATEMENT — CSV or OFX/QFX
|
|
461
|
+
// ---------------------------------------------------------------------------
|
|
462
|
+
|
|
463
|
+
// Header aliases for bank-statement CSVs, keyed by logical column. Bank CSVs
// vary wildly; we support BOTH common shapes:
//   * a single signed Amount column, OR
//   * separate Debit / Credit columns (debit => money out => negative).
// Aliases are listed in priority order.
const BANK_SCHEMA = {
  date: ["date", "posted", "posting date", "transaction date", "trans date"],
  amount: ["amount", "amt"],
  // money OUT columns; the "... amt" / "... amount" spellings come from real
  // bank exports (Chase/BofA/Wells/QB CSV)
  debit: [
    "debit", "withdrawal", "withdrawals", "money out",
    "withdrawal amt", "withdrawal amount", "debit amt", "debit amount",
  ],
  // money IN columns; the "... amt" / "... amount" spellings come from real
  // bank exports
  credit: [
    "credit", "deposit", "deposits", "money in",
    "deposit amt", "deposit amount", "credit amt", "credit amount",
  ],
  memo: [
    "description", "memo", "details", "name", "payee",
    "check number", "check #", "check no",
  ],
  type: ["type", "transaction type"],
};
|
|
494
|
+
|
|
495
|
+
// Build ONE normalized bank record from a parsed row, given the column map and
// the signed/split detection. PURE: throws an IngestError (with `loc`) on any
// bad cell, exactly as the strict parser must. The strict parser and the
// diagnostic parser share this single copy of the per-row logic — they differ
// ONLY in that the diagnostic path wraps it in try/catch to accumulate errors.
//
//   arr       - the row's cells
//   cols      - { logical -> column index } for the bank schema
//   hasSigned - true: read the single signed amount column;
//               false: read the debit/credit pair, where exactly one must be
//               filled in (debit => negative, credit => positive, whatever
//               sign the cell itself carries)
//   loc       - { row, source } attached to any error raised
//
// A zero amount is always stored as +0, never -0.
function buildBankRecord(arr, cols, hasSigned, loc) {
  const date = parseDate(cell(arr, cols.date), loc);

  let amount;
  if (hasSigned) {
    amount = parseCents(cell(arr, cols.amount), "amount", loc);
  } else {
    const dRaw = cell(arr, cols.debit);
    const cRaw = cell(arr, cols.credit);
    const dHas = dRaw != null && String(dRaw).trim() !== "";
    const cHas = cRaw != null && String(cRaw).trim() !== "";
    if (dHas && cHas) {
      throw new IngestError("row has BOTH a debit and a credit value", loc);
    }
    if (!dHas && !cHas) {
      throw new IngestError("row has neither debit nor credit value", loc);
    }
    if (dHas) {
      const v = parseCents(dRaw, "debit", loc);
      amount = -Math.abs(v);
    } else {
      const v = parseCents(cRaw, "credit", loc);
      amount = Math.abs(v);
    }
  }
  // -Math.abs(0) is -0, and a signed cell may also parse to -0; collapse both
  // to +0 so strict equality checks on records treat every zero alike.
  if (amount === 0) amount = 0;

  const memo = cell(arr, cols.memo) || "";
  const typeText = cell(arr, cols.type) || "";
  const kind = coerceKind(typeText, `${typeText} ${memo}`, amount, loc);
  return makeRecord({
    date,
    amount,
    memo,
    kind,
    party: "",
    source: SOURCE.BANK,
  });
}
|
|
538
|
+
|
|
539
|
+
function parseBankCSV(text, opts = {}) {
  // Strict (fail-closed) bank CSV parser: the first bad row aborts the file.
  //   text            - raw CSV text of the bank statement
  //   opts.columnMap  - optional override handed to indexHeader
  // Returns the array of records produced by buildBankRecord, one per data row.
  // Throws IngestError for missing/empty input, a missing "date" column, a
  // missing amount column group, or any row-level failure.

  // This function is exported and callable directly, so guard null input here
  // with the same message parseBankStatement uses, matching the QuickBooks and
  // rent-roll parsers which also reject null before calling parseCSV.
  if (text == null) {
    throw new IngestError("no bank input", { source: SOURCE.BANK });
  }
  const rows = parseCSV(text);
  if (rows.length === 0) {
    throw new IngestError("empty bank statement", { source: SOURCE.BANK });
  }
  const cols = indexHeader(rows[0], BANK_SCHEMA, SOURCE.BANK, opts.columnMap);
  requireCols(cols, ["date"], SOURCE.BANK);
  const hasSigned = cols.amount !== -1;
  const hasSplit = cols.debit !== -1 || cols.credit !== -1;
  if (!hasSigned && !hasSplit) {
    throw new IngestError(
      'bank statement needs an "amount" column or debit/credit columns',
      { source: SOURCE.BANK }
    );
  }

  // Row 0 is the header; data rows are numbered from 1 in error locations.
  const out = [];
  for (let r = 1; r < rows.length; r++) {
    const loc = { row: r, source: SOURCE.BANK };
    out.push(buildBankRecord(rows[r], cols, hasSigned, loc));
  }
  return out;
}
|
|
562
|
+
|
|
563
|
+
// Pull a single (possibly unclosed, SGML-style) tag value from an OFX block:
|
|
564
|
+
// "everything up to the next '<' or newline".
|
|
565
|
+
function ofxTagVal(block, tag) {
  // Case-insensitive search for "<tag>"; the captured value runs up to the
  // next "<", CR or LF, so unclosed SGML-style tags are handled as well.
  const pattern = new RegExp(`<${tag}>([^<\\r\\n]*)`, "i");
  const found = block.match(pattern);
  if (!found) {
    return undefined; // tag not present in this block
  }
  return found[1].trim();
}
|
|
569
|
+
|
|
570
|
+
// Split an OFX/QFX document into its <STMTTRN> transaction blocks. Throws when
|
|
571
|
+
// the text is plainly not an OFX document at all (so a misrouted CSV is a clear
|
|
572
|
+
// error, not a silent empty result).
|
|
573
|
+
function ofxBlocks(text) {
  // Collect every <STMTTRN>...</STMTTRN> span (case-insensitive, non-greedy).
  const found = text.match(/<STMTTRN>[\s\S]*?<\/STMTTRN>/gi);
  const blocks = found || [];
  if (blocks.length > 0) {
    return blocks;
  }
  // No complete block: only an input carrying neither an <OFX> nor a
  // <STMTTRN> marker is rejected; otherwise the empty list is returned.
  const hasOfxMarker = /<OFX>|<STMTTRN>/i.test(text);
  if (!hasOfxMarker) {
    throw new IngestError("not an OFX/QFX document", { source: SOURCE.BANK });
  }
  return blocks;
}
|
|
580
|
+
|
|
581
|
+
// Build ONE normalized bank record from a single OFX <STMTTRN> block. PURE;
|
|
582
|
+
// throws an IngestError (with `loc`) on any bad/missing tag, exactly like the
|
|
583
|
+
// CSV per-row builders. Shared verbatim by the strict and diagnostic OFX paths.
|
|
584
|
+
function buildOFXRecord(block, loc) {
  // block: the text of one <STMTTRN> aggregate; loc: location handed to the
  // parsers and attached to any IngestError raised for this transaction.
  const tag = (name) => ofxTagVal(block, name);

  const posted = tag("DTPOSTED");
  if (posted == null) {
    throw new IngestError("OFX txn missing DTPOSTED", loc);
  }
  // Only the first 8 characters (the YYYYMMDD part) are given to parseDate;
  // any trailing time/zone text is ignored.
  const date = parseDate(posted.slice(0, 8), loc);
  const amount = parseCents(tag("TRNAMT"), "TRNAMT", loc);

  // MEMO wins over NAME; both absent (or empty) yields an empty memo.
  const memo = tag("MEMO") || tag("NAME") || "";
  const trntype = tag("TRNTYPE") || "";
  const kind = coerceKind(trntype, `${trntype} ${memo}`, amount, loc);

  return makeRecord({
    date,
    amount,
    memo,
    kind,
    party: "",
    source: SOURCE.BANK,
  });
}
|
|
595
|
+
|
|
596
|
+
// Minimal OFX/QFX SGML reader: pull each <STMTTRN> block's fields. We only need
|
|
597
|
+
// TRNTYPE, DTPOSTED, TRNAMT, NAME/MEMO. OFX tags are often unclosed (SGML), so
|
|
598
|
+
// we read each tag's value as "everything up to the next '<'".
|
|
599
|
+
function parseOFX(text) {
  // Strict OFX path: one record per <STMTTRN> block, in document order. The
  // first block that fails to build aborts the whole parse. Blocks are
  // numbered from 1 in error locations.
  return ofxBlocks(text).map((block, index) =>
    buildOFXRecord(block, { row: index + 1, source: SOURCE.BANK })
  );
}
|
|
606
|
+
|
|
607
|
+
// Auto-detect OFX vs CSV from the content; `format` ("csv"|"ofx") forces it.
|
|
608
|
+
// `columnMap` (CSV only) overrides the alias auto-detect — OFX has no CSV header.
|
|
609
|
+
function parseBankStatement(text, { format, columnMap } = {}) {
  // text: raw file content; format: optional "csv" | "ofx" override;
  // columnMap: optional header override, forwarded to the CSV parser only.
  if (text == null) {
    throw new IngestError("no bank input", { source: SOURCE.BANK });
  }

  // An explicit format wins; otherwise sniff the content for OFX markers.
  let fmt = format;
  if (!fmt) {
    fmt = /<OFX>|<STMTTRN>|OFXHEADER/i.test(text) ? "ofx" : "csv";
  }

  return fmt === "ofx" ? parseOFX(text) : parseBankCSV(text, { columnMap });
}
|
|
615
|
+
|
|
616
|
+
// ---------------------------------------------------------------------------
|
|
617
|
+
// (b) QUICKBOOKS trust-ledger CSV
|
|
618
|
+
// ---------------------------------------------------------------------------
|
|
619
|
+
|
|
620
|
+
// A QuickBooks account "transaction detail" export. QB typically emits separate
|
|
621
|
+
// Debit (money out of the bank/trust register) and Credit (money in) columns,
|
|
622
|
+
// plus Type, Name, Memo, Date. We treat Credit as +, Debit as -, matching the
|
|
623
|
+
// bank's signed convention so the two can be reconciled directly.
|
|
624
|
+
const QB_SCHEMA = {
  date: ["date", "trans date", "transaction date"],
  type: ["type", "transaction type"],
  // Counter-party aliases; "split" and "account" are the column names used by
  // QuickBooks "transaction detail" reports.
  party: ["name", "payee", "customer", "vendor", "received from", "paid to", "split", "account"],
  // "num" (check/reference number) and "clr" (cleared flag) are accepted as
  // memo aliases so that text is kept in the free-text memo.
  memo: ["memo", "description", "memo/description", "num", "clr"],
  debit: ["debit", "payment", "decrease"],
  credit: ["credit", "deposit", "increase"],
  amount: ["amount", "amt"],
};
|
|
645
|
+
|
|
646
|
+
// Build ONE normalized QuickBooks record from a parsed row. PURE; throws on a
|
|
647
|
+
// bad cell. Shared verbatim by the strict and diagnostic QuickBooks parsers.
|
|
648
|
+
function buildQuickBooksRecord(arr, cols, hasSigned, loc) {
  // arr: the row's cells; cols: logical field -> column index; hasSigned: read
  // the single "amount" column rather than the debit/credit pair; loc: row
  // location forwarded to the helpers and to any IngestError thrown here.
  const nonBlank = (value) => value != null && String(value).trim() !== "";

  const date = parseDate(cell(arr, cols.date), loc);

  let amount;
  if (!hasSigned) {
    const debitCell = cell(arr, cols.debit);
    const creditCell = cell(arr, cols.credit);
    const debitGiven = nonBlank(debitCell);
    const creditGiven = nonBlank(creditCell);
    if (debitGiven && creditGiven) {
      throw new IngestError("row has BOTH debit and credit values", loc);
    }
    if (!debitGiven && !creditGiven) {
      throw new IngestError("row has neither debit nor credit value", loc);
    }
    // Debit is stored negative and credit positive, regardless of cell sign.
    if (debitGiven) {
      amount = -Math.abs(parseCents(debitCell, "debit", loc));
    } else {
      amount = Math.abs(parseCents(creditCell, "credit", loc));
    }
  } else {
    amount = parseCents(cell(arr, cols.amount), "amount", loc);
  }

  const memo = cell(arr, cols.memo) || "";
  const party = cell(arr, cols.party) || "";
  const typeText = cell(arr, cols.type) || "";
  const kind = coerceKind(typeText, `${typeText} ${memo}`, amount, loc);

  return makeRecord({ date, amount, memo, kind, party, source: SOURCE.QUICKBOOKS });
}
|
|
683
|
+
|
|
684
|
+
function parseQuickBooksCSV(text, opts = {}) {
  // Strict (fail-closed) QuickBooks CSV parser. opts.columnMap, when given, is
  // forwarded to indexHeader. Returns one record per data row; throws
  // IngestError on the first problem found.
  const where = { source: SOURCE.QUICKBOOKS };
  if (text == null) {
    throw new IngestError("no QuickBooks input", where);
  }

  const rows = parseCSV(text);
  if (rows.length === 0) {
    throw new IngestError("empty QuickBooks export", where);
  }

  const cols = indexHeader(rows[0], QB_SCHEMA, SOURCE.QUICKBOOKS, opts.columnMap);
  requireCols(cols, ["date"], SOURCE.QUICKBOOKS);

  // Either a signed amount column or at least one of debit/credit must exist.
  const hasSigned = cols.amount !== -1;
  const hasSplit = !(cols.debit === -1 && cols.credit === -1);
  if (!hasSigned && !hasSplit) {
    throw new IngestError(
      'QuickBooks export needs an "amount" column or debit/credit columns',
      where
    );
  }

  const records = [];
  for (const [rowIndex, row] of rows.entries()) {
    if (rowIndex === 0) continue; // header row
    records.push(
      buildQuickBooksRecord(row, cols, hasSigned, {
        row: rowIndex,
        source: SOURCE.QUICKBOOKS,
      })
    );
  }
  return records;
}
|
|
712
|
+
|
|
713
|
+
// ---------------------------------------------------------------------------
|
|
714
|
+
// (c) RENT-ROLL / tenant sub-ledger CSV
|
|
715
|
+
// ---------------------------------------------------------------------------
|
|
716
|
+
|
|
717
|
+
// A per-tenant sub-ledger: each row is a charge or a payment against a tenant.
// Convention here: a tenant PAYMENT is money INTO the trust account (+). A
// CHARGE/assessment is an accrual rather than a cash movement; it is still
// recorded, with a NEGATIVE amount (what the tenant owes) and, by default,
// tagged as an adjustment so it can be told apart from cash events when
// reconciling against the bank.
//
// To stay simple and cash-focused we accept either:
// * a signed Amount (positive = payment received), OR
// * separate Payment / Charge columns (payment => positive amount,
// charge => negative amount).
|
|
728
|
+
const RENT_SCHEMA = {
  // Identification of the row.
  date: ["date", "posted", "transaction date"],
  tenant: [
    "tenant",
    "name",
    "resident",
    "lessee",
    "party",
    "lease",
  ],
  unit: ["unit", "apt", "apartment", "property", "door"],
  memo: [
    "memo",
    "description",
    "note",
    "charge type",
    "details",
  ],
  // Amount columns: one signed column, or a payment/charge pair.
  amount: ["amount", "amt"],
  payment: ["payment", "paid", "received", "credit", "amount paid"],
  charge: ["charge", "owed", "assessment", "debit", "amount due"],
  type: ["type", "transaction type"],
};
|
|
738
|
+
|
|
739
|
+
// Build ONE normalized rent-roll record from a parsed row. PURE; throws on a
|
|
740
|
+
// bad cell or a missing tenant. Shared verbatim by the strict and diagnostic
|
|
741
|
+
// rent-roll parsers.
|
|
742
|
+
function buildRentRollRecord(arr, cols, hasSigned, loc) {
  // Normalize one rent-roll row.
  //   arr       - the row's cells
  //   cols      - logical field -> column index
  //   hasSigned - true: read the signed "amount" column; false: read the
  //               payment/charge pair
  //   loc       - row location, attached to any IngestError raised
  // Returns the value of makeRecord(...). Throws IngestError for a missing
  // tenant, for both/neither of payment and charge, or from any helper.
  const date = parseDate(cell(arr, cols.date), loc);
  const tenant = cell(arr, cols.tenant);
  if (tenant == null || String(tenant).trim() === "") {
    throw new IngestError("rent-roll row missing tenant", loc);
  }
  const unit = cell(arr, cols.unit);

  let amount;
  let kindHint;
  if (hasSigned) {
    amount = parseCents(cell(arr, cols.amount), "amount", loc);
    kindHint = amount >= 0 ? KIND.DEPOSIT : KIND.CHECK;
  } else {
    const pRaw = cell(arr, cols.payment);
    const cRaw = cell(arr, cols.charge);
    const pHas = pRaw != null && String(pRaw).trim() !== "";
    const cHas = cRaw != null && String(cRaw).trim() !== "";
    if (pHas && cHas) {
      throw new IngestError(
        "rent-roll row has BOTH a payment and a charge",
        loc
      );
    }
    if (!pHas && !cHas) {
      throw new IngestError(
        "rent-roll row has neither payment nor charge",
        loc
      );
    }
    if (pHas) {
      amount = Math.abs(parseCents(pRaw, "payment", loc));
      kindHint = KIND.DEPOSIT;
    } else {
      // A charge is an accrual, not a cash movement: record it but with a
      // negative sign reflecting what the tenant owes the trust ledger.
      amount = -Math.abs(parseCents(cRaw, "charge", loc));
      kindHint = KIND.ADJUSTMENT;
    }
  }

  const memoRaw = cell(arr, cols.memo) || "";
  const typeText = cell(arr, cols.type) || "";
  // coerceKind always runs (so its validation still applies); its answer is
  // replaced by the amount-based hint only when the row has no type text and
  // the memo carries none of the override keywords (e.g. "NSF").
  let kind = coerceKind(typeText, `${typeText} ${memoRaw}`, amount, loc);
  if (
    typeText.trim() === "" &&
    !/nsf|returned|fee|transfer|adjust|void/i.test(memoRaw)
  ) {
    kind = kindHint;
  }

  // Append the unit only when it has visible content: a whitespace-only unit
  // cell is truthy and would otherwise produce a party such as "Tenant ()".
  const tenantText = String(tenant).trim();
  const unitText = unit == null ? "" : String(unit).trim();
  const party = unitText !== "" ? `${tenantText} (${unitText})` : tenantText;

  return makeRecord({
    date,
    amount,
    memo: memoRaw,
    kind,
    party,
    source: SOURCE.RENT_ROLL,
  });
}
|
|
805
|
+
|
|
806
|
+
function parseRentRollCSV(text, opts = {}) {
  // Strict (fail-closed) rent-roll CSV parser. opts.columnMap, when given, is
  // forwarded to indexHeader. Returns one record per data row; throws
  // IngestError on the first problem found.
  const where = { source: SOURCE.RENT_ROLL };
  if (text == null) {
    throw new IngestError("no rent-roll input", where);
  }

  const rows = parseCSV(text);
  if (rows.length === 0) {
    throw new IngestError("empty rent roll", where);
  }

  const cols = indexHeader(rows[0], RENT_SCHEMA, SOURCE.RENT_ROLL, opts.columnMap);
  requireCols(cols, ["date", "tenant"], SOURCE.RENT_ROLL);

  // Either a signed amount column or at least one of payment/charge must exist.
  const hasSigned = cols.amount !== -1;
  const hasSplit = !(cols.payment === -1 && cols.charge === -1);
  if (!hasSigned && !hasSplit) {
    throw new IngestError(
      'rent roll needs an "amount" column or payment/charge columns',
      where
    );
  }

  const records = [];
  for (const [rowIndex, row] of rows.entries()) {
    if (rowIndex === 0) continue; // header row
    records.push(
      buildRentRollRecord(row, cols, hasSigned, {
        row: rowIndex,
        source: SOURCE.RENT_ROLL,
      })
    );
  }
  return records;
}
|
|
832
|
+
|
|
833
|
+
// ---------------------------------------------------------------------------
|
|
834
|
+
// Diagnostic ingest core (T-25.1) — parse-WITH-report, never fail-closed
|
|
835
|
+
// ---------------------------------------------------------------------------
|
|
836
|
+
//
|
|
837
|
+
// The strict parsers above (parseBankStatement / parseQuickBooksCSV /
|
|
838
|
+
// parseRentRollCSV) fail CLOSED: the first malformed row aborts the whole file.
|
|
839
|
+
// That is correct for the reconcile path — a trust reconciliation must NEVER
|
|
840
|
+
// silently partial-parse, because a dropped row hides the exact discrepancy the
|
|
841
|
+
// broker is legally on the hook to find.
|
|
842
|
+
//
|
|
843
|
+
// But ONBOARDING needs the opposite: when a broker first feeds the tool a real
|
|
844
|
+
// export, they need to SEE what happened — which header columns mapped to which
|
|
845
|
+
// logical field, how many rows normalized, and EVERY row that failed (not just
|
|
846
|
+
// the first) — so they can fix the file or supply a column map. That is what the
|
|
847
|
+
// `diagnose*` family provides.
|
|
848
|
+
//
|
|
849
|
+
// CRITICAL INVARIANT: the diagnostic path REUSES the exact same per-row builders
|
|
850
|
+
// (buildBankRecord / buildQuickBooksRecord / buildRentRollRecord) and the same
|
|
851
|
+
// primitives (parseCSV / indexHeader / parseDate / parseCents / coerceKind) that
|
|
852
|
+
// the strict parsers use. It re-implements NONE of the parse logic. It differs
|
|
853
|
+
// from the strict parsers in EXACTLY two ways:
|
|
854
|
+
// (1) it wraps each per-row build in try/catch and ACCUMULATES IngestErrors
|
|
855
|
+
// instead of throwing on the first, and
|
|
856
|
+
// (2) it returns the detected header + the logical->header column map.
|
|
857
|
+
// A missing REQUIRED column is reported in `requiredMissing` (still a hard
|
|
858
|
+
// problem, surfaced to the caller) rather than collapsing the whole file.
|
|
859
|
+
//
|
|
860
|
+
// `diagnose*` is PURE and side-effect-free: no I/O, no clock, no globals. Given
|
|
861
|
+
// the same (text, opts) it returns a byte-identical report.
|
|
862
|
+
|
|
863
|
+
// Per-source diagnostic config. Each entry names the schema, the REQUIRED
|
|
864
|
+
// logical columns, and the per-row builder + amount-mode detector reused from
|
|
865
|
+
// the strict path. Centralizing this keeps the strict and diagnostic paths in
|
|
866
|
+
// lock-step: they consult the SAME schema and the SAME required set.
|
|
867
|
+
const DIAGNOSE_CONFIG = Object.freeze({
  // Each entry: the alias schema, the logical columns that must be mapped,
  // the alternative amount column groups (at least one column of one group
  // must be mapped), the message reported when none is, and the row builder.
  [SOURCE.BANK]: {
    schema: BANK_SCHEMA,
    required: ["date"],
    amountGroups: [
      ["amount"],
      ["debit", "credit"],
    ],
    amountGroupMessage: 'bank statement needs an "amount" column or debit/credit columns',
    build: buildBankRecord,
  },

  [SOURCE.QUICKBOOKS]: {
    schema: QB_SCHEMA,
    required: ["date"],
    amountGroups: [
      ["amount"],
      ["debit", "credit"],
    ],
    amountGroupMessage: 'QuickBooks export needs an "amount" column or debit/credit columns',
    build: buildQuickBooksRecord,
  },

  [SOURCE.RENT_ROLL]: {
    schema: RENT_SCHEMA,
    required: ["date", "tenant"],
    amountGroups: [
      ["amount"],
      ["payment", "charge"],
    ],
    amountGroupMessage: 'rent roll needs an "amount" column or payment/charge columns',
    build: buildRentRollRecord,
  },
});
|
|
894
|
+
|
|
895
|
+
// Diagnose an OFX/QFX bank file: the same parse-WITH-report contract as the CSV
|
|
896
|
+
// path, but OFX has no header row / column map — it is a stream of <STMTTRN>
|
|
897
|
+
// blocks. We REUSE buildOFXRecord verbatim (the strict OFX path uses the same
|
|
898
|
+
// builder) and accumulate per-transaction errors instead of failing closed. The
|
|
899
|
+
// report keeps the SAME shape so inspect renders it uniformly; `format` is set
|
|
900
|
+
// to "ofx", `header`/`mapped` reflect the OFX tags rather than CSV columns.
|
|
901
|
+
function diagnoseOFX(text, sampleSize) {
  // Parse-with-report for an OFX/QFX bank file.
  //   text       - raw OFX/QFX content
  //   sampleSize - max number of successfully built records echoed in
  //                `sample`; defaults to 5 when omitted or null
  // Returns a report with the same keys as the CSV diagnostic report. Only
  // IngestErrors are collected into `errors`; any other exception is rethrown.

  // This function is exported and may be called without going through
  // diagnoseSource. Without a default, `length < undefined` is always false
  // and `sample` would stay empty, so apply the same default used there.
  const sampleLimit = sampleSize == null ? 5 : sampleSize;

  const report = {
    source: SOURCE.BANK,
    format: "ofx",
    // For OFX there is no CSV header row; surface the OFX tags we read so the
    // human view still has a "what columns did you see" line.
    header: ["DTPOSTED", "TRNAMT", "TRNTYPE", "NAME/MEMO"],
    mapped: {
      date: "DTPOSTED",
      amount: "TRNAMT",
      type: "TRNTYPE",
      memo: "NAME/MEMO",
    },
    requiredMissing: [],
    rowCount: 0,
    okCount: 0,
    records: [],
    errors: [],
    sample: [],
  };

  let blocks;
  try {
    blocks = ofxBlocks(text);
  } catch (err) {
    if (err instanceof IngestError) {
      // Whole-file problem (not an OFX document): report it with row = null.
      report.errors.push({ row: null, message: err.message });
      return report;
    }
    throw err;
  }
  if (blocks.length === 0) {
    report.errors.push({
      row: null,
      message: "OFX document has no <STMTTRN> transactions",
    });
    return report;
  }

  blocks.forEach((block, i) => {
    report.rowCount += 1;
    const loc = { row: i + 1, source: SOURCE.BANK };
    try {
      const rec = buildOFXRecord(block, loc);
      report.records.push(rec);
      report.okCount += 1;
      if (report.sample.length < sampleLimit) report.sample.push(rec);
    } catch (err) {
      if (err instanceof IngestError) {
        report.errors.push({ row: i + 1, message: err.message });
      } else {
        throw err; // a non-ingest failure is a real bug; do not swallow it
      }
    }
  });
  return report;
}
|
|
958
|
+
|
|
959
|
+
// The single diagnostic driver. `source` selects the config; `text` is the raw
|
|
960
|
+
// file; `opts.sampleSize` controls how many ok rows are echoed in `sample`
|
|
961
|
+
// (default 5). For the bank source `opts.format` ("csv"|"ofx") forces the file
|
|
962
|
+
// format; otherwise it is auto-detected exactly like `parseBankStatement`, so
|
|
963
|
+
// inspect gives the SAME answer the reconcile pipeline would for OFX/QFX exports.
|
|
964
|
+
// Returns the structured report described in the module header.
|
|
965
|
+
function diagnoseSource(source, text, opts = {}) {
  // source: key into DIAGNOSE_CONFIG; text: raw file content (may be null);
  // opts.sampleSize: max ok rows echoed in `sample` (default 5);
  // opts.format: bank only, forces "csv" | "ofx"; opts.columnMap: forwarded
  // to indexHeader. Returns the report object; throws only for an unknown
  // source or a non-IngestError failure.
  const cfg = DIAGNOSE_CONFIG[source];
  if (!cfg) {
    throw new IngestError(`unknown source "${source}" for diagnose`);
  }
  const sampleSize = opts.sampleSize == null ? 5 : opts.sampleSize;

  // Bank input that is (or is forced to be) OFX goes to the OFX diagnostic.
  if (source === SOURCE.BANK && text != null) {
    let fmt = opts.format;
    if (!fmt) {
      fmt = /<OFX>|<STMTTRN>|OFXHEADER/i.test(text) ? "ofx" : "csv";
    }
    if (fmt === "ofx") {
      return diagnoseOFX(text, sampleSize);
    }
  }

  const report = {
    source,
    format: "csv",
    header: [],
    mapped: {},
    requiredMissing: [],
    rowCount: 0,
    okCount: 0,
    records: [],
    errors: [],
    sample: [],
  };
  // File-level problems are recorded with row = null and end the diagnosis.
  const fileError = (message) => {
    report.errors.push({ row: null, message });
    return report;
  };

  if (text == null) {
    return fileError(`no ${source} input`);
  }
  const rows = parseCSV(text);
  if (rows.length === 0) {
    return fileError(`empty ${source} file`);
  }
  const dataRowCount = Math.max(rows.length - 1, 0);

  const header = rows[0].map((h) => String(h));
  report.header = header.slice();

  // Resolve logical fields to column indexes with the same indexHeader call
  // (and columnMap) as the strict parsers; an IngestError from it becomes a
  // file-level error instead of propagating.
  let cols;
  try {
    cols = indexHeader(header, cfg.schema, source, opts.columnMap);
  } catch (err) {
    if (!(err instanceof IngestError)) {
      throw err;
    }
    report.rowCount = dataRowCount;
    return fileError(err.message);
  }
  const isMapped = (key) => cols[key] !== -1 && cols[key] !== undefined;

  // Echo which original header satisfied each logical field (null if none).
  for (const key of Object.keys(cfg.schema)) {
    report.mapped[key] = isMapped(key) ? header[cols[key]] : null;
  }

  for (const name of cfg.required) {
    if (!isMapped(name)) {
      report.requiredMissing.push(name);
    }
  }

  const groupPresent = cfg.amountGroups.some((group) => group.some(isMapped));
  const hasSigned = isMapped("amount");

  // Structural problems would make every row fail the same way, so they are
  // reported once and no rows are built.
  if (report.requiredMissing.length > 0 || !groupPresent) {
    if (!groupPresent) {
      report.errors.push({ row: null, message: cfg.amountGroupMessage });
    }
    report.rowCount = dataRowCount;
    return report;
  }

  for (const [r, row] of rows.entries()) {
    if (r === 0) continue; // header row
    report.rowCount += 1;
    try {
      const rec = cfg.build(row, cols, hasSigned, { row: r, source });
      report.records.push(rec);
      report.okCount += 1;
      if (report.sample.length < sampleSize) {
        report.sample.push(rec);
      }
    } catch (err) {
      if (!(err instanceof IngestError)) {
        throw err; // a non-ingest bug is real: do not swallow it
      }
      report.errors.push({ row: r, message: err.message });
    }
  }

  return report;
}
|
|
1081
|
+
|
|
1082
|
+
// Pre-flight a resolved columnMap for a source against a file's actual header,
|
|
1083
|
+
// WITHOUT parsing any data rows. Reuses the SAME parseCSV + per-source schema +
|
|
1084
|
+
// validateColumnMap the strict parsers use, so it accepts/rejects EXACTLY what
|
|
1085
|
+
// the strict parse would — but it throws the IngestError EARLY (before any row
|
|
1086
|
+
// work), letting the CLI classify a bad map as a USAGE error (a bad flag value)
|
|
1087
|
+
// rather than an IO/data error. PURE; no I/O, no clock.
|
|
1088
|
+
//
|
|
1089
|
+
// For the bank source an OFX/QFX document has NO CSV header row and ignores the
|
|
1090
|
+
// columnMap entirely (parseBankStatement routes OFX past it), so this is a no-op
|
|
1091
|
+
// for OFX — there is nothing to validate against and nothing the strict parse
|
|
1092
|
+
// would reject. `opts.format` ("csv"|"ofx") forces the bank format; otherwise it
|
|
1093
|
+
// is auto-detected with the SAME predicate parseBankStatement uses.
|
|
1094
|
+
function validateColumnMapForSource(source, text, columnMap, opts = {}) {
  // Checks columnMap against the header row of `text` for the given source,
  // without building any data row. Returns undefined; throws IngestError for
  // an unknown source or whatever validateColumnMap rejects.
  const hasEntries = Boolean(columnMap) && Object.keys(columnMap).length > 0;
  if (!hasEntries) {
    return; // nothing to validate
  }

  const cfg = DIAGNOSE_CONFIG[source];
  if (!cfg) {
    throw new IngestError(`unknown source "${source}" for column-map validation`);
  }

  if (text == null) {
    return; // a null file is reported elsewhere, not as a map error
  }

  // OFX bank input has no header row to check the map against: skip it.
  if (source === SOURCE.BANK) {
    let fmt = opts.format;
    if (!fmt) {
      fmt = /<OFX>|<STMTTRN>|OFXHEADER/i.test(text) ? "ofx" : "csv";
    }
    if (fmt === "ofx") {
      return;
    }
  }

  const rows = parseCSV(text);
  if (rows.length === 0) {
    return; // an empty file is reported elsewhere
  }
  validateColumnMap(columnMap, rows[0], cfg.schema, source);
}
|
|
1114
|
+
|
|
1115
|
+
// Report the accepted header ALIASES for a logical field of a source. The
|
|
1116
|
+
// inspect/onboarding path uses this to print an ACTIONABLE hint ("add a column
|
|
1117
|
+
// named one of [...]") without re-declaring the schema — it reads the SAME
|
|
1118
|
+
// schema the diagnostic + strict parsers consult, so the hint can never drift
|
|
1119
|
+
// from what the parser actually accepts. Returns [] for an unknown field.
|
|
1120
|
+
function aliasesFor(source, logical) {
  // Returns a fresh copy of the alias list for `logical` in the source's
  // schema, or [] when the schema has no array under that key. Throws
  // IngestError for a source that has no DIAGNOSE_CONFIG entry.
  const cfg = DIAGNOSE_CONFIG[source];
  if (!cfg) {
    throw new IngestError(`unknown source "${source}" for aliasesFor`);
  }
  const aliases = cfg.schema[logical];
  if (!Array.isArray(aliases)) {
    return [];
  }
  return [...aliases];
}
|
|
1126
|
+
|
|
1127
|
+
// Convenience per-source wrappers (the `diagnose{Bank,QuickBooks,RentRoll}`
|
|
1128
|
+
// family named in the acceptance), each a thin call into diagnoseSource.
|
|
1129
|
+
function diagnoseBank(text, opts) {
  // Bank shorthand: forwards text and opts unchanged to diagnoseSource.
  const report = diagnoseSource(SOURCE.BANK, text, opts);
  return report;
}
|
|
1132
|
+
function diagnoseQuickBooks(text, opts) {
  // QuickBooks shorthand: forwards text and opts unchanged to diagnoseSource.
  const report = diagnoseSource(SOURCE.QUICKBOOKS, text, opts);
  return report;
}
|
|
1135
|
+
function diagnoseRentRoll(text, opts) {
  // Rent-roll shorthand: forwards text and opts unchanged to diagnoseSource.
  const report = diagnoseSource(SOURCE.RENT_ROLL, text, opts);
  return report;
}
|
|
1138
|
+
|
|
1139
|
+
module.exports = {
|
|
1140
|
+
SOURCE,
|
|
1141
|
+
KIND,
|
|
1142
|
+
IngestError,
|
|
1143
|
+
// primitives (exported for focused tests / reuse)
|
|
1144
|
+
parseCents,
|
|
1145
|
+
parseDate,
|
|
1146
|
+
parseCSV,
|
|
1147
|
+
classifyKind,
|
|
1148
|
+
// the three normalizers
|
|
1149
|
+
validateColumnMap,
|
|
1150
|
+
parseBankStatement,
|
|
1151
|
+
parseBankCSV,
|
|
1152
|
+
parseOFX,
|
|
1153
|
+
diagnoseOFX,
|
|
1154
|
+
parseQuickBooksCSV,
|
|
1155
|
+
parseRentRollCSV,
|
|
1156
|
+
validateColumnMapForSource,
|
|
1157
|
+
// diagnostic ingest core (T-25.1) — parse-with-report, never fail-closed
|
|
1158
|
+
diagnoseSource,
|
|
1159
|
+
diagnoseBank,
|
|
1160
|
+
diagnoseQuickBooks,
|
|
1161
|
+
diagnoseRentRoll,
|
|
1162
|
+
aliasesFor,
|
|
1163
|
+
};
|