@parseo/appraisals 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +35 -0
  2. package/dist/form-1004mc/extract-checkboxes.d.ts +43 -0
  3. package/dist/form-1004mc/extract-checkboxes.d.ts.map +1 -0
  4. package/dist/form-1004mc/extract-checkboxes.js +145 -0
  5. package/dist/form-1004mc/index.d.ts +3 -0
  6. package/dist/form-1004mc/index.d.ts.map +1 -0
  7. package/dist/form-1004mc/index.js +1 -0
  8. package/dist/form-1004mc/parse-page1.d.ts +8 -0
  9. package/dist/form-1004mc/parse-page1.d.ts.map +1 -0
  10. package/dist/form-1004mc/parse-page1.js +760 -0
  11. package/dist/form-1004mc/parse-sales.d.ts +6 -0
  12. package/dist/form-1004mc/parse-sales.d.ts.map +1 -0
  13. package/dist/form-1004mc/parse-sales.js +505 -0
  14. package/dist/form-1004mc/parser.d.ts +5 -0
  15. package/dist/form-1004mc/parser.d.ts.map +1 -0
  16. package/dist/form-1004mc/parser.js +437 -0
  17. package/dist/form-1004mc/types.d.ts +302 -0
  18. package/dist/form-1004mc/types.d.ts.map +1 -0
  19. package/dist/form-1004mc/types.js +1 -0
  20. package/dist/form-1073/index.d.ts +3 -0
  21. package/dist/form-1073/index.d.ts.map +1 -0
  22. package/dist/form-1073/index.js +1 -0
  23. package/dist/form-1073/parse-page1.d.ts +8 -0
  24. package/dist/form-1073/parse-page1.d.ts.map +1 -0
  25. package/dist/form-1073/parse-page1.js +704 -0
  26. package/dist/form-1073/parse-page2.d.ts +6 -0
  27. package/dist/form-1073/parse-page2.d.ts.map +1 -0
  28. package/dist/form-1073/parse-page2.js +438 -0
  29. package/dist/form-1073/parse-sales.d.ts +7 -0
  30. package/dist/form-1073/parse-sales.d.ts.map +1 -0
  31. package/dist/form-1073/parse-sales.js +477 -0
  32. package/dist/form-1073/parser.d.ts +5 -0
  33. package/dist/form-1073/parser.d.ts.map +1 -0
  34. package/dist/form-1073/parser.js +102 -0
  35. package/dist/form-1073/types.d.ts +300 -0
  36. package/dist/form-1073/types.d.ts.map +1 -0
  37. package/dist/form-1073/types.js +1 -0
  38. package/dist/index.d.ts +13 -0
  39. package/dist/index.d.ts.map +1 -0
  40. package/dist/index.js +9 -0
  41. package/dist/richer-values/index.d.ts +3 -0
  42. package/dist/richer-values/index.d.ts.map +1 -0
  43. package/dist/richer-values/index.js +1 -0
  44. package/dist/richer-values/parser.d.ts +5 -0
  45. package/dist/richer-values/parser.d.ts.map +1 -0
  46. package/dist/richer-values/parser.js +1067 -0
  47. package/dist/richer-values/types.d.ts +225 -0
  48. package/dist/richer-values/types.d.ts.map +1 -0
  49. package/dist/richer-values/types.js +1 -0
  50. package/package.json +24 -0
@@ -0,0 +1,1067 @@
1
+ import { extractLines, UnrecognizedFormatError, toBBox } from "@parseo/shared";
2
/**
 * Parses a Richer Values report from a raw document buffer.
 *
 * The buffer is handed to `extractLines`; the resulting lines are then
 * parsed by `parseRicherValuesReportFromLines`.
 *
 * @param buffer - Document contents, passed unchanged to `extractLines`.
 * @returns Promise resolving to the parsed report object.
 */
export async function parseRicherValuesReport(buffer) {
    return parseRicherValuesReportFromLines(await extractLines(buffer));
}
6
/**
 * Parses a Richer Values report from already-extracted text lines.
 *
 * @param lines - Extracted line objects; this function reads `fullText`
 *   from the first 15 and passes the whole array to the section parsers.
 * @returns Object with `coverPage`, `valuationSummary`, `valuationPage`,
 *   `closestComparables`, `additionalComparables`, `excludedComparables`,
 *   `budgetFlags` and `budgetLineItems`.
 * @throws UnrecognizedFormatError when the first 15 lines contain neither
 *   "Renovation Analysis" nor "Valuation Summary" (case-insensitive).
 */
export function parseRicherValuesReportFromLines(lines) {
    // Format fingerprint: only the first 15 lines are inspected.
    const signature = /Renovation Analysis|Valuation Summary/i;
    const headText = lines
        .slice(0, 15)
        .map((entry) => entry.fullText)
        .join("\n");
    if (!signature.test(headText)) {
        throw new UnrecognizedFormatError("RicherValues", "first 15 lines do not contain a RicherValues report signature");
    }
    // Properties are evaluated top to bottom, matching the section order of the report.
    return {
        coverPage: parseCoverPage(lines),
        valuationSummary: parseValuationSummary(lines),
        valuationPage: parseValuationPage(lines),
        closestComparables: parseComparablesSection(lines, "Closest Market Comparables"),
        additionalComparables: parseComparablesSection(lines, "Additional Comparables"),
        excludedComparables: parseComparablesSection(lines, "Additional Comps Excluded From the Analysis"),
        budgetFlags: parseBudgetFlags(lines),
        budgetLineItems: parseBudgetLineItems(lines),
    };
}
32
+ // ── Cover Page (Page 1) ─────────────────────────────────────────────────────
33
/**
 * Extracts the cover-page fields from the lines whose `page` is 1.
 *
 * Each field is taken from the first page-1 line matching a pattern; when
 * that line has a first segment, its bounding box is stored under the
 * field's key.
 *
 * @param lines - All extracted lines of the document.
 * @returns `{ reportType, address, sqft, beds, baths, yearBuilt,
 *   propertyType, effectiveDate, preparedFor, boundingBoxes }`.
 */
function parseCoverPage(lines) {
    const firstPage = lines.filter((entry) => entry.page === 1);
    const boxes = {};
    const firstMatching = (pattern) => firstPage.find((entry) => pattern.test(entry.fullText));
    const recordBox = (key, entry) => {
        if (entry?.segments[0]) {
            boxes[key] = toBBox(entry.segments[0], entry);
        }
    };

    // Report type, e.g. "Renovation Analysis".
    const reportTypeLine = firstMatching(/renovation analysis|desktop review|bpo|appraisal/i);
    recordBox("reportType", reportTypeLine);

    // Street address ending in a two-letter state and 5-digit ZIP.
    const addressLine = firstMatching(/\d+.*,\s*[A-Z]{2},?\s*\d{5}/);
    recordBox("address", addressLine);

    // Property details, e.g. "1,504 sqft 3 + 2.00; 1962 SFR".
    const detailsLine = firstMatching(/sqft/i);
    const details = parsePropertyDetails(detailsLine?.fullText ?? "");
    recordBox("propertyDetails", detailsLine);

    const dateLine = firstMatching(/effective date/i);
    const effectiveDate = parseEffectiveDate(dateLine?.fullText ?? "");
    recordBox("effectiveDate", dateLine);

    // "Prepared For" block: the box is taken from the line right after the label.
    const preparedIdx = firstPage.findIndex((entry) => /prepared for/i.test(entry.fullText));
    const preparedFor = parsePreparedFor(firstPage);
    if (preparedIdx >= 0) {
        recordBox("preparedForName", firstPage[preparedIdx + 1]);
    }

    return {
        reportType: reportTypeLine?.fullText ?? "",
        address: addressLine?.fullText ?? "",
        ...details,
        effectiveDate,
        preparedFor,
        boundingBoxes: boxes,
    };
}
71
/**
 * Parses a property summary string such as "1,504 sqft 3 + 2.00; 1962 SFR".
 *
 * @param text - The details line text.
 * @returns `{ sqft, beds, baths, yearBuilt, propertyType }`; numeric fields
 *   are `null` and `propertyType` is "" when their pattern is not found.
 */
function parsePropertyDetails(text) {
    const area = text.match(/([\d,]+)\s*sqft/i);
    // "<beds> + <baths>"
    const rooms = text.match(/(\d+)\s*\+\s*([\d.]+)/);
    // Four digits followed by an upper-case code of two or more letters.
    const built = text.match(/(\d{4})\s+([A-Z]{2,})/);

    let sqft = null;
    if (area) {
        sqft = parseInt(area[1].replace(/,/g, ""), 10);
    }
    let beds = null;
    let baths = null;
    if (rooms) {
        beds = parseInt(rooms[1], 10);
        baths = parseFloat(rooms[2]);
    }
    let yearBuilt = null;
    let propertyType = "";
    if (built) {
        yearBuilt = parseInt(built[1], 10);
        propertyType = built[2];
    }
    return { sqft, beds, baths, yearBuilt, propertyType };
}
82
/**
 * Extracts the date following "Effective Date:" and normalizes it to
 * `YYYY-MM-DD`.
 *
 * @param text - Line text expected to contain "Effective Date: <date>".
 * @returns "" when the label is absent; the normalized date when the value
 *   can be parsed; otherwise the raw trimmed value.
 */
function parseEffectiveDate(text) {
    const match = text.match(/effective date:\s*(.+)/i);
    if (!match) {
        return "";
    }
    const dateStr = match[1].trim();
    // A date-only ISO string is parsed by `Date` as UTC midnight; reading it
    // back with the local-time getters below would shift the day in
    // negative-offset time zones. It is already in the target format.
    if (/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) {
        return dateStr;
    }
    const parsed = new Date(dateStr);
    if (Number.isNaN(parsed.getTime())) {
        return dateStr;
    }
    const yyyy = parsed.getFullYear();
    const mm = String(parsed.getMonth() + 1).padStart(2, "0");
    const dd = String(parsed.getDate()).padStart(2, "0");
    return `${yyyy}-${mm}-${dd}`;
}
95
/**
 * Reads the "Prepared For" block from the page-1 lines.
 *
 * The line after the first one matching "prepared for" is the name; every
 * remaining line is joined with ", " to form the address.
 *
 * @param page1Lines - Lines of page 1, each with a `fullText` string.
 * @returns `{ name, address }`, both "" when the label is not found.
 */
function parsePreparedFor(page1Lines) {
    const markerIdx = page1Lines.findIndex((entry) => /prepared for/i.test(entry.fullText));
    if (markerIdx < 0) {
        return { name: "", address: "" };
    }
    const [nameLine, ...addressLines] = page1Lines.slice(markerIdx + 1);
    return {
        name: nameLine?.fullText ?? "",
        address: addressLines.map((entry) => entry.fullText).join(", "),
    };
}
105
+ // ── Valuation Summary and Parameters (Pages 2-4) ────────────────────────────
106
/**
 * Selects the lines on pages 2 through 4 that are not recognized as a page
 * header or footer by `isHeaderOrFooter`.
 *
 * @param lines - All extracted lines of the document.
 * @returns The filtered lines, in their original order.
 */
function getBodyLines(lines) {
    const isBodyLine = (entry) => {
        const onSummaryPages = entry.page >= 2 && entry.page <= 4;
        return onSummaryPages ? !isHeaderOrFooter(entry) : false;
    };
    return lines.filter(isBodyLine);
}
111
/**
 * Tells whether a line is repeated page furniture rather than content.
 *
 * A line qualifies when its full text is exactly "Renovation Analysis"
 * (case-insensitive), is entirely an address ending in a two-letter state
 * and 5-digit ZIP, or starts with "For a complete set of terms".
 *
 * @param l - Line object with a `fullText` string.
 * @returns `true` for header/footer lines, `false` otherwise.
 */
function isHeaderOrFooter(l) {
    const furniturePatterns = [
        /^Renovation Analysis$/i,
        /^\d+.*,\s*[A-Z]{2},?\s*\d{5}$/,
        /^For a complete set of terms/i,
    ];
    return furniturePatterns.some((pattern) => pattern.test(l.fullText));
}
117
/**
 * Parses the "Valuation Summary and Parameters" section from the body lines
 * of pages 2-4 (as selected by `getBodyLines`).
 *
 * @param lines - All extracted lines of the document.
 * @returns Object with one entry per sub-section plus `boundingBoxes`,
 *   which holds `sectionTitle` when the section heading line is found and
 *   has a first segment.
 */
function parseValuationSummary(lines) {
    const bodyLines = getBodyLines(lines);
    const boxes = {};

    const titleLine = bodyLines.find((entry) => /Valuation Summary and Parameters/i.test(entry.fullText));
    const titleSeg = titleLine?.segments[0];
    if (titleSeg) {
        boxes.sectionTitle = toBBox(titleSeg, titleLine);
    }

    const commentary = parseValuationCommentary(bodyLines);
    const propertyDataSources = parsePropertyDataSources(bodyLines);
    const subjectPropertyDetails = parseSubjectPropertyDetails(bodyLines);
    const comparableSearchParameters = parseComparableSearchParameters(bodyLines);
    const verificationOfCondition = parseVerificationOfCondition(bodyLines);
    const listingHistory = parseListingHistory(bodyLines);
    const neighborhood = parseNeighborhood(bodyLines);
    const preparedBy = parsePreparedBySection(bodyLines);

    return {
        commentary,
        propertyDataSources,
        subjectPropertyDetails,
        comparableSearchParameters,
        verificationOfCondition,
        listingHistory,
        neighborhood,
        preparedBy,
        boundingBoxes: boxes,
    };
}
135
+ // ── Valuation Commentary ────────────────────────────────────────────────────
136
// A line starting with one of these labels begins a new commentary field.
const FIELD_BOUNDARY = /^(Hyper-Local Neighborhood|Subject Property Assessment|Budget Assessment|Budget Flags|Estimated Valuation|Valuation Commentary):/i;
// A line starting with one of these headings begins a new section.
const SECTION_BOUNDARY = /^(Property Data Sources|Subject Property Details|Comparable Search Parameters|Neighborhood:|Verification of Physical|External Data Sources|Prepared By:|Value Drivers|Distance-Based Comps:|Size-Based Comps:|Additional Comps:|Custom Comp Search:|Additional Analyses Conducted:)/i;
/**
 * Collects the free text that follows a label, spanning continuation lines.
 *
 * Starts at the first line containing `label`, takes the text after the
 * label, then appends each following line until one matches
 * `FIELD_BOUNDARY` or `SECTION_BOUNDARY`.
 *
 * @param body - Lines to search; each has `fullText` and `segments`.
 * @param label - Exact label text to look for (e.g. "Budget Flags:").
 * @param bb - Optional object that receives the label's bounding box.
 * @param bbKey - Key under which the bounding box is stored in `bb`.
 * @returns The collected text joined with single spaces and trimmed, or ""
 *   when the label is not found.
 */
function extractCommentaryField(body, label, bb, bbKey) {
    const startIdx = body.findIndex((entry) => entry.fullText.includes(label));
    if (startIdx < 0) {
        return "";
    }
    const labelLine = body[startIdx];
    const labelEnd = labelLine.fullText.indexOf(label) + label.length;

    if (bb && bbKey) {
        // The box is attached to the first segment containing the label without its first colon.
        const bareLabel = label.replace(":", "");
        const labelSeg = labelLine.segments.find((seg) => seg.text.includes(bareLabel));
        if (labelSeg) {
            bb[bbKey] = toBBox(labelSeg, labelLine);
        }
    }

    const collected = [labelLine.fullText.slice(labelEnd).trim()];
    for (const { fullText } of body.slice(startIdx + 1)) {
        if (FIELD_BOUNDARY.test(fullText) || SECTION_BOUNDARY.test(fullText)) {
            break;
        }
        collected.push(fullText);
    }
    return collected.join(" ").trim();
}
162
/**
 * Extracts the five labelled commentary paragraphs via
 * `extractCommentaryField`.
 *
 * @param body - Body lines to search.
 * @returns Object keyed by field name, plus `boundingBoxes` holding the
 *   label boxes recorded during extraction.
 */
function parseValuationCommentary(body) {
    const boxes = {};
    const fields = [
        ["hyperLocalNeighborhood", "Hyper-Local Neighborhood:"],
        ["subjectPropertyAssessment", "Subject Property Assessment:"],
        ["budgetAssessment", "Budget Assessment:"],
        ["budgetFlags", "Budget Flags:"],
        ["estimatedValuation", "Estimated Valuation:"],
    ];
    const commentary = {};
    for (const [key, label] of fields) {
        commentary[key] = extractCommentaryField(body, label, boxes, key);
    }
    commentary.boundingBoxes = boxes;
    return commentary;
}
173
+ // ── Property Data Sources ───────────────────────────────────────────────────
174
/**
 * Parses the "Property Data Sources" table.
 *
 * Scans the lines after the "Property Data Sources" heading. A line whose
 * text starts with a known source name becomes a row; its segments after
 * the first are read, in order, as the columns above, below, total, beds,
 * baths, stories, year, lot and garage. Scanning stops at the first
 * non-row line mentioning "Subject Property" once at least one row exists.
 *
 * @param body - Body lines, each with `fullText` and `segments`.
 * @returns Array of rows `{ source, <columns>, boundingBoxes }`. A column is
 *   `null` when its cell is missing, "-", empty, or not numeric.
 */
function parsePropertyDataSources(body) {
    const headerIdx = body.findIndex((l) => /^Property Data Sources$/i.test(l.fullText));
    if (headerIdx < 0) {
        return [];
    }
    const sources = ["Used by RV", "Upload", "MLS", "County", "Manual"];
    const colNames = ["above", "below", "total", "beds", "baths", "stories", "year", "lot", "garage"];
    const rows = [];
    for (const line of body.slice(headerIdx + 1)) {
        const source = sources.find((s) => line.fullText.startsWith(s));
        if (!source) {
            if (rows.length > 0 && /Subject Property/i.test(line.fullText)) {
                break;
            }
            continue;
        }
        const bb = {};
        // Guarded like every other bounding-box lookup in this file: a row
        // without segments still yields a row, just without a source box.
        if (line.segments[0]) {
            bb.source = toBBox(line.segments[0], line);
        }
        const vals = line.segments.slice(1).map((seg, i) => {
            const t = seg.text.trim();
            if (t === "-" || t === "") {
                return null;
            }
            const n = parseFloat(t.replace(/,/g, ""));
            // Non-numeric cells become null instead of leaking NaN into the
            // row (`NaN ?? null` would keep NaN).
            if (Number.isNaN(n)) {
                return null;
            }
            if (colNames[i]) {
                bb[colNames[i]] = toBBox(seg, line);
            }
            return n;
        });
        const row = { source };
        colNames.forEach((col, i) => {
            row[col] = vals[i] ?? null;
        });
        row.boundingBoxes = bb;
        rows.push(row);
    }
    return rows;
}
219
+ // ── Subject Property Details ────────────────────────────────────────────────
220
/**
 * Parses the "Subject Property Details" section.
 *
 * Label/value fields are looked up from the section heading onward via
 * `findLabelValue`; the "Current Use", "Percentile" and "Projected Use"
 * rows are located by the start of their line text.
 *
 * @param body - Body lines, each with `page`, `fullText` and `segments`.
 * @returns `{ address, apn, comparisonMetrics, currentUse, percentile,
 *   projectedUse, boundingBoxes }`.
 */
function parseSubjectPropertyDetails(body) {
    const boxes = {};
    const findRow = (pattern) => body.find((entry) => entry.page >= 2 && pattern.test(entry.fullText));
    const tagFirstSegment = (key, row) => {
        if (row?.segments[0]) {
            boxes[key] = toBBox(row.segments[0], row);
        }
    };

    const sectionIdx = body.findIndex((entry) => /Subject Property Details/i.test(entry.fullText));
    const address = findLabelValue(body, sectionIdx, "Address", boxes, "address");
    const apn = findLabelValue(body, sectionIdx, "Assessor Parcel Number", boxes, "apn");
    const comparisonMetrics = findLabelValue(body, sectionIdx, "Subject Property Comparison Metrics", boxes, "comparisonMetrics");

    const currentUseRow = findRow(/^Current Use\b/i);
    const currentUse = parseCurrentUseRow(currentUseRow);
    tagFirstSegment("currentUse", currentUseRow);

    const percentileRow = findRow(/^Percentile\b/i);
    const percentile = parsePercentileRow(percentileRow);
    tagFirstSegment("percentile", percentileRow);

    // Projected use is the text of every segment after the row label.
    const projectedRow = findRow(/^Projected Use\b/i);
    let projectedUse = "";
    if (projectedRow) {
        projectedUse = projectedRow.segments
            .slice(1)
            .map((seg) => seg.text)
            .join(" ")
            .trim();
    }
    tagFirstSegment("projectedUse", projectedRow);

    return {
        address,
        apn,
        comparisonMetrics,
        currentUse,
        percentile,
        projectedUse,
        boundingBoxes: boxes,
    };
}
253
/**
 * Finds the value next to a label, searching from `afterIdx` onward.
 *
 * A line matches when it has at least two segments and its first segment
 * contains `label`. The value is the text of all segments after the first,
 * joined with spaces.
 *
 * @param body - Lines to search, each with a `segments` array.
 * @param afterIdx - Index to start from; a negative index yields "".
 * @param label - Text the first segment must contain.
 * @param bb - Optional object that receives the value's bounding box.
 * @param bbKey - Key under which the bounding box is stored in `bb`.
 * @returns The trimmed value text, or "" when nothing matches.
 */
function findLabelValue(body, afterIdx, label, bb, bbKey) {
    if (afterIdx < 0) {
        return "";
    }
    const row = body
        .slice(afterIdx)
        .find(({ segments }) => segments.length >= 2 && segments[0].text.includes(label));
    if (!row) {
        return "";
    }
    const valueSegs = row.segments.slice(1);
    if (bb && bbKey && valueSegs[0]) {
        bb[bbKey] = toBBox(valueSegs[0], row);
    }
    return valueSegs
        .map((seg) => seg.text)
        .join(" ")
        .trim();
}
264
/**
 * Parses the "Current Use" table row.
 *
 * The segments after the first are read, in order, as type, sqft, beds,
 * baths, year built and acres; numeric columns go through `parseNum`.
 *
 * @param line - The row's line object, or undefined when the row is absent.
 * @returns `{ type, sqft, beds, baths, yearBuilt, acres }`; without a line,
 *   `type` is "" and the numeric fields are `null`.
 */
function parseCurrentUseRow(line) {
    if (!line) {
        return { type: "", sqft: null, beds: null, baths: null, yearBuilt: null, acres: null };
    }
    const [type, sqft, beds, baths, yearBuilt, acres] = line.segments
        .slice(1)
        .map((seg) => seg.text.trim());
    return {
        type: type ?? "",
        sqft: parseNum(sqft),
        beds: parseNum(beds),
        baths: parseNum(baths),
        yearBuilt: parseNum(yearBuilt),
        acres: parseNum(acres),
    };
}
278
/**
 * Parses the "Percentile" table row.
 *
 * The segments after the first are read, in order, as the sqft, beds,
 * baths, year-built and acres percentiles, kept as trimmed strings.
 *
 * @param line - The row's line object, or undefined when the row is absent.
 * @returns `{ sqft, beds, baths, yearBuilt, acres }`, each "" when missing.
 */
function parsePercentileRow(line) {
    const cells = line ? line.segments.slice(1).map((seg) => seg.text.trim()) : [];
    const [sqft = "", beds = "", baths = "", yearBuilt = "", acres = ""] = cells;
    return { sqft, beds, baths, yearBuilt, acres };
}
291
/**
 * Converts a table cell to a number.
 *
 * Commas are removed before parsing with `parseFloat`.
 *
 * @param val - Cell text; may be undefined or empty.
 * @returns The parsed number, or `null` for a missing, empty, "-" or
 *   non-numeric cell.
 */
function parseNum(val) {
    if (!val) {
        return null;
    }
    const stripped = val.replace(/,/g, "").trim();
    if (stripped === "" || stripped === "-") {
        return null;
    }
    const parsed = parseFloat(stripped);
    return Number.isNaN(parsed) ? null : parsed;
}
300
+ // ── Comparable Search Parameters ────────────────────────────────────────────
301
/**
 * Extracts the comparable-search parameter paragraphs via
 * `extractCommentaryField`.
 *
 * @param body - Body lines to search.
 * @returns Object keyed by parameter name, plus `boundingBoxes` holding the
 *   label boxes recorded during extraction.
 */
function parseComparableSearchParameters(body) {
    const boxes = {};
    const fields = [
        ["distanceBasedComps", "Distance-Based Comps:"],
        ["sizeBasedComps", "Size-Based Comps:"],
        ["additionalComps", "Additional Comps:"],
        ["customCompSearch", "Custom Comp Search:"],
        ["additionalAnalyses", "Additional Analyses Conducted:"],
    ];
    const parameters = {};
    for (const [key, label] of fields) {
        parameters[key] = extractCommentaryField(body, label, boxes, key);
    }
    parameters.boundingBoxes = boxes;
    return parameters;
}
312
+ // ── Verification of Condition ───────────────────────────────────────────────
313
/**
 * Collects the text under the "Verification of Physical Condition" heading.
 *
 * Takes every line after the heading up to, but not including, the first
 * line mentioning "Subject Property Listing History".
 *
 * @param body - Body lines, each with a `fullText` string.
 * @returns The lines joined with spaces and trimmed, or "" when the heading
 *   is not found.
 */
function parseVerificationOfCondition(body) {
    const headingIdx = body.findIndex((entry) => /Verification of Physical Condition/i.test(entry.fullText));
    if (headingIdx < 0) {
        return "";
    }
    const following = body.slice(headingIdx + 1).map((entry) => entry.fullText);
    const stopAt = following.findIndex((text) => /Subject Property Listing History/i.test(text));
    const kept = stopAt < 0 ? following : following.slice(0, stopAt);
    return kept.join(" ").trim();
}
326
+ // ── Listing History ─────────────────────────────────────────────────────────
327
/**
 * Collects the text under the "Subject Property Listing History" heading.
 *
 * Takes every line after the heading up to, but not including, the first
 * line that starts with "Neighborhood:".
 *
 * @param body - Body lines, each with a `fullText` string.
 * @returns The lines joined with spaces and trimmed, or "" when the heading
 *   is not found.
 */
function parseListingHistory(body) {
    const headingIdx = body.findIndex((entry) => /Subject Property Listing History/i.test(entry.fullText));
    if (headingIdx < 0) {
        return "";
    }
    const collected = [];
    for (const { fullText } of body.slice(headingIdx + 1)) {
        if (/^Neighborhood:/i.test(fullText)) {
            break;
        }
        collected.push(fullText);
    }
    return collected.join(" ").trim();
}
340
+ // ── Neighborhood ────────────────────────────────────────────────────────────
341
/**
 * Parses the neighborhood block: land use, building averages, zoning, flood
 * information, conformance and ownership.
 *
 * @param body - Body lines; each has `page`, `segments` (with `text` and
 *   `x`) and is searched by segment text.
 * @returns Object of string fields (each "" when not found) plus
 *   `boundingBoxes` for the values that were located.
 */
function parseNeighborhood(body) {
    const bb = {};
    const LAND_USE_LABEL = "Land Use Types Present";

    // Value of a "label | value..." line, matched on the line's first segment.
    const labelValue = (label, bbKey) => {
        const line = body.find((l) => l.segments.length >= 1 && l.segments[0].text.includes(label));
        if (!line) {
            return "";
        }
        const valSeg = line.segments[1];
        if (valSeg) {
            bb[bbKey] = toBBox(valSeg, line);
        }
        return line.segments.slice(1).map((s) => s.text).join(" ").trim();
    };

    // Text of the LAST segment of the first line having a segment that contains `needle`.
    const lastSegmentValue = (needle, bbKey) => {
        const line = body.find((l) => l.segments.some((s) => s.text.includes(needle)));
        if (!line) {
            return "";
        }
        const valSeg = line.segments[line.segments.length - 1];
        const value = valSeg.text.trim();
        bb[bbKey] = toBBox(valSeg, line);
        return value;
    };

    // Text of the segment right after the one matched by `isLabel`, unless
    // that segment is missing or contains `rejected` (i.e. it is the next
    // label rather than an answer).
    const answerAfter = (isLabel, rejected, bbKey) => {
        const line = body.find((l) => l.segments.some(isLabel));
        if (!line) {
            return "";
        }
        const answer = line.segments[line.segments.findIndex(isLabel) + 1];
        if (!answer || answer.text.includes(rejected)) {
            return "";
        }
        bb[bbKey] = toBBox(answer, line);
        return answer.text.trim();
    };

    // Land use types — label and value lines interleaved by y position, so
    // the scan starts up to two lines before the label.
    const landUseLabelIdx = body.findIndex((l) => l.segments.some((s) => s.text.includes(LAND_USE_LABEL)));
    const landUseConcernsIdx = body.findIndex((l) => l.segments.some((s) => s.text.includes("Land Use Concerns")));
    let landUseTypesPresent = "";
    if (landUseLabelIdx >= 0) {
        const startIdx = Math.max(0, landUseLabelIdx - 2);
        const wantedEnd = landUseConcernsIdx > landUseLabelIdx ? landUseConcernsIdx : landUseLabelIdx + 3;
        // Clamp: the fallback window may run past the last body line.
        const endIdx = Math.min(body.length, wantedEnd);
        const valueParts = [];
        for (let i = startIdx; i < endIdx; i++) {
            for (const seg of body[i].segments) {
                // Values are the non-label segments positioned at x >= 200.
                if (!seg.text.includes(LAND_USE_LABEL) && seg.x >= 200) {
                    if (valueParts.length === 0) {
                        bb.landUseTypesPresent = toBBox(seg, body[i]);
                    }
                    valueParts.push(seg.text.trim());
                }
            }
        }
        landUseTypesPresent = valueParts.join(" ").replace(/\s+/g, " ").replace(/,\s*$/, "").trim();
    }

    // Flood info
    const floodMapNumber = lastSegmentValue("Map Number", "floodMapNumber");
    const floodMapEffectiveDate = lastSegmentValue("Map Effective Date", "floodMapEffectiveDate");
    const isInFloodZone = lastSegmentValue("Is it in the Flood Zone?", "isInFloodZone");
    const isInSpecialFloodHazard = lastSegmentValue("Special Flood Hazard", "isInSpecialFloodHazard");

    // Conformance
    const conformanceIssues = answerAfter((s) => s.text.includes("conformance issues"), "Map Effective", "conformanceIssues");

    // Ownership
    const ownership = answerAfter((s) => /^Leasehold$/i.test(s.text.trim()), "Flood", "ownership");

    // Zoning: first segment of the line following the multi-segment
    // "Flood Information" line on page 3 or later.
    const zoningLine = body.find((l) => l.page >= 3 && l.segments.length >= 2 && l.segments.some((s) => s.text.includes("Flood Information")));
    const zoningIdx = zoningLine ? body.indexOf(zoningLine) : -1;
    let zoningText = "";
    if (zoningIdx >= 0 && zoningIdx + 1 < body.length) {
        const nextLine = body[zoningIdx + 1];
        const firstSeg = nextLine.segments[0];
        zoningText = firstSeg?.text.trim() ?? "";
        if (firstSeg) {
            bb.zoning = toBBox(firstSeg, nextLine);
        }
    }

    return {
        landUseTypesPresent,
        landUseConcerns: labelValue("Land Use Concerns:", "landUseConcerns"),
        averageAgeOfResidentialUnits: labelValue("Average Age of Residential Units:", "averageAgeOfResidentialUnits"),
        averageBuildingCondition: labelValue("Average Building Condition:", "averageBuildingCondition"),
        averageBuildingQuality: labelValue("Average Building Quality:", "averageBuildingQuality"),
        soldCompPercentRemodeled: labelValue("Sold Comp Percent Remodeled:", "soldCompPercentRemodeled"),
        zoning: zoningText,
        floodMapNumber,
        floodMapEffectiveDate,
        isInFloodZone,
        isInSpecialFloodHazard,
        conformanceIssues,
        ownership,
        boundingBoxes: bb,
    };
}
456
+ // ── Prepared By ─────────────────────────────────────────────────────────────
457
/**
 * Parses the "Prepared By:" line and the timestamp line that follows it.
 *
 * The text after the label is scanned for an email address and a phone
 * number; the name is whatever precedes the first of the two. The date is
 * the first later line containing four consecutive digits and "AM"/"PM".
 *
 * @param body - Body lines, each with `fullText` and `segments`.
 * @returns `{ name, email, phone, date, boundingBoxes }`; all strings are ""
 *   when the "Prepared By:" line is not found.
 */
function parsePreparedBySection(body) {
    const bb = {};
    const line = body.find((l) => /^Prepared By:/i.test(l.fullText));
    if (!line) {
        return { name: "", email: "", phone: "", date: "", boundingBoxes: bb };
    }
    if (line.segments[0]) {
        bb.preparedBy = toBBox(line.segments[0], line);
    }
    const text = line.fullText.replace(/^Prepared By:\s*/i, "");
    const emailMatch = text.match(/([\w.+-]+@[\w.-]+)/);
    const phoneMatch = text.match(/(\(?\d{3}\)?\s*[\d-]{7,})/);
    const email = emailMatch ? emailMatch[1] : "";
    const phone = phoneMatch ? phoneMatch[1] : "";
    // Cut the name at the earliest contact detail, so a phone number does
    // not end up in the name when no email is present (or when the phone
    // precedes the email).
    const contactStarts = [emailMatch, phoneMatch]
        .filter((m) => m !== null)
        .map((m) => m.index);
    const rawName = contactStarts.length > 0 ? text.slice(0, Math.min(...contactStarts)) : text;
    const name = rawName.replace(/,\s*$/, "").trim();
    // Date is on a subsequent line
    const lineIdx = body.indexOf(line);
    let date = "";
    for (let i = lineIdx + 1; i < body.length; i++) {
        const t = body[i].fullText;
        if (/\d{4}/.test(t) && /AM|PM/i.test(t)) {
            date = t.trim();
            if (body[i].segments[0]) {
                bb.date = toBBox(body[i].segments[0], body[i]);
            }
            break;
        }
    }
    return { name, email, phone, date, boundingBoxes: bb };
}
487
+ // ── Valuation Page (Page 5) ─────────────────────────────────────────────────
488
/**
 * Parses the valuation page from the page-5 lines that are not recognized
 * as header or footer by `isHeaderOrFooter`.
 *
 * @param lines - All extracted lines of the document.
 * @returns `{ valuationResults, renovationStrategies, marketDemand }`.
 */
function parseValuationPage(lines) {
    const pageFive = [];
    for (const entry of lines) {
        if (entry.page === 5 && !isHeaderOrFooter(entry)) {
            pageFive.push(entry);
        }
    }
    const valuationResults = parseValuationResults(pageFive);
    const renovationStrategies = parseRenovationStrategies(pageFive);
    const marketDemand = parseMarketDemand(pageFive);
    return { valuationResults, renovationStrategies, marketDemand };
}
496
/**
 * Parses the headline valuation figures.
 *
 * Each field is read from the first line whose first segment contains the
 * field label; the value is the text of that line's last segment. Monetary
 * fields are converted with `parseCurrency`.
 *
 * @param body - Lines of the valuation page, each with `segments`.
 * @returns `{ currentCondition, estimatedAsIsMarketValue, borrowerBudget,
 *   borrowerTargetCondition, estimatedARV, boundingBoxes }`.
 */
function parseValuationResults(body) {
    const boxes = {};
    const readField = (label, key) => {
        const row = body.find((entry) => entry.segments[0]?.text.includes(label));
        if (!row) {
            return "";
        }
        const lastSeg = row.segments[row.segments.length - 1];
        // A box is only recorded when the value is a separate segment from the label.
        if (lastSeg && lastSeg !== row.segments[0]) {
            boxes[key] = toBBox(lastSeg, row);
        }
        return lastSeg?.text.trim() ?? "";
    };

    const currentCondition = readField("Current Condition", "currentCondition");
    const asIsText = readField("Estimated As Is Market Value", "estimatedAsIsMarketValue");
    const budgetText = readField("Borrower Budget", "borrowerBudget");
    const borrowerTargetCondition = readField("Borrower Target Condition", "borrowerTargetCondition");
    const arvText = readField("Estimated ARV at Target Condition", "estimatedARV");

    const estimatedAsIsMarketValue = parseCurrency(asIsText);
    const borrowerBudget = parseCurrency(budgetText);
    const estimatedARV = parseCurrency(arvText);

    return {
        currentCondition,
        estimatedAsIsMarketValue,
        borrowerBudget,
        borrowerTargetCondition,
        estimatedARV,
        boundingBoxes: boxes,
    };
}
521
/**
 * Converts a currency string such as "$580,000" to a number.
 *
 * Dollar signs and commas are removed before parsing with `parseFloat`. A
 * value wrapped in parentheses, e.g. "($12,500)", is read as negative.
 *
 * @param val - Currency text; `null`/`undefined` are tolerated.
 * @returns The parsed number, or `null` for a missing, empty or
 *   non-numeric value.
 */
function parseCurrency(val) {
    if (val == null) {
        return null;
    }
    let clean = String(val).replace(/[$,]/g, "").trim();
    // Accounting-style negative: "(12500)" -> "-12500".
    const parenthesized = /^\((.+)\)$/.exec(clean);
    if (parenthesized) {
        clean = `-${parenthesized[1].trim()}`;
    }
    if (!clean) {
        return null;
    }
    const n = parseFloat(clean);
    return Number.isNaN(n) ? null : n;
}
528
/**
 * Parses the renovation strategy table (columns Min, Partial, Full, Best).
 *
 * The horizontal extent of the value columns is derived from the header
 * line's "Min" and "Best" segments (with fallbacks when the header is not
 * found). Each table row is then read by taking, in order, the segments of
 * the row's line that fall inside that extent.
 *
 * @param body - Lines of the valuation page; segments carry `text`, `x`
 *   and `width`.
 * @returns `{ min, partial, full, best, boundingBoxes }`, where each
 *   strategy holds its monetary values, timeline values, return strings and
 *   the bounding boxes recorded for its column.
 */
function parseRenovationStrategies(body) {
    const bb = {};
    const STRATEGIES = ["min", "partial", "full", "best"];
    const hasExactSeg = (line, text) => line.segments.some((s) => s.text.trim() === text);

    // Find the column header line with Min, Partial, Full, Best
    const headerLine = body.find((l) => hasExactSeg(l, "Min") && hasExactSeg(l, "Full"));
    if (headerLine?.segments[0]) {
        bb.header = toBBox(headerLine.segments[0], headerLine);
    }
    // Determine value column boundaries from the header (Min, Partial, Full, Best).
    const minSeg = headerLine?.segments.find((s) => s.text.trim() === "Min");
    const bestSeg = headerLine?.segments.find((s) => s.text.trim() === "Best");
    const valXMin = minSeg ? minSeg.x - 15 : 75;
    const valXMax = bestSeg ? bestSeg.x + bestSeg.width + 15 : 290;
    // Extract value segments: within the strategy column range only
    const getValSegs = (line) => line.segments.filter((s) => s.x >= valXMin && s.x <= valXMax);
    const valTexts = (line) => (line ? getValSegs(line).map((s) => s.text.trim()) : []);

    // Find a table row: line must have a label AND at least 3 value segments.
    // Records one bounding box per strategy column under `<bbPrefix>_<strategy>`.
    const getRowWithBB = (label, bbPrefix) => {
        const line = body.find((l) => l.segments.some((s) => s.text.includes(label)) && getValSegs(l).length >= 3);
        if (!line) {
            return [];
        }
        const valSegs = getValSegs(line);
        valSegs.forEach((s, i) => {
            if (STRATEGIES[i]) {
                bb[`${bbPrefix}_${STRATEGIES[i]}`] = toBBox(s, line);
            }
        });
        return valSegs.map((s) => s.text.trim());
    };

    const arvRow = getRowWithBB("ARV", "arv");

    // "As Is Value" line has a quirk — first segment may include "As Is Value $580,000"
    const asIsLine = body.find((l) => l.segments.some((s) => s.text.includes("As Is Value")));
    let asIsRow = [];
    if (asIsLine) {
        const asIsVals = valTexts(asIsLine);
        // First value may be embedded in "As Is Value $580,000"
        const embedded = asIsLine.segments.find((s) => s.text.includes("As Is Value"))?.text.match(/\$([\d,]+)/)?.[0];
        asIsRow = embedded && asIsVals.length < 4 ? [embedded, ...asIsVals] : asIsVals;
    }

    const rehabRow = getRowWithBB("Rehab", "rehab");
    const sqftRow = getRowWithBB("$/sqft", "perSqft");
    const basisRow = getRowWithBB("Basis", "basis");
    const netLiftRow = getRowWithBB("Net Lift", "netLift");

    // Gross Return — may be split: "Gross" on one line, percentages on another, "Return" on a third.
    // Look for any line with at least three percentage values at or right of the value columns,
    // and read the values with that same filter so they stay aligned with the columns.
    const isReturnPct = (s) => /\d+\.\d+%/.test(s.text) && s.x >= valXMin;
    const grossReturnLine = body.find((l) => l.segments.filter(isReturnPct).length >= 3);
    const returnSegs = grossReturnLine ? grossReturnLine.segments.filter(isReturnPct) : [];
    const returnVals = returnSegs.map((s) => s.text.trim());
    returnSegs.forEach((s, i) => {
        if (STRATEGIES[i]) {
            bb[`grossReturn_${STRATEGIES[i]}`] = toBBox(s, grossReturnLine);
        }
    });

    // Timeline rows — use getValSegs for position-independent extraction
    const rehabTimeVals = valTexts(body.find((l) => l.segments.some((s) => /Rehab Time/.test(s.text))));
    const ttsVals = valTexts(body.find((l) => hasExactSeg(l, "Estim TTS")));
    const cushionVals = valTexts(body.find((l) => hasExactSeg(l, "Cushion")));
    const totalTimeVals = valTexts(body.find((l) => hasExactSeg(l, "Total Time")));

    // Annualized Return — may be split across lines, look for "1.42x" style values
    const isMultiple = (s) => /\d+\.\d+x/.test(s.text);
    const annReturnLine = body.find((l) => l.segments.some(isMultiple));
    const annReturnVals = annReturnLine
        ? annReturnLine.segments.filter(isMultiple).map((s) => s.text.trim())
        : [];

    const buildStrategy = (i) => {
        // Copy the bounding boxes recorded for this strategy column, dropping the column suffix.
        const suffix = `_${STRATEGIES[i]}`;
        const stratBb = {};
        for (const [k, v] of Object.entries(bb)) {
            if (k.endsWith(suffix)) {
                stratBb[k.slice(0, -suffix.length)] = v;
            }
        }
        return {
            arv: parseCurrency(arvRow[i] ?? ""),
            asIsValue: parseCurrency(asIsRow[i] ?? ""),
            rehab: parseCurrency(rehabRow[i] ?? ""),
            perSqft: parseCurrency(sqftRow[i] ?? ""),
            basis: parseCurrency(basisRow[i] ?? ""),
            netLift: parseCurrency(netLiftRow[i] ?? ""),
            grossReturn: returnVals[i] ?? "",
            rehabTime: parseNum(rehabTimeVals[i]),
            estimatedTTS: parseNum(ttsVals[i]),
            cushion: parseNum(cushionVals[i]),
            totalTime: parseNum(totalTimeVals[i]),
            annualizedReturn: annReturnVals[i] ?? "",
            boundingBoxes: stratBb,
        };
    };

    return {
        min: buildStrategy(0),
        partial: buildStrategy(1),
        full: buildStrategy(2),
        best: buildStrategy(3),
        boundingBoxes: bb,
    };
}
656
/**
 * Extract the market-demand summary from a block of page lines.
 *
 * Reads the numeric score from the line that mentions "Market Demand", the
 * Strong/Moderate/Weak rating from the first later line that carries one, and
 * four left-hand label/value fields (Location, Inventory, Median TTS,
 * % Remodeled).
 *
 * @param {Array} body - Lines to search; each line exposes `segments`, and
 *   each segment exposes `text` and `x`.
 * @returns {{score: (number|null), label: string, location: string,
 *   inventory: string, medianTTS: string, percentRemodeled: string,
 *   boundingBoxes: Object}} Parsed values; fields that cannot be located are
 *   `null` (score) or `""`, and have no bounding-box entry.
 */
function parseMarketDemand(body) {
  const boxes = {};
  const RATING = /^(Strong|Moderate|Weak)$/i;
  // Right-hand table labels that can end up glued onto a left-hand value.
  const MERGED_TABLE_LABEL = /\b(Rehab Time|Estim TTS|Cushion|Total Time|Annualized)/;
  const isRating = (seg) => RATING.test(seg.text.trim());

  // Score: the first all-digit segment on the "Market Demand" line.
  const headingPos = body.findIndex((row) => row.segments.some((seg) => seg.text.includes("Market Demand")));
  let score = null;
  if (headingPos !== -1) {
    const heading = body[headingPos];
    const numeric = heading.segments.find((seg) => /^\d+$/.test(seg.text.trim()));
    if (numeric) {
      score = parseInt(numeric.text.trim(), 10);
      boxes.score = toBBox(numeric, heading);
    }
  }

  // Rating: only lines after the heading are considered; without a heading
  // the whole body is searched.
  const candidates = headingPos === -1 ? body : body.slice(headingPos + 1);
  let label = "";
  for (const row of candidates) {
    const hit = row.segments.find(isRating);
    if (hit) {
      label = hit.text.trim();
      boxes.label = toBBox(hit, row);
      break;
    }
  }

  // Left-hand fields: the value is the second segment of the row whose first
  // segment carries the field label, accepted only when it starts left of
  // x = 350.
  const readLeftField = (fieldLabel, bbKey) => {
    const row = body.find((candidate) => {
      const lead = candidate.segments[0];
      return lead?.text.trim() === fieldLabel || lead?.text.includes(fieldLabel);
    });
    if (!row || row.segments.length < 2) {
      return "";
    }
    const valueSeg = row.segments[1];
    if (!valueSeg || !(valueSeg.x < 350)) {
      return "";
    }
    boxes[bbKey] = toBBox(valueSeg, row);
    const raw = valueSeg.text.trim();
    // Keep only the text in front of a table label merged into the segment.
    const merged = raw.match(MERGED_TABLE_LABEL);
    return merged ? raw.slice(0, merged.index).trim() : raw;
  };

  const location = readLeftField("Location", "location");
  const inventory = readLeftField("Inventory", "inventory");
  const medianTTS = readLeftField("Median TTS", "medianTTS");
  const percentRemodeled = readLeftField("% Remodeled", "percentRemodeled");

  return {
    score,
    label,
    location,
    inventory,
    medianTTS,
    percentRemodeled,
    boundingBoxes: boxes,
  };
}
707
+ // ── Comparables (Pages 6, 11, 15-17) ────────────────────────────────────────
708
/**
 * Names accepted as condition-group headers inside a comparables table.
 * A header row only changes the current group when its label is listed here.
 */
const CONDITION_GROUPS = [
  "Newly Built",
  "Full Remodel",
  "Partial Remodel",
  "Maintained",
  "Moderate",
  "Poor",
  "Very Poor",
  "Unsalvageable",
];
713
/**
 * Collect the comparable-sale rows that belong to one titled section.
 *
 * The section starts at the first line whose full text contains
 * `sectionTitle`. It ends at a "Photos for…" line, at a line more than five
 * pages past the title, or at another known section heading on a different
 * page. Header/footer lines inside the section are ignored.
 *
 * @param {Array} lines - All document lines (`fullText`, `page`, `segments`).
 * @param {string} sectionTitle - Heading text that identifies the section.
 * @returns {{title: string, comparables: Array}} The title plus every row
 *   that `parseCompRow` could parse; `comparables` is empty when the heading
 *   is not found.
 */
function parseComparablesSection(lines, sectionTitle) {
  const result = { title: sectionTitle, comparables: [] };
  const start = lines.findIndex((entry) => entry.fullText.includes(sectionTitle));
  if (start === -1) {
    return result;
  }
  const startPage = lines[start].page;

  const endsSection = (entry) =>
    /^Photos for/i.test(entry.fullText) ||
    entry.page > startPage + 5 ||
    (/^(Closest Market Comparables|Additional Comparables|Additional Comps Excluded|Budget Flags|Budget Line Items)$/i.test(entry.fullText) &&
      entry.page !== startPage);

  // Pass 1: gather the table lines that make up this section.
  const rows = [];
  for (const entry of lines.slice(start + 1)) {
    if (endsSection(entry)) {
      break;
    }
    if (!isHeaderOrFooter(entry)) {
      rows.push(entry);
    }
  }

  // Pass 2: track the active condition group and parse numbered rows.
  let group = "";
  for (const row of rows) {
    const marker = row.segments[0]?.text.trim();
    if (marker === "#" && row.segments.length >= 2) {
      // "#" followed by a recognised label switches the condition group.
      const heading = row.segments[1]?.text.trim();
      if (CONDITION_GROUPS.includes(heading)) {
        group = heading;
      }
      continue;
    }
    // Only rows led by a plain integer are comps; the subject row ("S"),
    // a lone "#", and anything else fail this check.
    if (!/^\d+$/.test(marker ?? "")) {
      continue;
    }
    const parsed = parseCompRow(row, parseInt(marker, 10), group);
    if (parsed) {
      result.comparables.push(parsed);
    }
  }
  return result;
}
757
/**
 * Parse one numbered row of a comparables table.
 *
 * The address is taken from segments in the address x-window; all segments in
 * the data x-window are joined with spaces and matched against the column
 * pattern:
 *
 *   [Type] sqft bd bth year stories lot dist [flags] [grg] COE SP $/sqft C TTS [score]
 *
 * @param {Object} line - Table line exposing `segments` (`text`, `x`).
 * @param {number} num - Row number already read from the first segment.
 * @param {string} group - Condition group currently in effect.
 * @returns {Object|null} The parsed comparable, or `null` when the leading
 *   numeric columns (sqft … dist) cannot be matched. When only the trailing
 *   columns fail to match, those fields stay `null` (or `""` for
 *   `closeOfEscrow`).
 */
function parseCompRow(line, num, group) {
  const bb = {};

  // Address: segments after the "#" column and left of the data columns.
  // NOTE(review): this window (x < 155) overlaps the data window (x >= 150)
  // by 5 units, so a segment starting in that band lands in both — confirm
  // that the overlap is intended.
  const addrSegs = line.segments.filter((s) => s.x > 40 && s.x < 155);
  const address = addrSegs.map((s) => s.text.trim()).join(" ");
  if (addrSegs[0]) {
    bb.address = toBBox(addrSegs[0], line);
  }

  // Data: everything at x >= 150, concatenated and parsed as one string.
  const dataSegs = line.segments.filter((s) => s.x >= 150);
  const dataText = dataSegs.map((s) => s.text.trim()).join(" ");

  // The leading property-type code (e.g. "C", "TH", "SF", "QP") is optional.
  const stripped = dataText.replace(/^[A-Z]{1,3}\s+/, "");
  const m = stripped.match(/^([\d,]+)\s+(\d+)\s+([\d.]+)\s+(\d{4})\s+([\d.]+)\s+([\d.]+|unkn)\s+([\d.]+)\s+(.+)$/);
  if (!m) {
    return null;
  }
  const sqft = parseInt(m[1].replace(/,/g, ""), 10);
  const beds = parseInt(m[2], 10);
  const baths = parseFloat(m[3]);
  const yearBuilt = parseInt(m[4], 10);
  const stories = parseFloat(m[5]);
  const lot = m[6] === "unkn" ? null : parseFloat(m[6]);
  const dist = parseFloat(m[7]);
  const tail = m[8];

  // Tail: [flags] [grg] COE SP $/sqft C TTS [score].
  // Each optional leading number must be followed by whitespace. Without that
  // requirement the optional groups can backtrack into the date and steal the
  // first digit of a two-digit month ("10/3/25" -> garage 1, date "0/3/25").
  const tailMatch = tail.match(/^(?:(\d+)\s+)?(?:(\d+)\s+)?(\d{1,2}\/\d{1,2}\/\d{2,4})\s+\$([\d,]+)\s+\$(\d+)\s+([\d.]+)\s+(\d+)\s*([\d.]+)?$/);
  let flags = null;
  let garage = null;
  let closeOfEscrow = "";
  let salePrice = null;
  let pricePerSqft = null;
  let condition = null;
  let timeToSale = null;
  let score = null;
  if (tailMatch) {
    const pre1 = tailMatch[1];
    const pre2 = tailMatch[2];
    if (pre2 !== undefined) {
      // Two leading numbers: flags then garage.
      flags = parseInt(pre1, 10);
      garage = parseInt(pre2, 10);
    }
    else if (pre1 !== undefined) {
      // A single leading number is read as the garage count.
      garage = parseInt(pre1, 10);
    }
    closeOfEscrow = tailMatch[3];
    salePrice = parseInt(tailMatch[4].replace(/,/g, ""), 10);
    pricePerSqft = parseInt(tailMatch[5], 10);
    condition = parseFloat(tailMatch[6]);
    timeToSale = parseInt(tailMatch[7], 10);
    score = tailMatch[8] !== undefined ? parseFloat(tailMatch[8]) : null;
  }

  // Bounding boxes: segments are often merged, so each segment is assigned to
  // the first still-unset field whose x-window or text shape it matches.
  for (const seg of dataSegs) {
    const t = seg.text.trim();
    const x = seg.x;
    if (x < 200 && !bb.sqft) {
      // First data segment covers sqft, beds, baths, yearBuilt (often merged).
      const box = toBBox(seg, line);
      bb.sqft = box;
      bb.beds = box;
      bb.baths = box;
      bb.yearBuilt = box;
    }
    else if (x >= 250 && x < 295 && !bb.stories) {
      bb.stories = toBBox(seg, line);
    }
    else if (x >= 285 && x < 325 && !bb.lot) {
      bb.lot = toBBox(seg, line);
    }
    else if (x >= 325 && x < 380 && /^\d/.test(t) && !bb.distance) {
      bb.distance = toBBox(seg, line);
    }
    else if (/\d{1,2}\/\d{1,2}\/\d{2,4}/.test(t) && !bb.closeOfEscrow) {
      bb.closeOfEscrow = toBBox(seg, line);
    }
    else if (/^\$[\d,]+$/.test(t) && t.length > 5 && !bb.salePrice) {
      bb.salePrice = toBBox(seg, line);
    }
    else if (/^\$\d+$/.test(t) && !bb.pricePerSqft) {
      bb.pricePerSqft = toBBox(seg, line);
    }
    else if (x >= 495 && x < 545 && /^[\d.]+$/.test(t) && !bb.condition) {
      bb.condition = toBBox(seg, line);
    }
    else if (x >= 520 && x < 575 && /^\d+$/.test(t) && !bb.timeToSale) {
      bb.timeToSale = toBBox(seg, line);
    }
    else if (x >= 555 && /^[\d.]+$/.test(t) && !bb.score) {
      bb.score = toBBox(seg, line);
    }
  }
  // Merged segments (e.g. "10/3/25 $750,000"): fall back to any segment that
  // contains a sale-price-sized dollar amount.
  if (!bb.salePrice) {
    const spSeg = dataSegs.find((s) => /\$[\d,]{4,}/.test(s.text));
    if (spSeg) {
      bb.salePrice = toBBox(spSeg, line);
    }
  }

  return {
    number: num,
    address,
    conditionGroup: group,
    sqft,
    beds,
    baths,
    yearBuilt,
    stories,
    lot,
    distance: dist,
    flags,
    garage,
    closeOfEscrow,
    salePrice,
    pricePerSqft,
    condition,
    timeToSale,
    score,
    boundingBoxes: bb,
  };
}
879
+ // ── Budget Flags (Page 18) ────────────────────────────────────────────────────
880
/**
 * Concern-level headings looked up in the Budget Flags section, in the order
 * they are reported.
 */
const CONCERN_LEVELS = [
  "Significant Concerns", "Medium Concerns",
  "Moderate Concerns", "Cautionary Concerns",
];
886
/**
 * Parse the "Budget Flags" section: flagged items grouped by concern level,
 * plus the free text that follows "Missing Line Items".
 *
 * Only lines on the heading's page, with a larger `y` than the heading and
 * not classified as header/footer, are considered.
 *
 * @param {Array} lines - All document lines (`fullText`, `page`, `y`,
 *   `segments`).
 * @returns {{concerns: Array, missingLineItems: string, boundingBoxes: Object}}
 *   One entry per concern level that appears on the page (in
 *   `CONCERN_LEVELS` order), the missing-items text joined with spaces, and
 *   the section-level bounding boxes.
 */
function parseBudgetFlags(lines) {
  const boxes = {};
  let rows = [];
  const header = lines.find((l) => /^Budget Flags$/i.test(l.fullText.trim()));
  if (header) {
    rows = lines.filter((l) => l.page === header.page && !isHeaderOrFooter(l) && l.y > header.y);
    boxes.title = toBBox(header.segments[0], header);
  }

  // A concern block ends at the next level heading or at "Missing Line Items".
  const isBoundary = (text) => CONCERN_LEVELS.includes(text) || /^Missing Line Items$/i.test(text);
  // Placeholder and column-caption rows are not real items.
  const isPlaceholder = (text) => /^No line items flagged\.?$/i.test(text) || /^Specific Line Item/i.test(text);

  const concerns = [];
  for (const level of CONCERN_LEVELS) {
    const at = rows.findIndex((l) => l.fullText.trim() === level);
    if (at === -1) {
      continue;
    }
    const levelBoxes = { level: toBBox(rows[at].segments[0], rows[at]) };
    const items = [];
    for (const row of rows.slice(at + 1)) {
      const text = row.fullText.trim();
      if (isBoundary(text)) {
        break;
      }
      if (text === "" || isPlaceholder(text)) {
        continue;
      }
      items.push(text);
      // Box keys are 1-based: item1, item2, …
      levelBoxes[`item${items.length}`] = toBBox(row.segments[0], row);
    }
    concerns.push({ level, items, boundingBoxes: levelBoxes });
  }

  // Everything after the "Missing Line Items" heading is treated as its text.
  let missingLineItems = "";
  const missingAt = rows.findIndex((l) => /^Missing Line Items$/i.test(l.fullText.trim()));
  if (missingAt !== -1) {
    const heading = rows[missingAt];
    boxes.missingLineItems = toBBox(heading.segments[0], heading);
    const collected = [];
    for (const row of rows.slice(missingAt + 1)) {
      const text = row.fullText.trim();
      if (text === "") {
        continue;
      }
      collected.push(text);
      if (!boxes.missingLineItemsText) {
        boxes.missingLineItemsText = toBBox(row.segments[0], row);
      }
    }
    missingLineItems = collected.join(" ");
  }

  return { concerns, missingLineItems, boundingBoxes: boxes };
}
939
+ // ── Budget Line Items (Page 19) ───────────────────────────────────────────────
940
/**
 * Read every "$1,234"-style amount from the dollar columns of a row.
 *
 * Segments that start left of x = 370 are ignored. A single segment may hold
 * several amounts; they are returned in reading order.
 *
 * @param {Iterable} segments - Row segments exposing `x` and `text`.
 * @returns {number[]} Amounts as integers, with "$" and "," removed.
 */
function parseDollarValues(segments) {
  const amounts = [];
  for (const seg of segments) {
    if (!(seg.x < 370)) {
      const found = seg.text.match(/\$[\d,]+/g) ?? [];
      amounts.push(...found.map((token) => parseInt(token.replace(/[$,]/g, ""), 10)));
    }
  }
  return amounts;
}
955
/**
 * Parse the "Budget Line Items" table into categories, their numbered items,
 * and the totals row.
 *
 * Only lines on the heading's page, with a larger `y` than the heading and
 * not classified as header/footer, are considered. Row kinds:
 *   - "Budget Items…"        → column caption, skipped
 *   - "Total…" with a "$"    → totals row
 *   - leading number (x < 50) → item row, attached to the current category
 *   - any other row with "$" → starts a new category
 *
 * Dollar amounts are assigned positionally to HR, DM, UP, RC, Soft, Total;
 * columns that are not present are reported as `null`.
 *
 * @param {Array} lines - All document lines (`fullText`, `page`, `y`,
 *   `segments`).
 * @returns {{categories: Array, totalHR: (number|null), totalDM: (number|null),
 *   totalUP: (number|null), totalRC: (number|null), totalSoft: (number|null),
 *   grandTotal: (number|null), boundingBoxes: Object}}
 */
function parseBudgetLineItems(lines) {
  const headerIdx = lines.findIndex((l) => /^Budget Line Items$/i.test(l.fullText.trim()));
  const bb = {};
  let body = [];
  if (headerIdx >= 0) {
    const hl = lines[headerIdx];
    body = lines.filter((l) => l.page === hl.page && !isHeaderOrFooter(l) && l.y > hl.y);
    bb.title = toBBox(hl.segments[0], hl);
  }

  const hasDollar = (segments) => segments.some((s) => /\$/.test(s.text));
  // Map the row's dollar amounts onto the six columns, null when absent.
  const readColumns = (segments) => {
    const [hr = null, dm = null, up = null, rc = null, soft = null, total = null] = parseDollarValues(segments);
    return { hr, dm, up, rc, soft, total };
  };

  const categories = [];
  let currentCategory = null;
  let totalHR = null;
  let totalDM = null;
  let totalUP = null;
  let totalRC = null;
  let totalSoft = null;
  let grandTotal = null;

  for (const line of body) {
    const text = line.fullText.trim();
    // Skip the column header row.
    if (/^Budget Items\b/i.test(text)) {
      continue;
    }

    // Totals row.
    if (/^Total\b/.test(text) && hasDollar(line.segments)) {
      // Defaults keep missing columns at null instead of undefined, matching
      // how item and category rows are reported.
      [
        totalHR = null,
        totalDM = null,
        totalUP = null,
        totalRC = null,
        totalSoft = null,
        grandTotal = null,
      ] = parseDollarValues(line.segments);
      const totalSeg = line.segments.find((s) => /Total/.test(s.text));
      if (totalSeg) {
        bb.total = toBBox(totalSeg, line);
      }
      const lastSeg = line.segments[line.segments.length - 1];
      if (lastSeg) {
        bb.grandTotal = toBBox(lastSeg, line);
      }
      continue;
    }

    // Numbered item row.
    // Case 1: first segment is just a number (e.g. "1").
    // Case 2: number and name merged (e.g. "1 Dumpster / Debris Removal").
    const firstSeg = line.segments[0];
    const inNumberColumn = Boolean(firstSeg) && firstSeg.x < 50;
    const leadText = inNumberColumn ? firstSeg.text.trim() : "";
    const separateNum = inNumberColumn && /^\d+$/.test(leadText);
    // One capturing match both detects and splits the merged form, so the
    // split can never come back null for a row that was detected as merged.
    const mergedMatch = inNumberColumn ? leadText.match(/^(\d+)\s+(\S[\s\S]*)$/) : null;

    if (separateNum || mergedMatch) {
      const itemBB = {};
      let num;
      let name;
      if (separateNum) {
        num = parseInt(leadText, 10);
        itemBB.number = toBBox(firstSeg, line);
        const nameSeg = line.segments.find((s) => s.x >= 55 && s.x < 200);
        name = nameSeg?.text.trim() ?? "";
        if (nameSeg) {
          itemBB.name = toBBox(nameSeg, line);
        }
      }
      else {
        num = parseInt(mergedMatch[1], 10);
        name = mergedMatch[2].trim();
        // Number and name share one segment, so they share its box.
        itemBB.number = toBBox(firstSeg, line);
        itemBB.name = toBBox(firstSeg, line);
      }
      // Description column sits between the name and the dollar columns.
      const descSeg = line.segments.find((s) => s.x >= 200 && s.x < 370);
      const description = descSeg?.text.trim() ?? "";
      if (descSeg) {
        itemBB.description = toBBox(descSeg, line);
      }
      const columns = readColumns(line.segments);
      // Bounding box for total: the last segment, if it holds a dollar value.
      const lastSeg = line.segments[line.segments.length - 1];
      if (lastSeg && /\$/.test(lastSeg.text)) {
        itemBB.total = toBBox(lastSeg, line);
      }
      // Items that appear before any category row have nowhere to attach and
      // are dropped.
      if (currentCategory) {
        currentCategory.items.push({
          number: num,
          name,
          description,
          ...columns,
          boundingBoxes: itemBB,
        });
      }
    }
    else if (hasDollar(line.segments)) {
      // Category row: has dollar values but no leading number.
      const catBB = {};
      const catNameSeg = line.segments.find((s) => s.x < 200);
      const catName = catNameSeg?.text.trim() ?? "";
      if (catNameSeg) {
        catBB.name = toBBox(catNameSeg, line);
      }
      const columns = readColumns(line.segments);
      const lastSeg = line.segments[line.segments.length - 1];
      if (lastSeg && /\$/.test(lastSeg.text)) {
        catBB.total = toBBox(lastSeg, line);
      }
      currentCategory = {
        name: catName,
        ...columns,
        items: [],
        boundingBoxes: catBB,
      };
      categories.push(currentCategory);
    }
  }

  return {
    categories,
    totalHR, totalDM, totalUP, totalRC, totalSoft, grandTotal,
    boundingBoxes: bb,
  };
}