@parseo/appraisals 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -0
- package/dist/form-1004mc/extract-checkboxes.d.ts +43 -0
- package/dist/form-1004mc/extract-checkboxes.d.ts.map +1 -0
- package/dist/form-1004mc/extract-checkboxes.js +145 -0
- package/dist/form-1004mc/index.d.ts +3 -0
- package/dist/form-1004mc/index.d.ts.map +1 -0
- package/dist/form-1004mc/index.js +1 -0
- package/dist/form-1004mc/parse-page1.d.ts +8 -0
- package/dist/form-1004mc/parse-page1.d.ts.map +1 -0
- package/dist/form-1004mc/parse-page1.js +760 -0
- package/dist/form-1004mc/parse-sales.d.ts +6 -0
- package/dist/form-1004mc/parse-sales.d.ts.map +1 -0
- package/dist/form-1004mc/parse-sales.js +505 -0
- package/dist/form-1004mc/parser.d.ts +5 -0
- package/dist/form-1004mc/parser.d.ts.map +1 -0
- package/dist/form-1004mc/parser.js +437 -0
- package/dist/form-1004mc/types.d.ts +302 -0
- package/dist/form-1004mc/types.d.ts.map +1 -0
- package/dist/form-1004mc/types.js +1 -0
- package/dist/form-1073/index.d.ts +3 -0
- package/dist/form-1073/index.d.ts.map +1 -0
- package/dist/form-1073/index.js +1 -0
- package/dist/form-1073/parse-page1.d.ts +8 -0
- package/dist/form-1073/parse-page1.d.ts.map +1 -0
- package/dist/form-1073/parse-page1.js +704 -0
- package/dist/form-1073/parse-page2.d.ts +6 -0
- package/dist/form-1073/parse-page2.d.ts.map +1 -0
- package/dist/form-1073/parse-page2.js +438 -0
- package/dist/form-1073/parse-sales.d.ts +7 -0
- package/dist/form-1073/parse-sales.d.ts.map +1 -0
- package/dist/form-1073/parse-sales.js +477 -0
- package/dist/form-1073/parser.d.ts +5 -0
- package/dist/form-1073/parser.d.ts.map +1 -0
- package/dist/form-1073/parser.js +102 -0
- package/dist/form-1073/types.d.ts +300 -0
- package/dist/form-1073/types.d.ts.map +1 -0
- package/dist/form-1073/types.js +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/richer-values/index.d.ts +3 -0
- package/dist/richer-values/index.d.ts.map +1 -0
- package/dist/richer-values/index.js +1 -0
- package/dist/richer-values/parser.d.ts +5 -0
- package/dist/richer-values/parser.d.ts.map +1 -0
- package/dist/richer-values/parser.js +1067 -0
- package/dist/richer-values/types.d.ts +225 -0
- package/dist/richer-values/types.d.ts.map +1 -0
- package/dist/richer-values/types.js +1 -0
- package/package.json +24 -0
|
@@ -0,0 +1,1067 @@
|
|
|
1
|
+
import { extractLines, UnrecognizedFormatError, toBBox } from "@parseo/shared";
|
|
2
|
+
/**
 * Parses a RicherValues report from a raw document buffer.
 *
 * The buffer is passed to `extractLines`, and the resulting lines are handed
 * to `parseRicherValuesReportFromLines`.
 *
 * @param buffer - Document contents accepted by `extractLines`.
 * @returns Promise resolving to the structured report.
 */
export async function parseRicherValuesReport(buffer) {
    return parseRicherValuesReportFromLines(await extractLines(buffer));
}
|
|
6
|
+
/**
 * Builds the structured RicherValues report from already-extracted lines.
 *
 * @param lines - Extracted lines; each is read for `fullText` here and passed
 *   on to the section parsers.
 * @returns Object holding the cover page, valuation summary, valuation page,
 *   three comparables sections, budget flags and budget line items.
 * @throws UnrecognizedFormatError when the first 15 lines contain neither
 *   "Renovation Analysis" nor "Valuation Summary" (case-insensitive).
 */
export function parseRicherValuesReportFromLines(lines) {
    // Format fingerprint: only the text of the first 15 lines is inspected.
    const signatureText = lines
        .slice(0, 15)
        .map((line) => line.fullText)
        .join("\n");
    const looksLikeRicherValues = /Renovation Analysis|Valuation Summary/i.test(signatureText);
    if (!looksLikeRicherValues) {
        throw new UnrecognizedFormatError("RicherValues", "first 15 lines do not contain a RicherValues report signature");
    }
    const comparables = (sectionTitle) => parseComparablesSection(lines, sectionTitle);
    return {
        coverPage: parseCoverPage(lines),
        valuationSummary: parseValuationSummary(lines),
        valuationPage: parseValuationPage(lines),
        closestComparables: comparables("Closest Market Comparables"),
        additionalComparables: comparables("Additional Comparables"),
        excludedComparables: comparables("Additional Comps Excluded From the Analysis"),
        budgetFlags: parseBudgetFlags(lines),
        budgetLineItems: parseBudgetLineItems(lines),
    };
}
|
|
32
|
+
// ── Cover Page (Page 1) ─────────────────────────────────────────────────────
|
|
33
|
+
/**
 * Extracts the cover-page fields from the lines whose `page` is 1.
 *
 * Each field is taken from the first page-1 line whose text matches that
 * field's pattern. When the matching line has a first segment, its bounding
 * box is recorded under the field's key.
 *
 * @param lines - All extracted lines of the document.
 * @returns Report type, address, the spread result of `parsePropertyDetails`,
 *   effective date, "prepared for" block and the collected bounding boxes.
 */
function parseCoverPage(lines) {
    const firstPage = lines.filter((line) => line.page === 1);
    const boxes = {};
    // Returns the full text of the first matching line ("" when none) and
    // records the bounding box of its first segment.
    const locate = (pattern, boxKey) => {
        const hit = firstPage.find((line) => pattern.test(line.fullText));
        if (hit?.segments[0]) {
            boxes[boxKey] = toBBox(hit.segments[0], hit);
        }
        return hit?.fullText ?? "";
    };
    // Report type, e.g. "Renovation Analysis".
    const reportType = locate(/renovation analysis|desktop review|bpo|appraisal/i, "reportType");
    // Street address ending in a two-letter state and a five-digit ZIP.
    const address = locate(/\d+.*,\s*[A-Z]{2},?\s*\d{5}/, "address");
    // Property details line, e.g. "1,504 sqft 3 + 2.00; 1962 SFR".
    const details = parsePropertyDetails(locate(/sqft/i, "propertyDetails"));
    const effectiveDate = parseEffectiveDate(locate(/effective date/i, "effectiveDate"));
    // "Prepared For" block: the line right after the heading carries the name.
    const preparedFor = parsePreparedFor(firstPage);
    const headingIdx = firstPage.findIndex((line) => /prepared for/i.test(line.fullText));
    const recipientLine = headingIdx >= 0 ? firstPage[headingIdx + 1] : undefined;
    if (recipientLine?.segments[0]) {
        boxes.preparedForName = toBBox(recipientLine.segments[0], recipientLine);
    }
    return {
        reportType,
        address,
        ...details,
        effectiveDate,
        preparedFor,
        boundingBoxes: boxes,
    };
}
|
|
71
|
+
/**
 * Parses the cover-page property details line,
 * e.g. "1,504 sqft 3 + 2.00; 1962 SFR".
 *
 * @param {string} text - Full text of the details line ("" when absent).
 * @returns {{sqft: number|null, beds: number|null, baths: number|null,
 *   yearBuilt: number|null, propertyType: string}} Parsed values. Numeric
 *   fields are `null` when the pattern is missing or the captured text is not
 *   a number; `propertyType` is "" when missing.
 */
function parsePropertyDetails(text) {
    // A capture such as "," or "." matches the character class but is not a
    // number. Report it as null (the "absent" value) rather than NaN.
    const numberOrNull = (n) => (Number.isNaN(n) ? null : n);
    const sqftMatch = text.match(/([\d,]+)\s*sqft/i);
    const sqft = sqftMatch
        ? numberOrNull(Number.parseInt(sqftMatch[1].replace(/,/g, ""), 10))
        : null;
    // "<beds> + <baths>", e.g. "3 + 2.00".
    const bedBathMatch = text.match(/(\d+)\s*\+\s*([\d.]+)/);
    const beds = bedBathMatch ? Number.parseInt(bedBathMatch[1], 10) : null;
    const baths = bedBathMatch ? numberOrNull(Number.parseFloat(bedBathMatch[2])) : null;
    // Four-digit year followed by an upper-case property type code, e.g. "1962 SFR".
    const yearMatch = text.match(/(\d{4})\s+([A-Z]{2,})/);
    const yearBuilt = yearMatch ? Number.parseInt(yearMatch[1], 10) : null;
    const propertyType = yearMatch ? yearMatch[2] : "";
    return { sqft, beds, baths, yearBuilt, propertyType };
}
|
|
82
|
+
/**
 * Extracts the value after "Effective Date:" and normalises it to YYYY-MM-DD.
 *
 * @param {string} text - Full text of the effective-date line ("" when absent).
 * @returns {string} "" when the label is missing; the date as YYYY-MM-DD when
 *   it can be parsed; otherwise the raw trimmed text after the label.
 */
function parseEffectiveDate(text) {
    const match = text.match(/effective date:\s*(.+)/i);
    if (!match) {
        return "";
    }
    const dateStr = match[1].trim();
    // A date-only ISO string is parsed by `Date` as UTC midnight. Formatting
    // it with the local-time getters below would shift it to the previous day
    // in time zones behind UTC. It is already in the target format, so return
    // it untouched.
    if (/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) {
        return dateStr;
    }
    const parsed = new Date(dateStr);
    if (Number.isNaN(parsed.getTime())) {
        return dateStr;
    }
    // Other formats are parsed as local time, so local getters are consistent.
    const yyyy = parsed.getFullYear();
    const mm = String(parsed.getMonth() + 1).padStart(2, "0");
    const dd = String(parsed.getDate()).padStart(2, "0");
    return `${yyyy}-${mm}-${dd}`;
}
|
|
95
|
+
/**
 * Reads the "Prepared For" block of the cover page.
 *
 * The line right after the first line matching /prepared for/i is the name;
 * every remaining line of the page is joined with ", " to form the address.
 *
 * @param page1Lines - Lines of page 1, in order.
 * @returns `{ name, address }`, both "" when the heading is not found.
 */
function parsePreparedFor(page1Lines) {
    const headingIdx = page1Lines.findIndex((line) => /prepared for/i.test(line.fullText));
    if (headingIdx === -1) {
        return { name: "", address: "" };
    }
    const [nameLine, ...addressLines] = page1Lines.slice(headingIdx + 1);
    return {
        name: nameLine?.fullText ?? "",
        address: addressLines.map((line) => line.fullText).join(", "),
    };
}
|
|
105
|
+
// ── Valuation Summary and Parameters (Pages 2-4) ────────────────────────────
|
|
106
|
+
/**
 * Selects the lines on pages 2 through 4 that are not page headers/footers.
 *
 * @param lines - All extracted lines of the document.
 * @returns Lines with `page` in [2, 4] for which `isHeaderOrFooter` is false.
 */
function getBodyLines(lines) {
    const onBodyPage = (line) => line.page >= 2 && line.page <= 4;
    return lines.filter((line) => onBodyPage(line) && !isHeaderOrFooter(line));
}
|
|
111
|
+
/**
 * Tells whether a line is repeated page boilerplate.
 *
 * A line counts as boilerplate when its whole text is "Renovation Analysis",
 * when it is an address line ending in a state code and ZIP, or when it
 * starts with "For a complete set of terms".
 *
 * @param l - Line to test; only `fullText` is read.
 * @returns {boolean} True for header/footer lines.
 */
function isHeaderOrFooter(l) {
    const boilerplatePatterns = [
        /^Renovation Analysis$/i,
        /^\d+.*,\s*[A-Z]{2},?\s*\d{5}$/,
        /^For a complete set of terms/i,
    ];
    return boilerplatePatterns.some((pattern) => pattern.test(l.fullText));
}
|
|
117
|
+
/**
 * Parses the "Valuation Summary and Parameters" part of the report from the
 * body lines returned by `getBodyLines`.
 *
 * @param lines - All extracted lines of the document.
 * @returns Object with one entry per sub-section parser plus `boundingBoxes`,
 *   which holds the section title's box when the title line is found.
 */
function parseValuationSummary(lines) {
    const bodyLines = getBodyLines(lines);
    const boxes = {};
    const titleLine = bodyLines.find((line) => /Valuation Summary and Parameters/i.test(line.fullText));
    const titleSegment = titleLine?.segments[0];
    if (titleSegment) {
        boxes.sectionTitle = toBBox(titleSegment, titleLine);
    }
    return {
        commentary: parseValuationCommentary(bodyLines),
        propertyDataSources: parsePropertyDataSources(bodyLines),
        subjectPropertyDetails: parseSubjectPropertyDetails(bodyLines),
        comparableSearchParameters: parseComparableSearchParameters(bodyLines),
        verificationOfCondition: parseVerificationOfCondition(bodyLines),
        listingHistory: parseListingHistory(bodyLines),
        neighborhood: parseNeighborhood(bodyLines),
        preparedBy: parsePreparedBySection(bodyLines),
        boundingBoxes: boxes,
    };
}
|
|
135
|
+
// ── Valuation Commentary ────────────────────────────────────────────────────
|
|
136
|
+
// A line starting with one of these labels begins the next commentary field.
const FIELD_BOUNDARY = /^(Hyper-Local Neighborhood|Subject Property Assessment|Budget Assessment|Budget Flags|Estimated Valuation|Valuation Commentary):/i;
// A line starting with one of these headings begins another section.
const SECTION_BOUNDARY = /^(Property Data Sources|Subject Property Details|Comparable Search Parameters|Neighborhood:|Verification of Physical|External Data Sources|Prepared By:|Value Drivers|Distance-Based Comps:|Size-Based Comps:|Additional Comps:|Custom Comp Search:|Additional Analyses Conducted:)/i;
/**
 * Collects the free text that follows `label`, possibly spanning several lines.
 *
 * The text starts right after the label on the first line containing it and
 * continues through following lines until one matches FIELD_BOUNDARY or
 * SECTION_BOUNDARY, or the body ends.
 *
 * @param body - Body lines to search.
 * @param {string} label - Label text including its colon.
 * @param bb - Optional object that receives the label segment's bounding box.
 * @param {string} bbKey - Key under which the box is stored in `bb`.
 * @returns {string} The collected text joined with spaces, or "" when the
 *   label is not found.
 */
function extractCommentaryField(body, label, bb, bbKey) {
    const startIdx = body.findIndex((line) => line.fullText.includes(label));
    if (startIdx === -1) {
        return "";
    }
    const labelLine = body[startIdx];
    if (bb && bbKey) {
        // The label segment is matched without its colon.
        const bareLabel = label.replace(":", "");
        const labelSegment = labelLine.segments.find((seg) => seg.text.includes(bareLabel));
        if (labelSegment) {
            bb[bbKey] = toBBox(labelSegment, labelLine);
        }
    }
    const labelEnd = labelLine.fullText.indexOf(label) + label.length;
    const collected = [labelLine.fullText.slice(labelEnd).trim()];
    for (const { fullText } of body.slice(startIdx + 1)) {
        if (FIELD_BOUNDARY.test(fullText) || SECTION_BOUNDARY.test(fullText)) {
            break;
        }
        collected.push(fullText);
    }
    return collected.join(" ").trim();
}
|
|
162
|
+
/**
 * Extracts the five labelled commentary paragraphs via `extractCommentaryField`.
 *
 * @param body - Body lines to search.
 * @returns One string per commentary field plus the bounding boxes recorded
 *   for the labels that were found.
 */
function parseValuationCommentary(body) {
    const boxes = {};
    const field = (label, key) => extractCommentaryField(body, label, boxes, key);
    return {
        hyperLocalNeighborhood: field("Hyper-Local Neighborhood:", "hyperLocalNeighborhood"),
        subjectPropertyAssessment: field("Subject Property Assessment:", "subjectPropertyAssessment"),
        budgetAssessment: field("Budget Assessment:", "budgetAssessment"),
        budgetFlags: field("Budget Flags:", "budgetFlags"),
        estimatedValuation: field("Estimated Valuation:", "estimatedValuation"),
        boundingBoxes: boxes,
    };
}
|
|
173
|
+
// ── Property Data Sources ───────────────────────────────────────────────────
|
|
174
|
+
/**
 * Parses the "Property Data Sources" table.
 *
 * Rows are the lines after the header whose text starts with a known source
 * name. Scanning stops at the first non-source line mentioning
 * "Subject Property" once at least one row has been read.
 *
 * @param body - Body lines to search.
 * @returns Array of rows `{ source, above, below, total, beds, baths, stories,
 *   year, lot, garage, boundingBoxes }`. A cell is `null` when it is missing,
 *   blank, "-", or not a number.
 */
function parsePropertyDataSources(body) {
    const headerIdx = body.findIndex((l) => /^Property Data Sources$/i.test(l.fullText));
    if (headerIdx < 0) {
        return [];
    }
    const sources = ["Used by RV", "Upload", "MLS", "County", "Manual"];
    const colNames = ["above", "below", "total", "beds", "baths", "stories", "year", "lot", "garage"];
    const rows = [];
    for (const line of body.slice(headerIdx + 1)) {
        const source = sources.find((s) => line.fullText.startsWith(s));
        if (!source) {
            if (rows.length > 0 && /Subject Property/i.test(line.fullText)) {
                break;
            }
            continue;
        }
        const bb = { source: toBBox(line.segments[0], line) };
        const values = colNames.map(() => null);
        // Segments after the source name map positionally onto the columns;
        // any segment beyond the last known column is ignored.
        line.segments.slice(1).forEach((seg, i) => {
            const raw = seg.text.trim();
            if (raw === "-" || raw === "" || i >= colNames.length) {
                return;
            }
            const parsed = Number.parseFloat(raw.replace(/,/g, ""));
            // Unparseable text (e.g. "n/a") becomes null rather than NaN, so
            // "no value" is represented one way only.
            values[i] = Number.isNaN(parsed) ? null : parsed;
            bb[colNames[i]] = toBBox(seg, line);
        });
        rows.push({
            source,
            ...Object.fromEntries(colNames.map((name, i) => [name, values[i]])),
            boundingBoxes: bb,
        });
    }
    return rows;
}
|
|
219
|
+
// ── Subject Property Details ────────────────────────────────────────────────
|
|
220
|
+
/**
 * Parses the "Subject Property Details" section.
 *
 * Labelled values are looked up from the section heading onward with
 * `findLabelValue`. The "Current Use", "Percentile" and "Projected Use" rows
 * are the first body lines on page 2 or later whose text starts with that
 * label.
 *
 * @param body - Body lines to search.
 * @returns Address, APN, comparison metrics, current-use row, percentile row,
 *   projected use text and the recorded bounding boxes.
 */
function parseSubjectPropertyDetails(body) {
    const boxes = {};
    const sectionIdx = body.findIndex((line) => /Subject Property Details/i.test(line.fullText));
    const labelled = (label, key) => findLabelValue(body, sectionIdx, label, boxes, key);
    const address = labelled("Address", "address");
    const apn = labelled("Assessor Parcel Number", "apn");
    const comparisonMetrics = labelled("Subject Property Comparison Metrics", "comparisonMetrics");
    // Finds a table row by its leading label and records the label's box.
    const rowFor = (pattern, key) => {
        const row = body.find((line) => line.page >= 2 && pattern.test(line.fullText));
        if (row?.segments[0]) {
            boxes[key] = toBBox(row.segments[0], row);
        }
        return row;
    };
    const currentUse = parseCurrentUseRow(rowFor(/^Current Use\b/i, "currentUse"));
    const percentile = parsePercentileRow(rowFor(/^Percentile\b/i, "percentile"));
    const projectedRow = rowFor(/^Projected Use\b/i, "projectedUse");
    let projectedUse = "";
    if (projectedRow) {
        projectedUse = projectedRow.segments
            .slice(1)
            .map((seg) => seg.text)
            .join(" ")
            .trim();
    }
    return {
        address,
        apn,
        comparisonMetrics,
        currentUse,
        percentile,
        projectedUse,
        boundingBoxes: boxes,
    };
}
|
|
253
|
+
/**
 * Returns the value on the first line, at or after `afterIdx`, that has at
 * least two segments and whose first segment contains `label`.
 *
 * @param body - Body lines to search.
 * @param {number} afterIdx - Index to start from; a negative index yields "".
 * @param {string} label - Text expected in the line's first segment.
 * @param bb - Optional object that receives the value segment's bounding box.
 * @param {string} bbKey - Key under which the box is stored in `bb`.
 * @returns {string} Text of all segments after the first, joined with spaces
 *   and trimmed, or "" when no line matches.
 */
function findLabelValue(body, afterIdx, label, bb, bbKey) {
    if (afterIdx < 0) {
        return "";
    }
    const hit = body
        .slice(afterIdx)
        .find(({ segments }) => segments.length >= 2 && segments[0].text.includes(label));
    if (!hit) {
        return "";
    }
    const [, ...valueSegments] = hit.segments;
    if (bb && bbKey && valueSegments[0]) {
        bb[bbKey] = toBBox(valueSegments[0], hit);
    }
    return valueSegments.map((seg) => seg.text).join(" ").trim();
}
|
|
264
|
+
/**
 * Parses the "Current Use" table row.
 *
 * The segments after the row label are read positionally as type, sqft, beds,
 * baths, year built and acres. Numeric cells go through `parseNum`.
 *
 * @param line - The row line, or undefined when the row was not found.
 * @returns `{ type, sqft, beds, baths, yearBuilt, acres }`; `type` is "" and
 *   the numeric fields are null when the line is missing.
 */
function parseCurrentUseRow(line) {
    if (!line) {
        return { type: "", sqft: null, beds: null, baths: null, yearBuilt: null, acres: null };
    }
    const [type, sqft, beds, baths, yearBuilt, acres] = line.segments
        .slice(1)
        .map((seg) => seg.text.trim());
    return {
        type: type ?? "",
        sqft: parseNum(sqft),
        beds: parseNum(beds),
        baths: parseNum(baths),
        yearBuilt: parseNum(yearBuilt),
        acres: parseNum(acres),
    };
}
|
|
278
|
+
/**
 * Parses the "Percentile" table row.
 *
 * The segments after the row label are read positionally as sqft, beds,
 * baths, year built and acres, and kept as trimmed strings.
 *
 * @param line - The row line, or undefined when the row was not found.
 * @returns `{ sqft, beds, baths, yearBuilt, acres }`; a missing cell is "".
 */
function parsePercentileRow(line) {
    const cells = line ? line.segments.slice(1).map((seg) => seg.text.trim()) : [];
    const [sqft = "", beds = "", baths = "", yearBuilt = "", acres = ""] = cells;
    return { sqft, beds, baths, yearBuilt, acres };
}
|
|
291
|
+
/**
 * Converts a table cell to a number.
 *
 * Thousands separators are removed before parsing.
 *
 * @param {string|undefined} val - Cell text.
 * @returns {number|null} The parsed number, or null for a missing cell, a
 *   blank cell, the "-" placeholder, or text that does not start with a number.
 */
function parseNum(val) {
    if (!val) {
        return null;
    }
    const digits = val.split(",").join("").trim();
    const isPlaceholder = digits.length === 0 || digits === "-";
    const parsed = isPlaceholder ? NaN : Number.parseFloat(digits);
    return Number.isNaN(parsed) ? null : parsed;
}
|
|
300
|
+
// ── Comparable Search Parameters ────────────────────────────────────────────
|
|
301
|
+
/**
 * Extracts the labelled paragraphs of the comparable-search section via
 * `extractCommentaryField`.
 *
 * @param body - Body lines to search.
 * @returns One string per labelled paragraph plus the bounding boxes recorded
 *   for the labels that were found.
 */
function parseComparableSearchParameters(body) {
    const boxes = {};
    const field = (label, key) => extractCommentaryField(body, label, boxes, key);
    return {
        distanceBasedComps: field("Distance-Based Comps:", "distanceBasedComps"),
        sizeBasedComps: field("Size-Based Comps:", "sizeBasedComps"),
        additionalComps: field("Additional Comps:", "additionalComps"),
        customCompSearch: field("Custom Comp Search:", "customCompSearch"),
        additionalAnalyses: field("Additional Analyses Conducted:", "additionalAnalyses"),
        boundingBoxes: boxes,
    };
}
|
|
312
|
+
// ── Verification of Condition ───────────────────────────────────────────────
|
|
313
|
+
/**
 * Collects the text between the "Verification of Physical Condition" heading
 * and the "Subject Property Listing History" heading.
 *
 * When the closing heading is absent, everything after the opening heading is
 * returned.
 *
 * @param body - Body lines to search.
 * @returns {string} The lines joined with spaces, or "" when the opening
 *   heading is not found.
 */
function parseVerificationOfCondition(body) {
    const headingIdx = body.findIndex((line) => /Verification of Physical Condition/i.test(line.fullText));
    if (headingIdx === -1) {
        return "";
    }
    const following = body.slice(headingIdx + 1).map((line) => line.fullText);
    const stopAt = following.findIndex((text) => /Subject Property Listing History/i.test(text));
    const kept = stopAt === -1 ? following : following.slice(0, stopAt);
    return kept.join(" ").trim();
}
|
|
326
|
+
// ── Listing History ─────────────────────────────────────────────────────────
|
|
327
|
+
/**
 * Collects the text between the "Subject Property Listing History" heading
 * and the next line that starts with "Neighborhood:".
 *
 * When no such closing line exists, everything after the heading is returned.
 *
 * @param body - Body lines to search.
 * @returns {string} The lines joined with spaces, or "" when the heading is
 *   not found.
 */
function parseListingHistory(body) {
    const headingIdx = body.findIndex((line) => /Subject Property Listing History/i.test(line.fullText));
    if (headingIdx === -1) {
        return "";
    }
    const following = body.slice(headingIdx + 1).map((line) => line.fullText);
    const stopAt = following.findIndex((text) => /^Neighborhood:/i.test(text));
    const kept = stopAt === -1 ? following : following.slice(0, stopAt);
    return kept.join(" ").trim();
}
|
|
340
|
+
// ── Neighborhood ────────────────────────────────────────────────────────────
|
|
341
|
+
/**
 * Parses the "Neighborhood" section: land use, building averages, zoning,
 * flood information, conformance and ownership.
 *
 * @param body - Body lines to search. Each line is read for `page` and
 *   `segments`, and each segment for `text` and `x`.
 * @returns Object with one string per field ("" when not found) plus
 *   `boundingBoxes` for the value segments that were located.
 */
function parseNeighborhood(body) {
    const bb = {};
    const hasSegmentWith = (text) => (l) => l.segments.some((s) => s.text.includes(text));
    // Value of a "<label> <value...>" line whose first segment contains the label.
    const labelValue = (label, bbKey) => {
        const line = body.find((l) => l.segments.length >= 1 && l.segments[0].text.includes(label));
        if (!line) {
            return "";
        }
        const valSeg = line.segments[1];
        if (valSeg) {
            bb[bbKey] = toBBox(valSeg, line);
        }
        return line.segments.slice(1).map((s) => s.text).join(" ").trim();
    };
    // Text of the last segment on the first line that contains `marker`.
    const trailingValue = (marker, bbKey) => {
        const line = body.find(hasSegmentWith(marker));
        if (!line) {
            return "";
        }
        const valSeg = line.segments[line.segments.length - 1];
        bb[bbKey] = toBBox(valSeg, line);
        return valSeg.text.trim();
    };
    // Text of the segment right after the marker segment. It is ignored when
    // that neighbour contains `excluded`, i.e. it is the next label on the
    // line rather than an answer.
    const answerAfter = (isMarker, excluded, bbKey) => {
        const line = body.find((l) => l.segments.some(isMarker));
        if (!line) {
            return "";
        }
        const answer = line.segments[line.segments.findIndex(isMarker) + 1];
        if (!answer || answer.text.includes(excluded)) {
            return "";
        }
        bb[bbKey] = toBBox(answer, line);
        return answer.text.trim();
    };
    // Land use types: label and value lines are interleaved by y position, so
    // scan from two lines before the label up to the "Land Use Concerns" line
    // (or three lines from the label when that line does not follow).
    const landUseLabelIdx = body.findIndex(hasSegmentWith("Land Use Types Present"));
    const landUseConcernsIdx = body.findIndex(hasSegmentWith("Land Use Concerns"));
    let landUseTypesPresent = "";
    if (landUseLabelIdx >= 0) {
        const startIdx = Math.max(0, landUseLabelIdx - 2);
        // Clamp to the body length: the fallback window can otherwise run past
        // the last line and dereference an undefined line.
        const endIdx = Math.min(body.length, landUseConcernsIdx > landUseLabelIdx ? landUseConcernsIdx : landUseLabelIdx + 3);
        const valueParts = [];
        for (let i = startIdx; i < endIdx; i++) {
            for (const seg of body[i].segments) {
                // NOTE(review): x >= 200 presumably marks the value column — confirm.
                if (!seg.text.includes("Land Use Types Present") && seg.x >= 200) {
                    if (valueParts.length === 0) {
                        bb.landUseTypesPresent = toBBox(seg, body[i]);
                    }
                    valueParts.push(seg.text.trim());
                }
            }
        }
        landUseTypesPresent = valueParts.join(" ").replace(/\s+/g, " ").replace(/,\s*$/, "").trim();
    }
    // Flood info
    const floodMapNumber = trailingValue("Map Number", "floodMapNumber");
    const floodMapEffectiveDate = trailingValue("Map Effective Date", "floodMapEffectiveDate");
    const isInFloodZone = trailingValue("Is it in the Flood Zone?", "isInFloodZone");
    const isInSpecialFloodHazard = trailingValue("Special Flood Hazard", "isInSpecialFloodHazard");
    // Conformance
    const conformanceIssues = answerAfter((s) => s.text.includes("conformance issues"), "Map Effective", "conformanceIssues");
    // Ownership
    const ownership = answerAfter((s) => /^Leasehold$/i.test(s.text.trim()), "Flood", "ownership");
    // Zoning: taken from the first segment of the line after the
    // "Flood Information" header row on page 3 or later.
    const zoningIdx = body.findIndex((l) => l.page >= 3 && l.segments.length >= 2 && l.segments.some((s) => s.text.includes("Flood Information")));
    let zoningText = "";
    if (zoningIdx >= 0 && zoningIdx + 1 < body.length) {
        const nextLine = body[zoningIdx + 1];
        zoningText = nextLine.segments[0]?.text.trim() ?? "";
        if (nextLine.segments[0]) {
            bb.zoning = toBBox(nextLine.segments[0], nextLine);
        }
    }
    return {
        landUseTypesPresent,
        landUseConcerns: labelValue("Land Use Concerns:", "landUseConcerns"),
        averageAgeOfResidentialUnits: labelValue("Average Age of Residential Units:", "averageAgeOfResidentialUnits"),
        averageBuildingCondition: labelValue("Average Building Condition:", "averageBuildingCondition"),
        averageBuildingQuality: labelValue("Average Building Quality:", "averageBuildingQuality"),
        soldCompPercentRemodeled: labelValue("Sold Comp Percent Remodeled:", "soldCompPercentRemodeled"),
        zoning: zoningText,
        floodMapNumber,
        floodMapEffectiveDate,
        isInFloodZone,
        isInSpecialFloodHazard,
        conformanceIssues,
        ownership,
        boundingBoxes: bb,
    };
}
|
|
456
|
+
// ── Prepared By ─────────────────────────────────────────────────────────────
|
|
457
|
+
/**
 * Parses the "Prepared By:" line and the timestamp line that follows it.
 *
 * The name is the text before the e-mail address, or the whole remainder of
 * the line when no e-mail is present. The date is the first later line that
 * contains a four-digit number and an AM/PM time marker.
 *
 * @param body - Body lines to search.
 * @returns `{ name, email, phone, date, boundingBoxes }`; the string fields
 *   are "" when the corresponding part is not found.
 */
function parsePreparedBySection(body) {
    const bb = {};
    const line = body.find((l) => /^Prepared By:/i.test(l.fullText));
    if (!line) {
        return { name: "", email: "", phone: "", date: "", boundingBoxes: bb };
    }
    if (line.segments[0]) {
        bb.preparedBy = toBBox(line.segments[0], line);
    }
    const text = line.fullText.replace(/^Prepared By:\s*/i, "");
    const emailMatch = text.match(/([\w.+-]+@[\w.-]+)/);
    const phoneMatch = text.match(/(\(?\d{3}\)?\s*[\d-]{7,})/);
    const email = emailMatch ? emailMatch[1] : "";
    const phone = phoneMatch ? phoneMatch[1] : "";
    let name = text;
    if (emailMatch) {
        name = name.slice(0, name.indexOf(emailMatch[1]));
    }
    name = name.replace(/,\s*$/, "").trim();
    // Date is on a subsequent line. The AM/PM marker must follow a digit and
    // end at a word boundary. A bare /AM|PM/i also matches inside ordinary
    // words such as "Parameters", so an unrelated line containing a year could
    // be mistaken for the timestamp.
    const lineIdx = body.indexOf(line);
    let date = "";
    for (let i = lineIdx + 1; i < body.length; i++) {
        const t = body[i].fullText;
        if (/\d{4}/.test(t) && /\d\s*(?:AM|PM)\b/i.test(t)) {
            date = t.trim();
            if (body[i].segments[0]) {
                bb.date = toBBox(body[i].segments[0], body[i]);
            }
            break;
        }
    }
    return { name, email, phone, date, boundingBoxes: bb };
}
|
|
487
|
+
// ── Valuation Page (Page 5) ─────────────────────────────────────────────────
|
|
488
|
+
/**
 * Parses the valuation page from the page-5 lines that are not
 * headers/footers.
 *
 * @param lines - All extracted lines of the document.
 * @returns `{ valuationResults, renovationStrategies, marketDemand }`.
 */
function parseValuationPage(lines) {
    const pageFiveLines = lines.filter((line) => line.page === 5 && !isHeaderOrFooter(line));
    const valuationResults = parseValuationResults(pageFiveLines);
    const renovationStrategies = parseRenovationStrategies(pageFiveLines);
    const marketDemand = parseMarketDemand(pageFiveLines);
    return { valuationResults, renovationStrategies, marketDemand };
}
|
|
496
|
+
/**
 * Reads the headline valuation figures.
 *
 * Each field is taken from the last segment of the first line whose first
 * segment contains the field's label. The monetary fields go through
 * `parseCurrency`.
 *
 * @param body - Lines of the valuation page.
 * @returns Current and target condition as text, the three monetary values
 *   as returned by `parseCurrency`, and the bounding boxes of the value
 *   segments.
 */
function parseValuationResults(body) {
    const boxes = {};
    const readField = (label, key) => {
        const row = body.find((line) => line.segments[0]?.text.includes(label));
        if (!row) {
            return "";
        }
        const lastSegment = row.segments[row.segments.length - 1];
        // Record a box only when the value sits in a segment of its own.
        const isSeparateSegment = lastSegment && lastSegment !== row.segments[0];
        if (isSeparateSegment) {
            boxes[key] = toBBox(lastSegment, row);
        }
        return lastSegment?.text.trim() ?? "";
    };
    return {
        currentCondition: readField("Current Condition", "currentCondition"),
        estimatedAsIsMarketValue: parseCurrency(readField("Estimated As Is Market Value", "estimatedAsIsMarketValue")),
        borrowerBudget: parseCurrency(readField("Borrower Budget", "borrowerBudget")),
        borrowerTargetCondition: readField("Borrower Target Condition", "borrowerTargetCondition"),
        estimatedARV: parseCurrency(readField("Estimated ARV at Target Condition", "estimatedARV")),
        boundingBoxes: boxes,
    };
}
|
|
521
|
+
/**
 * Converts a currency string such as "$580,000" to a number.
 *
 * Dollar signs and thousands separators are removed before parsing.
 *
 * @param {string|null|undefined} val - Currency text.
 * @returns {number|null} The parsed amount, or null when the input is
 *   missing, blank, or does not start with a number.
 */
function parseCurrency(val) {
    // Treat a missing value like a blank one instead of throwing on
    // `.replace`, matching how parseNum handles absent cells.
    if (val == null) {
        return null;
    }
    const clean = val.replace(/[$,]/g, "").trim();
    if (!clean) {
        return null;
    }
    const n = Number.parseFloat(clean);
    return Number.isNaN(n) ? null : n;
}
|
|
528
|
+
function parseRenovationStrategies(body) {
|
|
529
|
+
const bb = {};
|
|
530
|
+
// Find the column header line with Min, Partial, Full, Best
|
|
531
|
+
const headerLine = body.find((l) => l.segments.some((s) => s.text.trim() === "Min") &&
|
|
532
|
+
l.segments.some((s) => s.text.trim() === "Full"));
|
|
533
|
+
if (headerLine?.segments[0])
|
|
534
|
+
bb.header = toBBox(headerLine.segments[0], headerLine);
|
|
535
|
+
// Determine value column boundaries from the header (Min, Partial, Full, Best).
|
|
536
|
+
const minSeg = headerLine?.segments.find((s) => s.text.trim() === "Min");
|
|
537
|
+
const bestSeg = headerLine?.segments.find((s) => s.text.trim() === "Best");
|
|
538
|
+
const valXMin = minSeg ? minSeg.x - 15 : 75;
|
|
539
|
+
const valXMax = bestSeg ? bestSeg.x + bestSeg.width + 15 : 290;
|
|
540
|
+
// Extract value segments: within the strategy column range only
|
|
541
|
+
const getValSegs = (line) => line.segments.filter((s) => s.x >= valXMin && s.x <= valXMax);
|
|
542
|
+
// Row parser: find line by label, extract 4 values from segments
|
|
543
|
+
const getRow = (label) => {
|
|
544
|
+
const line = body.find((l) => l.segments.some((s) => s.text.trim() === label || s.text.includes(label)));
|
|
545
|
+
if (!line)
|
|
546
|
+
return [undefined, undefined, undefined, undefined];
|
|
547
|
+
return getValSegs(line).map((s) => s.text.trim());
|
|
548
|
+
};
|
|
549
|
+
// Find a table row: line must have a label AND at least 3 value segments
|
|
550
|
+
const getRowWithBB = (label, bbPrefix) => {
|
|
551
|
+
const line = body.find((l) => {
|
|
552
|
+
const hasLabel = l.segments.some((s) => s.text.trim() === label || s.text.includes(label));
|
|
553
|
+
const valCount = getValSegs(l).length;
|
|
554
|
+
return hasLabel && valCount >= 3;
|
|
555
|
+
});
|
|
556
|
+
if (!line)
|
|
557
|
+
return [undefined, undefined, undefined, undefined];
|
|
558
|
+
const valSegs = getValSegs(line);
|
|
559
|
+
const strategies = ["min", "partial", "full", "best"];
|
|
560
|
+
valSegs.forEach((s, i) => {
|
|
561
|
+
if (strategies[i])
|
|
562
|
+
bb[`${bbPrefix}_${strategies[i]}`] = toBBox(s, line);
|
|
563
|
+
});
|
|
564
|
+
return valSegs.map((s) => s.text.trim());
|
|
565
|
+
};
|
|
566
|
+
const arvRow = getRowWithBB("ARV", "arv");
|
|
567
|
+
// "As Is Value" line has a quirk — first segment may include "As Is Value $580,000"
|
|
568
|
+
const asIsLine = body.find((l) => l.segments.some((s) => s.text.includes("As Is Value")));
|
|
569
|
+
let asIsRow = [undefined, undefined, undefined, undefined];
|
|
570
|
+
if (asIsLine) {
|
|
571
|
+
const asIsValSegs = getValSegs(asIsLine);
|
|
572
|
+
// First value may be embedded in "As Is Value $580,000"
|
|
573
|
+
const embedded = asIsLine.segments.find((s) => s.text.includes("As Is Value"))?.text.match(/\$([\d,]+)/)?.[0];
|
|
574
|
+
if (embedded && asIsValSegs.length < 4) {
|
|
575
|
+
asIsRow = [embedded, ...asIsValSegs.map((s) => s.text.trim())];
|
|
576
|
+
}
|
|
577
|
+
else {
|
|
578
|
+
asIsRow = asIsValSegs.map((s) => s.text.trim());
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
const rehabRow = getRowWithBB("Rehab", "rehab");
|
|
582
|
+
const sqftRow = getRowWithBB("$/sqft", "perSqft");
|
|
583
|
+
const basisRow = getRowWithBB("Basis", "basis");
|
|
584
|
+
const netLiftRow = getRowWithBB("Net Lift", "netLift");
|
|
585
|
+
// Gross Return — may be split: "Gross" on one line, percentages on another, "Return" on a third
|
|
586
|
+
// Look for any line with percentage values in the strategy column range
|
|
587
|
+
const grossReturnLine = body.find((l) => {
|
|
588
|
+
const pctSegs = l.segments.filter((s) => /\d+\.\d+%/.test(s.text) && s.x >= valXMin);
|
|
589
|
+
return pctSegs.length >= 3;
|
|
590
|
+
});
|
|
591
|
+
const returnVals = grossReturnLine
|
|
592
|
+
? grossReturnLine.segments.filter((s) => /\d+\.\d+%/.test(s.text)).map((s) => s.text.trim())
|
|
593
|
+
: [];
|
|
594
|
+
const returnStrategies = ["min", "partial", "full", "best"];
|
|
595
|
+
if (grossReturnLine) {
|
|
596
|
+
grossReturnLine.segments.filter((s) => /\d+\.\d+%/.test(s.text)).forEach((s, i) => {
|
|
597
|
+
if (returnStrategies[i])
|
|
598
|
+
bb[`grossReturn_${returnStrategies[i]}`] = toBBox(s, grossReturnLine);
|
|
599
|
+
});
|
|
600
|
+
}
|
|
601
|
+
// Timeline rows — use getValSegs for position-independent extraction
|
|
602
|
+
const rehabTimeLine = body.find((l) => l.segments.some((s) => /Rehab Time/.test(s.text)));
|
|
603
|
+
const rehabTimeVals = rehabTimeLine
|
|
604
|
+
? getValSegs(rehabTimeLine).map((s) => s.text.trim())
|
|
605
|
+
: [];
|
|
606
|
+
const ttsLine = body.find((l) => l.segments.some((s) => s.text.trim() === "Estim TTS"));
|
|
607
|
+
const ttsVals = ttsLine
|
|
608
|
+
? getValSegs(ttsLine).map((s) => s.text.trim())
|
|
609
|
+
: [];
|
|
610
|
+
const cushionLine = body.find((l) => l.segments.some((s) => s.text.trim() === "Cushion"));
|
|
611
|
+
const cushionVals = cushionLine
|
|
612
|
+
? getValSegs(cushionLine).map((s) => s.text.trim())
|
|
613
|
+
: [];
|
|
614
|
+
const totalTimeLine = body.find((l) => l.segments.some((s) => s.text.trim() === "Total Time"));
|
|
615
|
+
const totalTimeVals = totalTimeLine
|
|
616
|
+
? getValSegs(totalTimeLine).map((s) => s.text.trim())
|
|
617
|
+
: [];
|
|
618
|
+
// Annualized Return — may be split across lines, look for "1.42x" style values
|
|
619
|
+
const annReturnLine = body.find((l) => l.segments.some((s) => /\d+\.\d+x/.test(s.text)));
|
|
620
|
+
const annReturnVals = annReturnLine
|
|
621
|
+
? annReturnLine.segments.filter((s) => /\d+\.\d+x/.test(s.text)).map((s) => s.text.trim())
|
|
622
|
+
: [];
|
|
623
|
+
const buildStrategy = (i) => {
|
|
624
|
+
const stratBb = {};
|
|
625
|
+
// Copy relevant bounding boxes for this strategy column
|
|
626
|
+
const prefix = ["min", "partial", "full", "best"][i];
|
|
627
|
+
for (const [k, v] of Object.entries(bb)) {
|
|
628
|
+
if (k.endsWith(`_${prefix}`)) {
|
|
629
|
+
stratBb[k.replace(`_${prefix}`, "")] = v;
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
return {
|
|
633
|
+
arv: parseCurrency(arvRow[i] ?? ""),
|
|
634
|
+
asIsValue: parseCurrency(asIsRow[i] ?? ""),
|
|
635
|
+
rehab: parseCurrency(rehabRow[i] ?? ""),
|
|
636
|
+
perSqft: parseCurrency(sqftRow[i] ?? ""),
|
|
637
|
+
basis: parseCurrency(basisRow[i] ?? ""),
|
|
638
|
+
netLift: parseCurrency(netLiftRow[i] ?? ""),
|
|
639
|
+
grossReturn: returnVals[i] ?? "",
|
|
640
|
+
rehabTime: parseNum(rehabTimeVals[i]),
|
|
641
|
+
estimatedTTS: parseNum(ttsVals[i]),
|
|
642
|
+
cushion: parseNum(cushionVals[i]),
|
|
643
|
+
totalTime: parseNum(totalTimeVals[i]),
|
|
644
|
+
annualizedReturn: annReturnVals[i] ?? "",
|
|
645
|
+
boundingBoxes: stratBb,
|
|
646
|
+
};
|
|
647
|
+
};
|
|
648
|
+
return {
|
|
649
|
+
min: buildStrategy(0),
|
|
650
|
+
partial: buildStrategy(1),
|
|
651
|
+
full: buildStrategy(2),
|
|
652
|
+
best: buildStrategy(3),
|
|
653
|
+
boundingBoxes: bb,
|
|
654
|
+
};
|
|
655
|
+
}
|
|
656
|
+
/**
 * Extract the "Market Demand" block from the lines of a page body.
 *
 * Reads the numeric score from the line containing "Market Demand", the
 * Strong/Moderate/Weak rating from the first matching line after it, and
 * four left-side label/value fields.
 *
 * @param {Array} body - Lines to search; each line exposes `segments`, and each
 *   segment exposes `text` and `x`.
 * @returns {{score: (number|null), label: string, location: string,
 *   inventory: string, medianTTS: string, percentRemodeled: string,
 *   boundingBoxes: Object}} Missing text fields are returned as "".
 */
function parseMarketDemand(body) {
    const boxes = {};
    const ratingPattern = /^(Strong|Moderate|Weak)$/i;
    // Right-hand table labels that can end up merged into a left-side value segment.
    const mergedTableLabel = /\b(Rehab Time|Estim TTS|Cushion|Total Time|Annualized)/;

    // Score: the first all-digit segment on the "Market Demand" line.
    const demandLine = body.find((row) => row.segments.some((seg) => seg.text.includes("Market Demand")));
    let score = null;
    if (demandLine) {
        const digitsSeg = demandLine.segments.find((seg) => /^\d+$/.test(seg.text.trim()));
        if (digitsSeg) {
            score = parseInt(digitsSeg.text.trim(), 10);
            boxes.score = toBBox(digitsSeg, demandLine);
        }
    }

    // Rating: only lines after the demand line are considered (whole body when
    // there is no demand line).
    const candidates = demandLine ? body.slice(body.indexOf(demandLine) + 1) : body;
    let label = "";
    for (const row of candidates) {
        const ratingSeg = row.segments.find((seg) => ratingPattern.test(seg.text.trim()));
        if (ratingSeg) {
            label = ratingSeg.text.trim();
            boxes.label = toBBox(ratingSeg, row);
            break;
        }
    }

    // Left-side field: the value is the second segment of the line whose first
    // segment carries the field label, provided that segment starts left of x = 350.
    const readLeftField = (fieldLabel, boxKey) => {
        const row = body.find((candidate) => {
            const first = candidate.segments[0];
            if (first == null) {
                return false;
            }
            return first.text.trim() === fieldLabel || first.text.includes(fieldLabel);
        });
        if (!row || row.segments.length < 2) {
            return "";
        }
        const valueSeg = row.segments[1];
        if (!valueSeg || !(valueSeg.x < 350)) {
            return "";
        }
        boxes[boxKey] = toBBox(valueSeg, row);
        const raw = valueSeg.text.trim();
        // Keep only the text that precedes a merged table label.
        const merged = raw.match(mergedTableLabel);
        return merged ? raw.slice(0, merged.index).trim() : raw;
    };

    const location = readLeftField("Location", "location");
    const inventory = readLeftField("Inventory", "inventory");
    const medianTTS = readLeftField("Median TTS", "medianTTS");
    const percentRemodeled = readLeftField("% Remodeled", "percentRemodeled");
    return {
        score,
        label,
        location,
        inventory,
        medianTTS,
        percentRemodeled,
        boundingBoxes: boxes,
    };
}
|
|
707
|
+
// ── Comparables (Pages 6, 11, 15-17) ────────────────────────────────────────
|
|
708
|
+
/** Condition group headers in the comp tables */
const CONDITION_GROUPS = [
    "Newly Built", "Full Remodel", "Partial Remodel", "Maintained",
    "Moderate", "Poor", "Very Poor", "Unsalvageable",
];
/**
 * Collect and parse the comparable rows that follow a section heading.
 *
 * Scanning starts after the first line whose `fullText` contains
 * `sectionTitle` and stops at a "Photos for…" line, at a line more than five
 * pages past the heading, or at another known section heading on a later page.
 * Lines rejected by `isHeaderOrFooter` are skipped.
 *
 * @param {Array} lines - Document lines exposing `fullText`, `page` and `segments`.
 * @param {string} sectionTitle - Heading text that identifies the section.
 * @returns {{title: string, comparables: Array}} Rows for which `parseCompRow`
 *   returned a value, in document order; an empty list when the heading is absent.
 */
function parseComparablesSection(lines, sectionTitle) {
    const headerIdx = lines.findIndex((l) => l.fullText.includes(sectionTitle));
    if (headerIdx < 0) {
        return { title: sectionTitle, comparables: [] };
    }
    const headerPage = lines[headerIdx].page;
    const nextSectionHeading = /^(Closest Market Comparables|Additional Comparables|Additional Comps Excluded|Budget Flags|Budget Line Items)$/i;

    // Collect table lines from this section until next section or photo pages.
    const tableLines = [];
    for (let i = headerIdx + 1; i < lines.length; i++) {
        const line = lines[i];
        // Trim before testing the anchored patterns: surrounding whitespace in
        // fullText would otherwise keep a heading from ending the section.
        const text = line.fullText.trim();
        if (/^Photos for/i.test(text)) {
            break;
        }
        if (line.page > headerPage + 5) {
            break;
        }
        // A section heading on the heading's own page does not end the section.
        if (nextSectionHeading.test(text) && line.page !== headerPage) {
            break;
        }
        if (isHeaderOrFooter(line)) {
            continue;
        }
        tableLines.push(line);
    }

    // Parse comp rows, tracking the most recent condition group header.
    let currentGroup = "";
    const comparables = [];
    for (const line of tableLines) {
        const firstSeg = line.segments[0]?.text.trim();
        // Condition group header: "#" + group name. Other "#" rows (such as a
        // column header row) leave the current group unchanged.
        if (firstSeg === "#" && line.segments.length >= 2) {
            const groupName = line.segments[1]?.text.trim();
            if (CONDITION_GROUPS.includes(groupName)) {
                currentGroup = groupName;
            }
            continue;
        }
        // Skip subject and non-data lines.
        if (firstSeg === "S" || firstSeg === "#") {
            continue;
        }
        if (!/^\d+$/.test(firstSeg ?? "")) {
            continue;
        }
        const comp = parseCompRow(line, Number.parseInt(firstSeg, 10), currentGroup);
        if (comp) {
            comparables.push(comp);
        }
    }
    return { title: sectionTitle, comparables };
}
|
|
757
|
+
/**
 * Parse one comparable-sale table row into a structured record.
 *
 * The address is taken from segments with 40 < x < 155. Segments with x >= 150
 * are joined and matched against the column pattern
 * `[Type] sqft bd bth year stories lot dist [flags] [grg] COE SP $/sqft C TTS [score]`.
 *
 * @param {Object} line - Row line exposing `segments` (each with `text` and `x`).
 * @param {number} num - Comparable number, stored as `number`.
 * @param {string} group - Condition group, stored as `conditionGroup`.
 * @returns {Object|null} The parsed record, or null when the leading numeric
 *   columns do not match. When only the tail (flags … score) fails to match,
 *   those fields stay null and `closeOfEscrow` stays "".
 */
function parseCompRow(line, num, group) {
    const bb = {};
    // Address: segments with x < 155 (after the # segment).
    const addrSegs = line.segments.filter((s) => s.x > 40 && s.x < 155);
    const address = addrSegs.map((s) => s.text.trim()).join(" ");
    if (addrSegs[0]) {
        bb.address = toBBox(addrSegs[0], line);
    }
    // Data: segments with x >= 150, concatenated and parsed as one string.
    const dataSegs = line.segments.filter((s) => s.x >= 150);
    const dataText = dataSegs.map((s) => s.text.trim()).join(" ");
    // Type is optional (e.g., "C", "TH", "SF", "QP"); strip it if present.
    const stripped = dataText.replace(/^[A-Z]{1,3}\s+/, "");
    const m = stripped.match(/^([\d,]+)\s+(\d+)\s+([\d.]+)\s+(\d{4})\s+([\d.]+)\s+([\d.]+|unkn)\s+([\d.]+)\s+(.+)$/);
    if (!m) {
        return null;
    }
    const sqft = Number.parseInt(m[1].replace(/,/g, ""), 10);
    const beds = Number.parseInt(m[2], 10);
    const baths = Number.parseFloat(m[3]);
    const yearBuilt = Number.parseInt(m[4], 10);
    const stories = Number.parseFloat(m[5]);
    const lot = m[6] === "unkn" ? null : Number.parseFloat(m[6]);
    const dist = Number.parseFloat(m[7]);
    const tail = m[8];
    // Parse the tail: [flags] [grg] COE SP $/sqft C TTS [score].
    // Each optional prefix number must be followed by whitespace. Without that,
    // the regex engine backtracks into the date and peels digits off a
    // two-digit month ("10/3/25" would parse as garage 1 and date "0/3/25").
    const tailMatch = tail.match(/^(?:(\d+)\s+)?(?:(\d+)\s+)?(\d{1,2}\/\d{1,2}\/\d{2,4})\s+\$([\d,]+)\s+\$(\d+)\s+([\d.]+)\s+(\d+)\s*([\d.]+)?$/);
    let flags = null;
    let garage = null;
    let closeOfEscrow = "";
    let salePrice = null;
    let pricePerSqft = null;
    let condition = null;
    let timeToSale = null;
    let score = null;
    if (tailMatch) {
        const [, pre1, pre2, coe, sp, ppsf, cond, tts, rawScore] = tailMatch;
        if (pre2 !== undefined) {
            // Two leading numbers: flags then garage.
            flags = Number.parseInt(pre1, 10);
            garage = Number.parseInt(pre2, 10);
        }
        else if (pre1 !== undefined) {
            // A single leading number is treated as the garage count.
            garage = Number.parseInt(pre1, 10);
        }
        closeOfEscrow = coe;
        salePrice = Number.parseInt(sp.replace(/,/g, ""), 10);
        pricePerSqft = Number.parseInt(ppsf, 10);
        condition = Number.parseFloat(cond);
        timeToSale = Number.parseInt(tts, 10);
        score = rawScore !== undefined ? Number.parseFloat(rawScore) : null;
    }
    // Attach bounding boxes by mapping each segment to a field. Segments are
    // often merged, so x position (or text shape) decides which field a segment
    // primarily represents; the first segment to claim a field keeps it.
    for (const seg of dataSegs) {
        const t = seg.text.trim();
        const x = seg.x;
        if (x < 200 && !bb.sqft) {
            // First data segment covers sqft, beds, baths, yearBuilt (often merged).
            const box = toBBox(seg, line);
            bb.sqft = box;
            bb.beds = box;
            bb.baths = box;
            bb.yearBuilt = box;
        }
        else if (x >= 250 && x < 295 && !bb.stories) {
            bb.stories = toBBox(seg, line);
        }
        else if (x >= 285 && x < 325 && !bb.lot) {
            bb.lot = toBBox(seg, line);
        }
        else if (x >= 325 && x < 380 && /^\d/.test(t) && !bb.distance) {
            bb.distance = toBBox(seg, line);
        }
        else if (/\d{1,2}\/\d{1,2}\/\d{2,4}/.test(t) && !bb.closeOfEscrow) {
            bb.closeOfEscrow = toBBox(seg, line);
        }
        else if (/^\$[\d,]+$/.test(t) && t.length > 5 && !bb.salePrice) {
            bb.salePrice = toBBox(seg, line);
        }
        else if (/^\$\d+$/.test(t) && !bb.pricePerSqft) {
            bb.pricePerSqft = toBBox(seg, line);
        }
        else if (x >= 495 && x < 545 && /^[\d.]+$/.test(t) && !bb.condition) {
            bb.condition = toBBox(seg, line);
        }
        else if (x >= 520 && x < 575 && /^\d+$/.test(t) && !bb.timeToSale) {
            bb.timeToSale = toBBox(seg, line);
        }
        else if (x >= 555 && /^[\d.]+$/.test(t) && !bb.score) {
            bb.score = toBBox(seg, line);
        }
    }
    // For merged segments (e.g., "10/3/25 $750,000"), pick up SP from merged text.
    if (!bb.salePrice) {
        const spSeg = dataSegs.find((s) => /\$[\d,]{4,}/.test(s.text));
        if (spSeg) {
            bb.salePrice = toBBox(spSeg, line);
        }
    }
    return {
        number: num,
        address,
        conditionGroup: group,
        sqft,
        beds,
        baths,
        yearBuilt,
        stories,
        lot,
        distance: dist,
        flags,
        garage,
        closeOfEscrow,
        salePrice,
        pricePerSqft,
        condition,
        timeToSale,
        score,
        boundingBoxes: bb,
    };
}
|
|
879
|
+
// ── Budget Flags (Page 18) ────────────────────────────────────────────────────
|
|
880
|
+
const CONCERN_LEVELS = [
    "Significant Concerns",
    "Medium Concerns",
    "Moderate Concerns",
    "Cautionary Concerns",
];
/**
 * Parse the "Budget Flags" page into per-level concern lists plus the free
 * text that follows "Missing Line Items".
 *
 * Only lines on the heading's page, positioned below the heading (greater `y`)
 * and not rejected by `isHeaderOrFooter`, are considered.
 *
 * @param {Array} lines - Document lines exposing `fullText`, `page`, `y` and `segments`.
 * @returns {{concerns: Array, missingLineItems: string, boundingBoxes: Object}}
 *   One `concerns` entry per concern level found, in CONCERN_LEVELS order.
 */
function parseBudgetFlags(lines) {
    const headerIdx = lines.findIndex((l) => /^Budget Flags$/i.test(l.fullText.trim()));
    const header = headerIdx >= 0 ? lines[headerIdx] : null;
    const body = header
        ? lines.filter((l) => l.page === header.page && !isHeaderOrFooter(l) && l.y > header.y)
        : [];
    const bb = {};
    if (header) {
        bb.title = toBBox(header.segments[0], header);
    }

    const concerns = [];
    for (const level of CONCERN_LEVELS) {
        const start = body.findIndex((l) => l.fullText.trim() === level);
        if (start < 0) {
            continue;
        }
        const levelLine = body[start];
        const sectionBB = { level: toBBox(levelLine.segments[0], levelLine) };
        // Items run until the next concern level or the "Missing Line Items" heading.
        const items = [];
        for (const row of body.slice(start + 1)) {
            const text = row.fullText.trim();
            if (CONCERN_LEVELS.includes(text) || /^Missing Line Items$/i.test(text)) {
                break;
            }
            // Blank lines, the "no items" placeholder and the column caption are not items.
            const isFiller = !text
                || /^No line items flagged\.?$/i.test(text)
                || /^Specific Line Item/i.test(text);
            if (isFiller) {
                continue;
            }
            items.push(text);
            sectionBB[`item${items.length}`] = toBBox(row.segments[0], row);
        }
        concerns.push({ level, items, boundingBoxes: sectionBB });
    }

    // Missing Line Items: every non-empty line after the heading, joined by spaces.
    let missingLineItems = "";
    const missingIdx = body.findIndex((l) => /^Missing Line Items$/i.test(l.fullText.trim()));
    if (missingIdx >= 0) {
        const heading = body[missingIdx];
        bb.missingLineItems = toBBox(heading.segments[0], heading);
        const collected = [];
        for (const row of body.slice(missingIdx + 1)) {
            const text = row.fullText.trim();
            if (!text) {
                continue;
            }
            collected.push(text);
            // The text bounding box comes from the first non-empty line.
            if (!bb.missingLineItemsText) {
                bb.missingLineItemsText = toBBox(row.segments[0], row);
            }
        }
        missingLineItems = collected.join(" ");
    }
    return { concerns, missingLineItems, boundingBoxes: bb };
}
|
|
939
|
+
// ── Budget Line Items (Page 19) ───────────────────────────────────────────────
|
|
940
|
+
/**
 * Collect the dollar amounts found in the dollar columns of a row.
 *
 * Only segments with x >= 370 are inspected. Every `$`-prefixed run of digits
 * and commas inside those segments contributes one integer, in segment order
 * and then left to right within a segment.
 *
 * @param {Array} segments - Row segments exposing `text` and `x`.
 * @returns {number[]} Parsed amounts. Tokens with no digits (e.g. "$,") are
 *   dropped rather than returned as NaN.
 */
function parseDollarValues(segments) {
    const values = [];
    for (const seg of segments) {
        // Only look at segments in the dollar columns (x >= 370).
        if (seg.x < 370) {
            continue;
        }
        for (const token of seg.text.match(/\$[\d,]+/g) ?? []) {
            const amount = Number.parseInt(token.replace(/[$,]/g, ""), 10);
            // A "$" followed only by commas has no digits and parses to NaN;
            // skip it so NaN never reaches the column fields.
            if (!Number.isNaN(amount)) {
                values.push(amount);
            }
        }
    }
    return values;
}
|
|
955
|
+
/**
 * Map positional dollar values onto the six budget columns.
 * Columns without a value are null.
 */
function budgetColumnsFromValues(values) {
    const [hr = null, dm = null, up = null, rc = null, soft = null, total = null] =
        values.map((v) => v ?? null);
    return { hr, dm, up, rc, soft, total };
}
/**
 * Parse the "Budget Line Items" page into categories, their numbered items and
 * the totals row.
 *
 * Only lines on the heading's page, positioned below the heading (greater `y`)
 * and not rejected by `isHeaderOrFooter`, are considered. A row with dollar
 * values and no leading number opens a category; numbered rows are attached to
 * the most recent category (numbered rows seen before any category are not
 * returned).
 *
 * @param {Array} lines - Document lines exposing `fullText`, `page`, `y` and `segments`.
 * @returns {{categories: Array, totalHR: (number|null), totalDM: (number|null),
 *   totalUP: (number|null), totalRC: (number|null), totalSoft: (number|null),
 *   grandTotal: (number|null), boundingBoxes: Object}} Dollar values are
 *   assigned to columns by position; columns with no value are null.
 */
function parseBudgetLineItems(lines) {
    const headerIdx = lines.findIndex((l) => /^Budget Line Items$/i.test(l.fullText.trim()));
    const headerPage = headerIdx >= 0 ? lines[headerIdx].page : -1;
    const body = headerIdx >= 0
        ? lines.filter((l) => l.page === headerPage && !isHeaderOrFooter(l) && l.y > lines[headerIdx].y)
        : [];
    const bb = {};
    if (headerIdx >= 0) {
        const hl = lines[headerIdx];
        bb.title = toBBox(hl.segments[0], hl);
    }
    const categories = [];
    let currentCategory = null;
    let totalHR = null;
    let totalDM = null;
    let totalUP = null;
    let totalRC = null;
    let totalSoft = null;
    let grandTotal = null;
    for (const line of body) {
        const text = line.fullText.trim();
        // Skip the column header row.
        if (/^Budget Items\b/i.test(text)) {
            continue;
        }
        const hasDollar = line.segments.some((s) => /\$/.test(s.text));
        // Total row.
        if (/^Total\b/.test(text) && hasDollar) {
            // Route through the shared column mapper so absent columns are
            // null, matching item and category rows, instead of undefined.
            const totals = budgetColumnsFromValues(parseDollarValues(line.segments));
            totalHR = totals.hr;
            totalDM = totals.dm;
            totalUP = totals.up;
            totalRC = totals.rc;
            totalSoft = totals.soft;
            grandTotal = totals.total;
            const totalSeg = line.segments.find((s) => /Total/.test(s.text));
            if (totalSeg) {
                bb.total = toBBox(totalSeg, line);
            }
            const lastSeg = line.segments[line.segments.length - 1];
            if (lastSeg) {
                bb.grandTotal = toBBox(lastSeg, line);
            }
            continue;
        }
        // Numbered item row.
        // Case 1: first segment is just a number (e.g., "1" at x~32).
        // Case 2: number and name merged (e.g., "1 Dumpster / Debris Removal" at x~31).
        const firstSeg = line.segments[0];
        const firstText = firstSeg ? firstSeg.text.trim() : "";
        const inNumberColumn = Boolean(firstSeg) && firstSeg.x < 50;
        const separateNum = inNumberColumn && /^\d+$/.test(firstText);
        const mergedNum = inNumberColumn && /^\d+\s+\S/.test(firstText);
        if (separateNum || mergedNum) {
            const itemBB = {};
            let num;
            let name;
            if (separateNum) {
                num = Number.parseInt(firstText, 10);
                itemBB.number = toBBox(firstSeg, line);
                const nameSeg = line.segments.find((s) => s.x >= 55 && s.x < 200);
                name = nameSeg?.text.trim() ?? "";
                if (nameSeg) {
                    itemBB.name = toBBox(nameSeg, line);
                }
            }
            else {
                // Number and name merged in one segment; the mergedNum test
                // above guarantees this pattern matches.
                const match = firstText.match(/^(\d+)\s+(.+)$/);
                num = Number.parseInt(match[1], 10);
                name = match[2].trim();
                itemBB.number = toBBox(firstSeg, line);
                itemBB.name = toBBox(firstSeg, line);
            }
            // Description segment at x~258.
            const descSeg = line.segments.find((s) => s.x >= 200 && s.x < 370);
            const description = descSeg?.text.trim() ?? "";
            if (descSeg) {
                itemBB.description = toBBox(descSeg, line);
            }
            const columns = budgetColumnsFromValues(parseDollarValues(line.segments));
            // Bounding box for total (last segment, when it holds a dollar value).
            const lastSeg = line.segments[line.segments.length - 1];
            if (lastSeg && /\$/.test(lastSeg.text)) {
                itemBB.total = toBBox(lastSeg, line);
            }
            if (currentCategory) {
                currentCategory.items.push({
                    number: num,
                    name,
                    description,
                    ...columns,
                    boundingBoxes: itemBB,
                });
            }
        }
        else if (hasDollar) {
            // Category row: has dollar values but no leading number.
            const catBB = {};
            const catNameSeg = line.segments.find((s) => s.x < 200);
            const catName = catNameSeg?.text.trim() ?? "";
            if (catNameSeg) {
                catBB.name = toBBox(catNameSeg, line);
            }
            const columns = budgetColumnsFromValues(parseDollarValues(line.segments));
            const lastSeg = line.segments[line.segments.length - 1];
            if (lastSeg && /\$/.test(lastSeg.text)) {
                catBB.total = toBBox(lastSeg, line);
            }
            currentCategory = {
                name: catName,
                ...columns,
                items: [],
                boundingBoxes: catBB,
            };
            categories.push(currentCategory);
        }
    }
    return {
        categories,
        totalHR, totalDM, totalUP, totalRC, totalSoft, grandTotal,
        boundingBoxes: bb,
    };
}
|