@parseo/appraisals 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +35 -0
  2. package/dist/form-1004mc/extract-checkboxes.d.ts +43 -0
  3. package/dist/form-1004mc/extract-checkboxes.d.ts.map +1 -0
  4. package/dist/form-1004mc/extract-checkboxes.js +145 -0
  5. package/dist/form-1004mc/index.d.ts +3 -0
  6. package/dist/form-1004mc/index.d.ts.map +1 -0
  7. package/dist/form-1004mc/index.js +1 -0
  8. package/dist/form-1004mc/parse-page1.d.ts +8 -0
  9. package/dist/form-1004mc/parse-page1.d.ts.map +1 -0
  10. package/dist/form-1004mc/parse-page1.js +760 -0
  11. package/dist/form-1004mc/parse-sales.d.ts +6 -0
  12. package/dist/form-1004mc/parse-sales.d.ts.map +1 -0
  13. package/dist/form-1004mc/parse-sales.js +505 -0
  14. package/dist/form-1004mc/parser.d.ts +5 -0
  15. package/dist/form-1004mc/parser.d.ts.map +1 -0
  16. package/dist/form-1004mc/parser.js +437 -0
  17. package/dist/form-1004mc/types.d.ts +302 -0
  18. package/dist/form-1004mc/types.d.ts.map +1 -0
  19. package/dist/form-1004mc/types.js +1 -0
  20. package/dist/form-1073/index.d.ts +3 -0
  21. package/dist/form-1073/index.d.ts.map +1 -0
  22. package/dist/form-1073/index.js +1 -0
  23. package/dist/form-1073/parse-page1.d.ts +8 -0
  24. package/dist/form-1073/parse-page1.d.ts.map +1 -0
  25. package/dist/form-1073/parse-page1.js +704 -0
  26. package/dist/form-1073/parse-page2.d.ts +6 -0
  27. package/dist/form-1073/parse-page2.d.ts.map +1 -0
  28. package/dist/form-1073/parse-page2.js +438 -0
  29. package/dist/form-1073/parse-sales.d.ts +7 -0
  30. package/dist/form-1073/parse-sales.d.ts.map +1 -0
  31. package/dist/form-1073/parse-sales.js +477 -0
  32. package/dist/form-1073/parser.d.ts +5 -0
  33. package/dist/form-1073/parser.d.ts.map +1 -0
  34. package/dist/form-1073/parser.js +102 -0
  35. package/dist/form-1073/types.d.ts +300 -0
  36. package/dist/form-1073/types.d.ts.map +1 -0
  37. package/dist/form-1073/types.js +1 -0
  38. package/dist/index.d.ts +13 -0
  39. package/dist/index.d.ts.map +1 -0
  40. package/dist/index.js +9 -0
  41. package/dist/richer-values/index.d.ts +3 -0
  42. package/dist/richer-values/index.d.ts.map +1 -0
  43. package/dist/richer-values/index.js +1 -0
  44. package/dist/richer-values/parser.d.ts +5 -0
  45. package/dist/richer-values/parser.d.ts.map +1 -0
  46. package/dist/richer-values/parser.js +1067 -0
  47. package/dist/richer-values/types.d.ts +225 -0
  48. package/dist/richer-values/types.d.ts.map +1 -0
  49. package/dist/richer-values/types.js +1 -0
  50. package/package.json +24 -0
@@ -0,0 +1,760 @@
1
+ import { toBBox } from "@parseo/shared";
2
+ // ── Utilities ─────────────────────────────────────────────────────────────
3
/**
 * Parse a number out of raw form text such as "$1,234.50" or "12%".
 *
 * Dollar signs, commas and percent signs are removed before conversion.
 *
 * @param {string|number|null|undefined} raw - Text (or an already numeric value) to parse.
 * @returns {number|null} The finite number, or null when nothing numeric is present.
 */
function parseNum(raw) {
    // Already numeric: pass it through instead of throwing on `.replace`.
    if (typeof raw === "number")
        return Number.isFinite(raw) ? raw : null;
    if (typeof raw !== "string")
        return null;
    // One character class covers "$", "," and "%"; no second comma pass is needed.
    const cleaned = raw.replace(/[$,%]/g, "").trim();
    if (!cleaned)
        return null;
    const n = Number(cleaned);
    // isFinite also rejects the literal text "Infinity", which Number() accepts.
    return Number.isFinite(n) ? n : null;
}
12
/**
 * Extract the value that follows a label inside a segment's text.
 *
 * @param {{text: string}} seg - Segment whose `text` holds "Label VALUE".
 * @param {RegExp|string} label - Pattern (or literal) identifying the label to remove.
 * @returns {string} The segment text with the label removed and outer whitespace trimmed.
 */
function extractAfterLabel(seg, label) {
    // Callers detect labels on trimmed text, so trim before stripping too;
    // otherwise a `^`-anchored label fails on leading whitespace and the label
    // itself leaks into the returned value.
    return seg.text.trim().replace(label, "").trim();
}
16
/**
 * Find the first line whose `fullText` matches a pattern, optionally limited
 * to a vertical range.
 *
 * @param {Array<{y: number, fullText: string}>} lines - Lines to search, in order.
 * @param {RegExp} pattern - Pattern tested against each line's `fullText`.
 * @param {{minY?: number, maxY?: number}} [opts] - Inclusive bounds on `line.y`.
 * @returns {object|undefined} The first matching line, or undefined when none matches.
 */
function findLine(lines, pattern, opts) {
    const minY = opts?.minY;
    const maxY = opts?.maxY;
    return lines.find((line) => {
        if (minY !== undefined && line.y < minY)
            return false;
        if (maxY !== undefined && line.y > maxY)
            return false;
        // A /g or /y regex resumes from lastIndex; reset it so every line is
        // tested from its first character no matter what was matched before.
        pattern.lastIndex = 0;
        return pattern.test(line.fullText);
    });
}
26
/**
 * Read the value out of the first "Label VALUE" segment on a line.
 *
 * @param {{segments: Array<{text: string}>}|null|undefined} line - Line to inspect.
 * @param {RegExp} label - Pattern identifying the label segment.
 * @returns {{value: string, seg?: object}} The stripped value together with the
 *   segment it came from, or `{ value: "" }` when the line is missing or no
 *   segment matches.
 */
function labelValue(line, label) {
    const hit = line
        ? line.segments.find((candidate) => label.test(candidate.text))
        : undefined;
    if (hit === undefined)
        return { value: "" };
    return { value: extractAfterLabel(hit, label), seg: hit };
}
37
/**
 * Return the trimmed text of the first segment whose x position lies in
 * the half-open range [minX, maxX).
 *
 * @param {{segments: Array<{x: number, text: string}>}} line - Line to inspect.
 * @param {number} minX - Inclusive lower bound on `segment.x`.
 * @param {number} maxX - Exclusive upper bound on `segment.x`.
 * @returns {string} The trimmed segment text, or "" when no segment is in range.
 */
function segAtX(line, minX, maxX) {
    for (const candidate of line.segments) {
        if (candidate.x >= minX && candidate.x < maxX)
            return candidate.text.trim();
    }
    return "";
}
42
+ // ── Subject ───────────────────────────────────────────────────────────────
43
/**
 * Parse the Subject section from the text lines of page 1.
 *
 * Each field is located by its printed label. Most values sit in the same
 * segment as the label ("City DENVER"); Property Rights Appraised and
 * Assignment Type take the segment that follows the label.
 *
 * @param {Array<object>} lines - Text lines; the code reads `fullText` and
 *   `segments` (each segment's `text`) from every line.
 * @returns {object} The subject fields plus `boundingBoxes`, a map from field
 *   name to the `toBBox` result of the segment the value came from. Text fields
 *   default to "" and numeric fields to null when not found.
 */
export function parseSubjectSection(lines) {
    const bb = {};
    // Declared in the order the fields are returned.
    const fields = {
        propertyAddress: "",
        city: "",
        state: "",
        zipCode: "",
        borrower: "",
        ownerOfPublicRecord: "",
        county: "",
        legalDescription: "",
        assessorParcelNumber: "",
        taxYear: null,
        realEstateTaxes: null,
        neighborhoodName: "",
        mapReference: "",
        censusTract: "",
        // Occupant type (Owner/Tenant/Vacant) is a checkbox field — all labels
        // appear as static text in flattened PDFs. Leave empty here; resolved
        // via vector-graphic checkbox detection in parser.ts.
        occupant: "",
        specialAssessments: null,
        hoaAmount: null,
        propertyRightsAppraised: "",
        assignmentType: "",
        lenderClient: "",
        lenderAddress: "",
    };
    /**
     * For every segment on `line`, apply the first rule whose `when` pattern
     * matches the trimmed segment text: strip the label, store the value and
     * (unless `noBox`) record the segment's bounding box.
     */
    const readLabeledSegments = (line, rules) => {
        if (!line)
            return;
        for (const seg of line.segments) {
            const text = seg.text.trim();
            const rule = rules.find((r) => r.when.test(text));
            if (!rule)
                continue;
            const raw = extractAfterLabel(seg, rule.strip);
            fields[rule.key] = rule.numeric ? parseNum(raw) : raw;
            if (!rule.noBox)
                bb[rule.key] = toBBox(seg, line);
        }
    };
    /** Store the text of the segment immediately after the label segment. */
    const readFollowingSegment = (line, label, key) => {
        if (!line)
            return;
        const at = line.segments.findIndex((s) => label.test(s.text.trim()));
        const next = at >= 0 ? line.segments[at + 1] : undefined;
        if (!next)
            return;
        fields[key] = next.text.trim();
        bb[key] = toBBox(next, line);
    };

    readLabeledSegments(findLine(lines, /^Property Address/i), [
        { key: "propertyAddress", when: /^Property Address\s/i, strip: /^Property Address\s+/i },
        { key: "city", when: /^City\s/i, strip: /^City\s+/i },
        { key: "state", when: /^State\s/i, strip: /^State\s+/i },
        { key: "zipCode", when: /^Zip Code\s/i, strip: /^Zip Code\s+/i },
    ]);

    readLabeledSegments(findLine(lines, /^Borrower\s/i), [
        { key: "borrower", when: /^Borrower\s/i, strip: /^Borrower\s+/i },
        { key: "ownerOfPublicRecord", when: /^Owner of Public Record\s/i, strip: /^Owner of Public Record\s+/i },
        { key: "county", when: /^County\s/i, strip: /^County\s+/i },
    ]);

    const legalLine = findLine(lines, /^Legal Description\s/i);
    if (legalLine)
        fields.legalDescription = labelValue(legalLine, /^Legal Description\s+/i).value;
    if (legalLine?.segments[0])
        bb.legalDescription = toBBox(legalLine.segments[0], legalLine);

    readLabeledSegments(findLine(lines, /^Assessor's Parcel #/i), [
        { key: "assessorParcelNumber", when: /^Assessor's Parcel #/i, strip: /^Assessor's Parcel #\s*/i },
        { key: "taxYear", when: /^Tax Year/i, strip: /^Tax Year\s*/i, numeric: true },
        { key: "realEstateTaxes", when: /^R\.?E\.?\s*Taxes\s*\$/i, strip: /^R\.?E\.?\s*Taxes\s*\$\s*/i, numeric: true },
    ]);

    // These labels are detected without requiring trailing whitespace, so the
    // strip patterns use `\s*`: a label-only segment must yield "" rather than
    // the label text itself.
    readLabeledSegments(findLine(lines, /^Neighborhood Name\s/i), [
        { key: "neighborhoodName", when: /^Neighborhood Name/i, strip: /^Neighborhood Name\s*/i },
        { key: "mapReference", when: /^Map Reference/i, strip: /^Map Reference\s*/i },
        { key: "censusTract", when: /^Census Tract/i, strip: /^Census Tract\s*/i },
    ]);

    // No bounding boxes are recorded for the two dollar amounts on this line.
    readLabeledSegments(findLine(lines, /^Occupant/i), [
        { key: "specialAssessments", when: /^Special Assessments\s*\$/i, strip: /^Special Assessments\s*\$\s*/i, numeric: true, noBox: true },
        { key: "hoaAmount", when: /^HOA\s*\$/i, strip: /^HOA\s*\$\s*/i, numeric: true, noBox: true },
    ]);

    // Value is the segment after the label (e.g. "Fee Simple").
    readFollowingSegment(findLine(lines, /^Property Rights Appraised/i), /^Property Rights Appraised/i, "propertyRightsAppraised");
    readFollowingSegment(findLine(lines, /Assignment Type/i), /Assignment Type/i, "assignmentType");

    readLabeledSegments(findLine(lines, /^Lender\/Client\s/i), [
        { key: "lenderClient", when: /^Lender\/Client\s/i, strip: /^Lender\/Client\s+/i },
        { key: "lenderAddress", when: /^Address\s/i, strip: /^Address\s+/i },
    ]);

    return { ...fields, boundingBoxes: bb };
}
185
+ // ── Contract ──────────────────────────────────────────────────────────────
186
/**
 * Parse the Contract section from the text lines of page 1.
 *
 * @param {Array<object>} lines - Text lines; the code reads `fullText` and
 *   `segments` (each segment's `text` and `x`) from every line.
 * @returns {object} `isOfferedForSale` ("Yes", "No" or ""), `reportDataSources`,
 *   `contractAnalysis`, `contractPrice` (number or null), `dateOfContract`,
 *   `financialAssistanceAmount` and `boundingBoxes`.
 */
export function parseContractSection(lines) {
    const bb = {};

    // Whole-word matching so "Not", "None", "Notes" and similar do not count as "No".
    const offeredText = findLine(lines, /currently offered for sale/i)?.fullText ?? "";
    let isOfferedForSale = "";
    if (/\bNo\b/.test(offeredText))
        isOfferedForSale = "No";
    else if (/\bYes\b/.test(offeredText))
        isOfferedForSale = "Yes";

    let reportDataSources = "";
    const dataSourceLine = findLine(lines, /^Report data source/i);
    const dataSourceSeg = dataSourceLine?.segments.find((s) => s.x > 180);
    if (dataSourceSeg) {
        reportDataSources = dataSourceSeg.text.trim();
        bb.reportDataSources = toBBox(dataSourceSeg, dataSourceLine);
    }

    // Contract analysis text (spans multiple lines after "did/did not analyze").
    let contractAnalysis = "";
    const analysisLine = findLine(lines, /did not analyze the contract|did.*analyze the contract/i);
    if (analysisLine) {
        const parts = [analysisLine.fullText];
        for (let i = lines.indexOf(analysisLine) + 1; i < lines.length; i++) {
            if (/^Contract Price/i.test(lines[i].fullText))
                break;
            parts.push(lines[i].fullText.trim());
        }
        // Drop the boilerplate sentence that ends in "performed." and keep the remainder.
        contractAnalysis = parts.join(" ").replace(/^I\s+(did|did not).*?performed\.\s*/is, "").trim();
        // Guarded like the other first-segment boxes in this file.
        if (analysisLine.segments[0])
            bb.contractAnalysis = toBBox(analysisLine.segments[0], analysisLine);
    }

    let contractPrice = null;
    let dateOfContract = "";
    const contractLine = findLine(lines, /^Contract Price\s*\$/i);
    if (contractLine) {
        for (const seg of contractLine.segments) {
            const text = seg.text.trim();
            if (/^Contract Price\s*\$/i.test(text)) {
                contractPrice = parseNum(extractAfterLabel(seg, /^Contract Price\s*\$\s*/i));
                bb.contractPrice = toBBox(seg, contractLine);
            }
            else if (/^Date of Contract\s/i.test(text)) {
                dateOfContract = extractAfterLabel(seg, /^Date of Contract\s+/i);
                bb.dateOfContract = toBBox(seg, contractLine);
            }
        }
    }

    let financialAssistanceAmount = "";
    const assistLine = findLine(lines, /If Yes, report the total dollar/i);
    const assistSeg = assistLine?.segments.find((s) => /^\$/.test(s.text.trim()));
    if (assistSeg) {
        financialAssistanceAmount = assistSeg.text.trim();
        // The box key is `financialAssistance`, not the field name.
        bb.financialAssistance = toBBox(assistSeg, assistLine);
    }

    return { isOfferedForSale, reportDataSources, contractAnalysis, contractPrice, dateOfContract, financialAssistanceAmount, boundingBoxes: bb };
}
239
+ // ── Neighborhood ──────────────────────────────────────────────────────────
240
/**
 * Parse the Neighborhood section from the text lines of page 1.
 *
 * Checkbox fields (Location, Built-Up, Growth, Property Values, etc.) have all
 * option labels rendered as static text — the checked state is a graphical
 * vector path and cannot be extracted from text. They are returned as ""; the
 * main parser derives location from the UAD-coded subject location on page 2.
 *
 * @param {Array<object>} lines - Text lines; the code reads `fullText`, `y` and
 *   `segments` (each segment's `text` and `x`) from every line.
 * @returns {object} Price/age ranges, land-use percentages, boundaries,
 *   description, market conditions and `boundingBoxes`. Numeric fields are null
 *   and text fields "" when not found.
 */
export function parseNeighborhoodSection(lines) {
    const bb = {};
    const location = "";

    /**
     * Read the price (x in [430, 490)) and age (x in [485, 510)) cells of one
     * grid row. `marked` matches the number followed by the row's marker word;
     * a bare leading number is accepted as a fallback.
     */
    const readPriceAndAge = (line, marked, priceKey, ageKey) => {
        const cell = { price: null, age: null };
        if (!line)
            return cell;
        const priceSeg = line.segments.find((s) => s.x >= 430 && s.x < 490);
        if (priceSeg) {
            const match = priceSeg.text.match(marked) || priceSeg.text.match(/^(\d[\d,]*)/);
            if (match)
                cell.price = parseNum(match[1]);
            bb[priceKey] = toBBox(priceSeg, line);
        }
        const ageSeg = line.segments.find((s) => s.x >= 485 && s.x < 510);
        if (ageSeg) {
            // Strip whatever non-digit text trails the number.
            cell.age = parseNum(ageSeg.text.replace(/\D*$/, ""));
            bb[ageKey] = toBBox(ageSeg, line);
        }
        return cell;
    };

    // The three grid rows carrying price/age data: the "Growth" row holds Low,
    // the "Neighborhood Boundaries" row holds High, and the first "Pred" row
    // below Growth holds the predominant values.
    const growthLine = findLine(lines, /^Growth/i);
    const boundaryLine = findLine(lines, /^Neighborhood Boundaries/i);
    const predLine = lines.find((l) => l.y > (growthLine?.y ?? 0) && /Pred\.?/i.test(l.fullText));

    const low = readPriceAndAge(growthLine, /(\d[\d,]*)\s+Low/i, "priceLow", "ageLow");
    const high = readPriceAndAge(boundaryLine, /(\d[\d,]*)\s+High/i, "priceHigh", "ageHigh");
    const pred = readPriceAndAge(predLine, /(\d[\d,]*)\s+Pred/i, "pricePredominant", "agePredominant");

    // Land use percentages from the rightmost column.
    const landUse = {
        landUseOneUnit: null,
        landUseTwoFourUnit: null,
        landUseMultiFamily: null,
        landUseCommercial: null,
        landUseOther: null,
    };
    const landUseRules = [
        [/One-Unit/i, "landUseOneUnit"],
        [/2-4 Unit/i, "landUseTwoFourUnit"],
        [/Multi/i, "landUseMultiFamily"],
        [/Commercial/i, "landUseCommercial"],
        [/Other/i, "landUseOther"],
    ];
    for (const l of lines) {
        if (l.y < 320 || l.y > 390)
            continue;
        for (const seg of l.segments) {
            if (seg.x < 505)
                continue;
            const pct = seg.text.match(/(\d+)\s*%/);
            if (!pct)
                continue;
            // Prefer an explicit label segment in the column; otherwise fall
            // back to the text of the whole line.
            const label = l.segments.find((s) => s.x >= 505 && s.x < 575 && /One-Unit|2-4 Unit|Multi-?Family|Commercial|Other/i.test(s.text));
            const labelText = label?.text ?? l.fullText;
            const rule = landUseRules.find(([pattern]) => pattern.test(labelText));
            if (!rule)
                continue;
            landUse[rule[1]] = parseNum(pct[1]);
            bb[rule[1]] = toBBox(seg, l);
        }
    }

    // Neighborhood boundaries
    let boundaries = "";
    if (boundaryLine) {
        const valueSeg = boundaryLine.segments.find((s) => s.x > 100);
        if (valueSeg) {
            boundaries = valueSeg.text.trim();
            bb.boundaries = toBBox(valueSeg, boundaryLine);
        }
        // May continue on the next line when that line is within 20 units below.
        const following = lines[lines.indexOf(boundaryLine) + 1];
        if (following && following.y < (boundaryLine.y + 20)) {
            const nextSeg = following.segments.find((s) => s.x < 430);
            if (nextSeg)
                boundaries = `${boundaries} ${nextSeg.text.trim()}`;
        }
    }

    // Neighborhood description
    let description = "";
    const descLine = findLine(lines, /^Neighborhood Description/i);
    if (descLine) {
        const parts = [];
        const valueSeg = descLine.segments.find((s) => s.x > 100);
        if (valueSeg) {
            parts.push(valueSeg.text.trim());
            bb.description = toBBox(valueSeg, descLine);
        }
        for (let i = lines.indexOf(descLine) + 1; i < lines.length; i++) {
            const full = lines[i].fullText;
            // A heading line ends the description only when every one of its
            // segments sits left of x = 430.
            if (/^Market Conditions|^Dimensions|^NEIGHBORHOOD$/i.test(full.trim()) && !/NEIGHBORHOOD CALLED/i.test(full)) {
                if (lines[i].segments.every((s) => s.x < 430))
                    break;
            }
            const txt = lines[i].segments.filter((s) => s.x < 430).map((s) => s.text.trim()).join(" ");
            if (txt && !/^NEIGHBORHOOD$/i.test(txt.trim()))
                parts.push(txt);
            if (/^Market Conditions/i.test(full))
                break;
        }
        description = parts.join(" ").trim();
    }

    // Market conditions
    let marketConditions = "";
    const mcLine = findLine(lines, /^Market Conditions\s*\(/i);
    if (mcLine) {
        const parts = [];
        const valueSeg = mcLine.segments.find((s) => s.x > 200);
        if (valueSeg) {
            parts.push(valueSeg.text.trim());
            bb.marketConditions = toBBox(valueSeg, mcLine);
        }
        for (let i = lines.indexOf(mcLine) + 1; i < lines.length; i++) {
            const full = lines[i].fullText;
            if (/^Dimensions|^THIS OPINION/i.test(full)) {
                // A "THIS OPINION" line is kept as the closing line of the text.
                if (/^THIS OPINION/i.test(full))
                    parts.push(full.trim());
                break;
            }
            parts.push(full.trim());
        }
        marketConditions = parts.join(" ").trim();
    }

    return {
        location, builtUp: "", growth: "", propertyValues: "", demandSupply: "", marketingTime: "",
        priceLow: low.price, priceHigh: high.price, pricePredominant: pred.price,
        ageLow: low.age, ageHigh: high.age, agePredominant: pred.age,
        ...landUse,
        boundaries, description, marketConditions, boundingBoxes: bb,
    };
}
405
+ // ── Site ──────────────────────────────────────────────────────────────────
406
/**
 * Parse the Site section from the text lines of page 1.
 *
 * @param {Array<object>} lines - Text lines; the code reads `fullText` and
 *   `segments` (each segment's `text`) from every line.
 * @returns {object} Dimensions, area, shape, view, zoning fields, highest and
 *   best use, FEMA fields, adverse-conditions text and `boundingBoxes`. Fields
 *   that are not found are "".
 */
export function parseSiteSection(lines) {
    const bb = {};
    // Declared in the order the fields are returned.
    const fields = {
        dimensions: "",
        area: "",
        shape: "",
        view: "",
        zoningClassification: "",
        zoningDescription: "",
        zoningCompliance: "",
        highestAndBestUse: "",
        femaFloodZone: "",
        femaMapNumber: "",
        femaMapDate: "",
        adverseConditions: "",
    };
    /**
     * For every segment on `line`, apply the first rule whose `when` pattern
     * matches the trimmed segment text: strip the label, store the value and
     * (unless `noBox`) record the segment's bounding box.
     */
    const readLabeledSegments = (line, rules) => {
        if (!line)
            return;
        for (const seg of line.segments) {
            const text = seg.text.trim();
            const rule = rules.find((r) => r.when.test(text));
            if (!rule)
                continue;
            fields[rule.key] = extractAfterLabel(seg, rule.strip);
            if (!rule.noBox)
                bb[rule.key] = toBBox(seg, line);
        }
    };

    readLabeledSegments(findLine(lines, /^Dimensions\s/i), [
        { key: "dimensions", when: /^Dimensions\s/i, strip: /^Dimensions\s+/i },
        { key: "area", when: /^Area\s/i, strip: /^Area\s+/i },
        { key: "shape", when: /^Shape\s/i, strip: /^Shape\s+/i },
        { key: "view", when: /^View\s/i, strip: /^View\s+/i },
    ]);

    // Labels here are detected without requiring trailing whitespace, so strip
    // with `\s*`: a label-only segment must yield "" rather than the label.
    readLabeledSegments(findLine(lines, /^Specific Zoning Classification/i), [
        { key: "zoningClassification", when: /^Specific Zoning Classification/i, strip: /^Specific Zoning Classification\s*/i },
        { key: "zoningDescription", when: /^Zoning Description/i, strip: /^Zoning Description\s*/i },
    ]);

    // Zoning compliance: the value is the segment right after the label.
    const compLine = findLine(lines, /^Zoning Compliance/i);
    if (compLine) {
        const at = compLine.segments.findIndex((s) => /^Zoning Compliance/i.test(s.text.trim()));
        const next = at >= 0 ? compLine.segments[at + 1] : undefined;
        if (next) {
            fields.zoningCompliance = next.text.trim();
            bb.zoningCompliance = toBBox(next, compLine);
        }
    }

    const hbuText = findLine(lines, /highest and best use/i)?.fullText ?? "";
    if (hbuText.includes("SEE ATTACHED"))
        fields.highestAndBestUse = "SEE ATTACHED";
    else if (hbuText.includes("Yes"))
        fields.highestAndBestUse = "Yes";

    // The FEMA labels are detected anywhere in a segment, so every strip pattern
    // also swallows whatever precedes the label (as the Flood Zone one always did).
    // No bounding box is recorded for the flood zone.
    readLabeledSegments(findLine(lines, /^FEMA Special Flood/i), [
        { key: "femaFloodZone", when: /FEMA Flood Zone\s/i, strip: /.*?FEMA Flood Zone\s+/i, noBox: true },
        { key: "femaMapNumber", when: /FEMA Map #\s/i, strip: /^.*?FEMA Map #\s+/i },
        { key: "femaMapDate", when: /FEMA Map Date\s/i, strip: /^.*?FEMA Map Date\s+/i },
    ]);

    // Adverse conditions text: the lines after the question, up to the next
    // section heading or through a "CURRENT LAND SURVEY" line.
    const adverseLine = findLine(lines, /adverse site conditions/i);
    if (adverseLine) {
        const parts = [];
        for (let i = lines.indexOf(adverseLine) + 1; i < lines.length; i++) {
            if (/^General Description|^Units/i.test(lines[i].fullText))
                break;
            const txt = lines[i].fullText.trim();
            if (!txt)
                continue;
            parts.push(txt);
            if (/^CURRENT LAND SURVEY/.test(txt))
                break;
        }
        fields.adverseConditions = parts.join(" ").trim();
        if (adverseLine.segments[0])
            bb.adverseConditions = toBBox(adverseLine.segments[0], adverseLine);
    }

    return { ...fields, boundingBoxes: bb };
}
497
+ // ── Improvements ──────────────────────────────────────────────────────────
498
/**
 * Parse the Improvements section from the text lines of page 1.
 *
 * @param {Array<object>} lines - Text lines; the code reads `fullText`, `y` and
 *   `segments` (each segment's `text` and `x`) from every line.
 * @returns {object} General description, exterior/interior materials, heating
 *   and cooling, amenities, car storage, room counts, free-text descriptions
 *   and `boundingBoxes`. Numeric fields are null and text fields "" when not found.
 */
export function parseImprovementsSection(lines) {
    const bb = {};

    /**
     * Read a left-column grid value. The value is the first segment to the
     * right of the label segment with x below `maxX`; failing that, whatever
     * follows the label inside the label segment itself.
     */
    const readGridValue = (label, strip, maxX, key) => {
        const line = findLine(lines, label);
        const labelSeg = line?.segments.find((s) => label.test(s.text));
        if (!labelSeg)
            return "";
        const next = line.segments.find((s) => s.x > labelSeg.x && s.x < maxX);
        bb[key] = toBBox(next ?? labelSeg, line);
        return next ? next.text : extractAfterLabel(labelSeg, strip);
    };

    const stories = parseNum(readGridValue(/^# of Stories/i, /^# of Stories\s*/i, 180, "stories"));
    const designStyle = readGridValue(/^Design \(Style\)/i, /^Design \(Style\)\s*/i, 180, "designStyle").trim();
    const yearBuilt = parseNum(readGridValue(/^Year Built/i, /^Year Built\s*/i, 170, "yearBuilt"));

    const effMatch = findLine(lines, /^Effective Age/i)?.fullText.match(/Effective Age \(Yrs\)\s+(\d+)/i);
    const effectiveAge = effMatch ? parseNum(effMatch[1]) : null;

    /**
     * Exterior & interior descriptions from the grid rows with y in [615, 760].
     * The value follows the label inside the same segment, or is the next
     * segment on the line. The bounding box is stored under the explicit field
     * name `key`; deriving the key from `label.source` kept the "s" of `\s*`
     * and produced names such as "FoundationWallss".
     */
    const findMaterialCondition = (label, key) => {
        for (const l of lines) {
            if (l.y < 615 || l.y > 760)
                continue;
            for (const [i, seg] of l.segments.entries()) {
                if (!label.test(seg.text))
                    continue;
                const inline = seg.text.replace(label, "").trim();
                if (inline) {
                    bb[key] = toBBox(seg, l);
                    return inline;
                }
                const next = l.segments[i + 1];
                if (next) {
                    bb[key] = toBBox(next, l);
                    return next.text.trim();
                }
            }
        }
        return "";
    };
    const foundationWalls = findMaterialCondition(/^Foundation Walls\s*/i, "foundationWalls");
    const exteriorWalls = findMaterialCondition(/^Exterior Walls\s*/i, "exteriorWalls");
    const roofSurface = findMaterialCondition(/^Roof Surface\s*/i, "roofSurface");
    const guttersDownspouts = findMaterialCondition(/^Gutters & Downspouts\s*/i, "guttersDownspouts");
    const windowType = findMaterialCondition(/^Window Type\s*/i, "windowType");
    const floors = findMaterialCondition(/^Floors\s*/i, "floors");
    const walls = findMaterialCondition(/^Walls\s*/i, "walls");
    const trimFinish = findMaterialCondition(/^Trim\/Finish\s*/i, "trimFinish");
    const bathFloor = findMaterialCondition(/^Bath Floor\s*/i, "bathFloor");
    const bathWainscot = findMaterialCondition(/^Bath Wainscot\s*/i, "bathWainscot");

    // Heating: the first of these keywords found on the line wins.
    let heatingType = "";
    let heatingFuel = "";
    const heatLine = findLine(lines, /Heating/i, { minY: 700 });
    if (heatLine) {
        if (/FWA/i.test(heatLine.fullText))
            heatingType = "FWA";
        else if (/HWBB/i.test(heatLine.fullText))
            heatingType = "HWBB";
        else if (/Radiant/i.test(heatLine.fullText))
            heatingType = "Radiant";
        const fuelSeg = heatLine.segments.find((s) => /Fuel\s/i.test(s.text));
        if (fuelSeg)
            heatingFuel = extractAfterLabel(fuelSeg, /.*?Fuel\s+/i);
    }

    // Cooling
    let coolingType = "";
    const coolText = findLine(lines, /Cooling/i, { minY: 720 })?.fullText ?? "";
    if (/Central Air/i.test(coolText))
        coolingType = "Central";
    else if (/Individual/i.test(coolText))
        coolingType = "Individual";

    // Amenities: labelled segments on lines with y in [710, 755].
    const amenities = { fireplaces: null, patioOrDeck: "", pool: "", fence: "", porch: "" };
    const amenityRules = [
        // No bounding box is recorded for the fireplace count.
        { key: "fireplaces", when: /^Fireplace\(s\) #\s/i, strip: /^Fireplace\(s\) #\s*/i, numeric: true, noBox: true },
        { key: "patioOrDeck", when: /^Patio\/Deck\s/i, strip: /^Patio\/Deck\s+/i },
        { key: "pool", when: /^Pool\s/i, strip: /^Pool\s+/i },
        { key: "fence", when: /^Fence\s/i, strip: /^Fence\s+/i },
        { key: "porch", when: /^Porch\s/i, strip: /^Porch\s+/i },
    ];
    for (const l of lines) {
        if (l.y < 710 || l.y > 755)
            continue;
        for (const seg of l.segments) {
            const text = seg.text.trim();
            const rule = amenityRules.find((r) => r.when.test(text));
            if (!rule)
                continue;
            const raw = extractAfterLabel(seg, rule.strip);
            amenities[rule.key] = rule.numeric ? parseNum(raw) : raw;
            if (!rule.noBox)
                bb[rule.key] = toBBox(seg, l);
        }
    }

    // Car storage: lines with y in [685, 755].
    let drivewayCarCount = null;
    let drivewaySurface = "";
    let garageCarCount = null;
    let carportCarCount = null;
    for (const l of lines) {
        if (l.y < 685 || l.y > 755)
            continue;
        for (const seg of l.segments) {
            if (/^Driveway Surface\s/i.test(seg.text.trim()))
                drivewaySurface = extractAfterLabel(seg, /^Driveway Surface\s+/i);
        }
        // # of Cars values are at x ~566
        const carsSeg = l.segments.find((s) => s.x >= 555 && s.x < 590 && /^\d+$/.test(s.text.trim()));
        if (!carsSeg)
            continue;
        if (/Driveway.*# of Cars/i.test(l.fullText))
            drivewayCarCount = parseNum(carsSeg.text);
        else if (/Garage.*# of Cars/i.test(l.fullText))
            garageCarCount = parseNum(carsSeg.text);
        else if (/Carport.*# of Cars/i.test(l.fullText))
            carportCarCount = parseNum(carsSeg.text);
    }

    // Room count line: every pattern is tried against every segment.
    const rooms = { roomCount: null, bedrooms: null, baths: null, grossLivingArea: null };
    const roomRules = [
        { key: "roomCount", pattern: /^(\d+)\s*Rooms?/i },
        { key: "bedrooms", pattern: /^(\d+)\s*Bedrooms?/i },
        { key: "baths", pattern: /^([\d.]+)\s*Bath/i },
        { key: "grossLivingArea", pattern: /^([\d,]+)\s*Square Feet/i },
    ];
    const roomLine = findLine(lines, /^Finished area above grade/i);
    if (roomLine) {
        for (const seg of roomLine.segments) {
            const text = seg.text.trim();
            for (const rule of roomRules) {
                const match = text.match(rule.pattern);
                if (!match)
                    continue;
                rooms[rule.key] = parseNum(match[1]);
                bb[rule.key] = toBBox(seg, roomLine);
            }
        }
    }

    // Additional features
    let additionalFeatures = "";
    const featLine = findLine(lines, /^Additional features/i);
    const featSeg = featLine?.segments.find((s) => s.x > 180);
    if (featSeg) {
        additionalFeatures = featSeg.text.trim();
        bb.additionalFeatures = toBBox(featSeg, featLine);
    }

    /** Trimmed text of the lines after `startLine`, up to the first line matching `stop`. */
    const textUntil = (startLine, stop) => {
        const collected = [];
        for (let i = lines.indexOf(startLine) + 1; i < lines.length; i++) {
            if (stop.test(lines[i].fullText))
                break;
            collected.push(lines[i].fullText.trim());
        }
        return collected;
    };

    // Condition description
    let conditionDescription = "";
    const condLine = findLine(lines, /^Describe the condition|^IMPROVEMENTS.*Describe the condition/i);
    if (condLine) {
        // The answer may start on the same line at high x.
        const sameLineSeg = condLine.segments.find((s) => s.x > 350);
        const parts = sameLineSeg ? [sameLineSeg.text.trim()] : [];
        parts.push(...textUntil(condLine, /^Are there any physical deficiencies/i));
        conditionDescription = parts.join(" ").trim();
        if (condLine.segments[0])
            bb.conditionDescription = toBBox(condLine.segments[0], condLine);
    }

    // Physical deficiencies
    let physicalDeficiencies = "";
    const defLine = findLine(lines, /^Are there any physical deficiencies/i);
    if (defLine)
        physicalDeficiencies = textUntil(defLine, /^Does the property generally conform/i).join(" ").trim();

    // Conformity
    let conformity = "";
    const confLine = findLine(lines, /^Does the property generally conform/i);
    if (confLine)
        conformity = textUntil(confLine, /^Freddie Mac Form|^Form 1004UAD/i).join(" ").trim();

    // Type: read from the "Type" line with y in [640, 660].
    // NOTE(review): when both a "Det." and an "Att." segment are present,
    // "Attached" wins — confirm the real checkbox state is resolved elsewhere.
    let typeStr = "";
    const typeLine = findLine(lines, /^Type/i, { minY: 640, maxY: 660 });
    if (typeLine) {
        if (typeLine.segments.some((s) => /^Det\./i.test(s.text.trim())))
            typeStr = "Detached";
        if (typeLine.segments.some((s) => /^Att\./i.test(s.text.trim())))
            typeStr = "Attached";
    }

    return {
        stories, type: typeStr, designStyle, yearBuilt, effectiveAge,
        foundationWalls, exteriorWalls, roofSurface, guttersDownspouts, windowType,
        floors, walls, trimFinish, bathFloor, bathWainscot,
        heatingType, heatingFuel, coolingType,
        ...amenities,
        drivewayCarCount, drivewaySurface, garageCarCount, carportCarCount,
        ...rooms,
        additionalFeatures, conditionDescription, physicalDeficiencies, conformity,
        boundingBoxes: bb,
    };
}