@parseo/appraisals 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +35 -0
  2. package/dist/form-1004mc/extract-checkboxes.d.ts +43 -0
  3. package/dist/form-1004mc/extract-checkboxes.d.ts.map +1 -0
  4. package/dist/form-1004mc/extract-checkboxes.js +145 -0
  5. package/dist/form-1004mc/index.d.ts +3 -0
  6. package/dist/form-1004mc/index.d.ts.map +1 -0
  7. package/dist/form-1004mc/index.js +1 -0
  8. package/dist/form-1004mc/parse-page1.d.ts +8 -0
  9. package/dist/form-1004mc/parse-page1.d.ts.map +1 -0
  10. package/dist/form-1004mc/parse-page1.js +760 -0
  11. package/dist/form-1004mc/parse-sales.d.ts +6 -0
  12. package/dist/form-1004mc/parse-sales.d.ts.map +1 -0
  13. package/dist/form-1004mc/parse-sales.js +505 -0
  14. package/dist/form-1004mc/parser.d.ts +5 -0
  15. package/dist/form-1004mc/parser.d.ts.map +1 -0
  16. package/dist/form-1004mc/parser.js +437 -0
  17. package/dist/form-1004mc/types.d.ts +302 -0
  18. package/dist/form-1004mc/types.d.ts.map +1 -0
  19. package/dist/form-1004mc/types.js +1 -0
  20. package/dist/form-1073/index.d.ts +3 -0
  21. package/dist/form-1073/index.d.ts.map +1 -0
  22. package/dist/form-1073/index.js +1 -0
  23. package/dist/form-1073/parse-page1.d.ts +8 -0
  24. package/dist/form-1073/parse-page1.d.ts.map +1 -0
  25. package/dist/form-1073/parse-page1.js +704 -0
  26. package/dist/form-1073/parse-page2.d.ts +6 -0
  27. package/dist/form-1073/parse-page2.d.ts.map +1 -0
  28. package/dist/form-1073/parse-page2.js +438 -0
  29. package/dist/form-1073/parse-sales.d.ts +7 -0
  30. package/dist/form-1073/parse-sales.d.ts.map +1 -0
  31. package/dist/form-1073/parse-sales.js +477 -0
  32. package/dist/form-1073/parser.d.ts +5 -0
  33. package/dist/form-1073/parser.d.ts.map +1 -0
  34. package/dist/form-1073/parser.js +102 -0
  35. package/dist/form-1073/types.d.ts +300 -0
  36. package/dist/form-1073/types.d.ts.map +1 -0
  37. package/dist/form-1073/types.js +1 -0
  38. package/dist/index.d.ts +13 -0
  39. package/dist/index.d.ts.map +1 -0
  40. package/dist/index.js +9 -0
  41. package/dist/richer-values/index.d.ts +3 -0
  42. package/dist/richer-values/index.d.ts.map +1 -0
  43. package/dist/richer-values/index.js +1 -0
  44. package/dist/richer-values/parser.d.ts +5 -0
  45. package/dist/richer-values/parser.d.ts.map +1 -0
  46. package/dist/richer-values/parser.js +1067 -0
  47. package/dist/richer-values/types.d.ts +225 -0
  48. package/dist/richer-values/types.d.ts.map +1 -0
  49. package/dist/richer-values/types.js +1 -0
  50. package/package.json +24 -0
@@ -0,0 +1,704 @@
1
+ import { toBBox } from "@parseo/shared";
2
+ // ── Utilities ────────────────────────────────────────────────────────────
3
/**
 * Parse a numeric form value such as "$1,250", "45%" or "2019".
 *
 * Dollar signs, percent signs and thousands separators are removed before the
 * text is converted with `Number()`.
 *
 * @param {string | null | undefined} raw - Raw text taken from a form field.
 * @returns {number | null} The parsed finite number, or `null` when the input
 *   is empty, contains only formatting characters, or is not a finite number.
 */
function parseNum(raw) {
  if (!raw) {
    return null;
  }
  // One pass strips "$", "," and "%"; String() keeps a stray non-string
  // input from throwing on `.replace`.
  const cleaned = String(raw).replace(/[$,%]/g, "").trim();
  if (cleaned === "") {
    return null;
  }
  const value = Number(cleaned);
  // Number("Infinity") is not NaN, so reject every non-finite result.
  return Number.isFinite(value) ? value : null;
}
12
/**
 * Return the value part of a "Label value" segment.
 *
 * The segment text is trimmed *before* the label is removed. Callers test the
 * label against the trimmed text, and most label patterns are anchored with
 * `^`; without the leading trim a segment such as "  City Miami" would keep
 * its label.
 *
 * @param {{ text: string }} seg - Segment whose `text` starts with the label.
 * @param {RegExp | string} label - Pattern (or literal) to remove.
 * @returns {string} The remaining text, trimmed.
 */
function extractAfterLabel(seg, label) {
  return seg.text.trim().replace(label, "").trim();
}
15
/**
 * Find the first line whose `fullText` matches `pattern`, optionally limited
 * to a vertical band.
 *
 * @param {Array<{ y: number, fullText: string }>} lines - Lines to search, in order.
 * @param {RegExp} pattern - Pattern tested against each line's `fullText`.
 * @param {{ minY?: number, maxY?: number }} [opts] - Inclusive `y` bounds;
 *   lines outside the band are skipped without being tested.
 * @returns {object | undefined} The first matching line, or `undefined`.
 */
function findLine(lines, pattern, opts) {
  const minY = opts?.minY;
  const maxY = opts?.maxY;
  return lines.find((line) => {
    if (minY !== undefined && line.y < minY) {
      return false;
    }
    if (maxY !== undefined && line.y > maxY) {
      return false;
    }
    // A regex with the g or y flag remembers where the previous test() ended;
    // start every test from the beginning so no line is skipped.
    pattern.lastIndex = 0;
    return pattern.test(line.fullText);
  });
}
24
+ // ── Subject ──────────────────────────────────────────────────────────────
25
/**
 * Extract the Subject section fields from the text lines of page 1.
 *
 * Each field is located by finding the line that begins with a known label and
 * then scanning that line's segments for "Label value" text. When a label
 * occurs in several segments of a row, the last one wins.
 *
 * @param {Array<{ y: number, fullText: string, segments: Array<{ text: string }> }>} lines
 *   Text lines of the page.
 * @returns {object} Subject fields. Text fields default to "" and numeric
 *   fields to `null`; `boundingBoxes` holds the `toBBox` result for the fields
 *   that record one. `occupant` is never populated here and is always "".
 */
export function parseSubjectSection(lines) {
  const boxes = {};

  // Row: Property Address / Unit # / City / State / Zip Code
  let propertyAddress = "";
  let unitNumber = "";
  let city = "";
  let state = "";
  let zipCode = "";
  const addressRow = findLine(lines, /^Property Address/i);
  if (addressRow) {
    for (const part of addressRow.segments) {
      const text = part.text.trim();
      if (/^Property Address\s/i.test(text)) {
        propertyAddress = extractAfterLabel(part, /^Property Address\s+/i);
        boxes.propertyAddress = toBBox(part, addressRow);
        continue;
      }
      if (/^Unit #\s/i.test(text)) {
        unitNumber = extractAfterLabel(part, /^Unit #\s+/i);
        boxes.unitNumber = toBBox(part, addressRow);
        continue;
      }
      if (/^City\s/i.test(text)) {
        city = extractAfterLabel(part, /^City\s+/i);
        boxes.city = toBBox(part, addressRow);
        continue;
      }
      if (/^State\s/i.test(text)) {
        state = extractAfterLabel(part, /^State\s+/i);
        boxes.state = toBBox(part, addressRow);
        continue;
      }
      if (/^Zip Code\s/i.test(text)) {
        zipCode = extractAfterLabel(part, /^Zip Code\s+/i);
        boxes.zipCode = toBBox(part, addressRow);
      }
    }
  }

  // Row: Borrower / Owner of Public Record / County
  let borrower = "";
  let ownerOfPublicRecord = "";
  let county = "";
  const borrowerRow = findLine(lines, /^Borrower\s/i);
  if (borrowerRow) {
    for (const part of borrowerRow.segments) {
      const text = part.text.trim();
      if (/^Borrower\s/i.test(text)) {
        borrower = extractAfterLabel(part, /^Borrower\s+/i);
        boxes.borrower = toBBox(part, borrowerRow);
        continue;
      }
      if (/Owner of Public Record/i.test(text)) {
        const afterLabel = extractAfterLabel(part, /^Owner of Public Record\s*/i);
        // The county can be glued onto the owner text
        // (e.g. "...TrustCounty Palm Beach"); split it off when present.
        const embeddedCounty = afterLabel.match(/County\s+(.+)$/i);
        if (embeddedCounty) {
          county = embeddedCounty[1].trim();
          ownerOfPublicRecord = afterLabel.slice(0, embeddedCounty.index).trim();
        } else {
          ownerOfPublicRecord = afterLabel;
        }
        boxes.ownerOfPublicRecord = toBBox(part, borrowerRow);
        continue;
      }
      if (/^County\s/i.test(text)) {
        county = extractAfterLabel(part, /^County\s+/i);
        boxes.county = toBBox(part, borrowerRow);
      }
    }
  }

  // Row: Legal Description (only the first segment is read)
  let legalDescription = "";
  const legalRow = findLine(lines, /^Legal Description\s/i);
  if (legalRow) {
    const firstPart = legalRow.segments[0];
    legalDescription = extractAfterLabel(firstPart ?? { text: "" }, /^Legal Description\s+/i);
    if (firstPart) {
      boxes.legalDescription = toBBox(firstPart, legalRow);
    }
  }

  // Row: Assessor's Parcel # / Tax Year / R.E. Taxes $
  let assessorParcelNumber = "";
  let taxYear = null;
  let realEstateTaxes = null;
  const parcelRow = findLine(lines, /^Assessor's Parcel #/i);
  if (parcelRow) {
    for (const part of parcelRow.segments) {
      const text = part.text.trim();
      if (/^Assessor's Parcel #/i.test(text)) {
        assessorParcelNumber = extractAfterLabel(part, /^Assessor's Parcel #\s*/i);
        boxes.assessorParcelNumber = toBBox(part, parcelRow);
        continue;
      }
      if (/^Tax Year/i.test(text)) {
        taxYear = parseNum(extractAfterLabel(part, /^Tax Year\s*/i));
        continue;
      }
      if (/^R\.?E\.?\s*Taxes\s*\$/i.test(text)) {
        realEstateTaxes = parseNum(extractAfterLabel(part, /^R\.?E\.?\s*Taxes\s*\$\s*/i));
        boxes.realEstateTaxes = toBBox(part, parcelRow);
      }
    }
  }

  // Row: Project Name / Phase # / Map Reference / Census Tract
  let projectName = "";
  let phase = "";
  let mapReference = "";
  let censusTract = "";
  const projectRow = findLine(lines, /^Project Name\s/i);
  if (projectRow) {
    for (const part of projectRow.segments) {
      const text = part.text.trim();
      if (/^Project Name\s/i.test(text)) {
        projectName = extractAfterLabel(part, /^Project Name\s+/i);
        boxes.projectName = toBBox(part, projectRow);
        continue;
      }
      if (/^Phase #\s/i.test(text)) {
        phase = extractAfterLabel(part, /^Phase #\s+/i);
        continue;
      }
      if (/^Map Reference/i.test(text)) {
        mapReference = extractAfterLabel(part, /^Map Reference\s+/i);
        continue;
      }
      if (/^Census Tract/i.test(text)) {
        censusTract = extractAfterLabel(part, /^Census Tract\s+/i);
      }
    }
  }

  // Row: Occupant / Special Assessments $ / HOA $ / per year|month
  const occupant = "";
  let specialAssessments = null;
  let hoaAmount = null;
  let hoaPeriod = "";
  const occupantRow = findLine(lines, /^Occupant/i);
  if (occupantRow) {
    for (const part of occupantRow.segments) {
      const text = part.text.trim();
      if (/^Special Assessments\s*\$/i.test(text)) {
        specialAssessments = parseNum(extractAfterLabel(part, /^Special Assessments\s*\$\s*/i));
        continue;
      }
      if (/^HOA\s*\$/i.test(text)) {
        hoaAmount = parseNum(extractAfterLabel(part, /^HOA\s*\$\s*/i));
        boxes.hoaAmount = toBBox(part, occupantRow);
        continue;
      }
      if (/^per (year|month)/i.test(text)) {
        hoaPeriod = text;
      }
    }
  }

  // Property Rights Appraised: the value is the segment right after the label.
  let propertyRightsAppraised = "";
  const rightsRow = findLine(lines, /^Property Rights Appraised/i);
  if (rightsRow) {
    const labelAt = rightsRow.segments.findIndex((s) => /^Property Rights Appraised/i.test(s.text.trim()));
    const valuePart = labelAt >= 0 ? rightsRow.segments[labelAt + 1] : undefined;
    if (valuePart) {
      propertyRightsAppraised = valuePart.text.trim();
    }
  }

  // Assignment Type: same "next segment" convention.
  let assignmentType = "";
  const assignmentRow = findLine(lines, /Assignment Type/i);
  if (assignmentRow) {
    const labelAt = assignmentRow.segments.findIndex((s) => /Assignment Type/i.test(s.text));
    const valuePart = labelAt >= 0 ? assignmentRow.segments[labelAt + 1] : undefined;
    if (valuePart) {
      assignmentType = valuePart.text.trim();
    }
  }

  // Row: Lender/Client / Address
  let lenderClient = "";
  let lenderAddress = "";
  const lenderRow = findLine(lines, /^Lender\/Client\s/i);
  if (lenderRow) {
    for (const part of lenderRow.segments) {
      const text = part.text.trim();
      if (/^Lender\/Client\s/i.test(text)) {
        lenderClient = extractAfterLabel(part, /^Lender\/Client\s+/i);
        boxes.lenderClient = toBBox(part, lenderRow);
        continue;
      }
      if (/^Address\s/i.test(text)) {
        lenderAddress = extractAfterLabel(part, /^Address\s+/i);
        boxes.lenderAddress = toBBox(part, lenderRow);
      }
    }
  }

  return {
    propertyAddress,
    unitNumber,
    city,
    state,
    zipCode,
    borrower,
    ownerOfPublicRecord,
    county,
    legalDescription,
    assessorParcelNumber,
    taxYear,
    realEstateTaxes,
    projectName,
    phase,
    mapReference,
    censusTract,
    occupant,
    specialAssessments,
    hoaAmount,
    hoaPeriod,
    propertyRightsAppraised,
    assignmentType,
    lenderClient,
    lenderAddress,
    boundingBoxes: boxes,
  };
}
174
+ // ── Contract ─────────────────────────────────────────────────────────────
175
/**
 * Extract the Contract section fields from the text lines of page 1.
 *
 * @param {Array<{ fullText: string, segments: Array<{ text: string, x: number }> }>} lines
 *   Text lines of the page.
 * @returns {object} Contract fields: `isOfferedForSale` ("Yes" | "No" | ""),
 *   `reportDataSources`, `contractAnalysis`, `contractPrice` (number or null),
 *   `dateOfContract`, `financialAssistance` and `boundingBoxes`.
 */
export function parseContractSection(lines) {
  const boxes = {};

  // "No" anywhere in the line takes precedence; "Yes" counts only without it.
  let isOfferedForSale = "";
  const offeredRow = findLine(lines, /currently offered for sale/i);
  if (offeredRow != null) {
    const offeredText = offeredRow.fullText;
    if (offeredText.includes("No")) {
      isOfferedForSale = "No";
    } else if (offeredText.includes("Yes")) {
      isOfferedForSale = "Yes";
    }
  }

  // Report data source(s): value segment plus any continuation lines.
  let reportDataSources = "";
  const sourceRow = findLine(lines, /^Report data source/i);
  if (sourceRow) {
    const valuePart = sourceRow.segments.find((s) => s.x > 180);
    if (valuePart) {
      reportDataSources = valuePart.text.trim();
      boxes.reportDataSources = toBBox(valuePart, sourceRow);
    }
    const rowAt = lines.indexOf(sourceRow);
    if (rowAt >= 0) {
      // Continuation runs until the analysis sentence or the price row.
      const pieces = [reportDataSources];
      for (const following of lines.slice(rowAt + 1)) {
        const startsNextField = /^I\s+(did|did not)/i.test(following.fullText) || /^Contract Price/i.test(following.fullText);
        if (startsNextField) {
          break;
        }
        pieces.push(following.fullText.trim());
      }
      reportDataSources = pieces.join(" ").trim();
    }
  }

  // Contract analysis: everything from the "I did / did not analyze" sentence
  // up to the Contract Price row, minus the boilerplate sentence itself.
  let contractAnalysis = "";
  const analysisRow = findLine(lines, /did not analyze the contract|did.*analyze the contract/i);
  if (analysisRow) {
    const rest = lines.slice(lines.indexOf(analysisRow) + 1);
    const priceAt = rest.findIndex((l) => /^Contract Price/i.test(l.fullText));
    const continuation = priceAt === -1 ? rest : rest.slice(0, priceAt);
    const joined = [analysisRow.fullText, ...continuation.map((l) => l.fullText.trim())].join(" ");
    contractAnalysis = joined.replace(/^I\s+(did|did not).*?performed\.\s*/is, "").trim();
  }

  // Row: Contract Price $ / Date of Contract
  let contractPrice = null;
  let dateOfContract = "";
  const priceRow = findLine(lines, /^Contract Price\s*\$/i);
  if (priceRow) {
    for (const part of priceRow.segments) {
      const text = part.text.trim();
      if (/^Contract Price\s*\$/i.test(text)) {
        contractPrice = parseNum(extractAfterLabel(part, /^Contract Price\s*\$\s*/i));
        boxes.contractPrice = toBBox(part, priceRow);
        continue;
      }
      if (/^Date of Contract\s/i.test(text)) {
        dateOfContract = extractAfterLabel(part, /^Date of Contract\s+/i);
      }
    }
  }

  // Financial assistance: first segment that starts with "$", kept as text.
  let financialAssistance = "";
  const assistanceRow = findLine(lines, /If Yes, report the total dollar/i);
  if (assistanceRow) {
    const amountPart = assistanceRow.segments.find((s) => /^\$/.test(s.text.trim()));
    if (amountPart) {
      financialAssistance = amountPart.text.trim();
    }
  }

  return {
    isOfferedForSale,
    reportDataSources,
    contractAnalysis,
    contractPrice,
    dateOfContract,
    financialAssistance,
    boundingBoxes: boxes,
  };
}
233
+ // ── Neighborhood ─────────────────────────────────────────────────────────
234
/**
 * Work out which option of a checkbox row is ticked.
 *
 * A rect counts as belonging to the row when its `y` is within `yTolerance`
 * of `rowY`. It selects an option when its `x` lies strictly between
 * `textX - 20` and `textX - 2`, i.e. just to the left of the option's label.
 * Rects are examined in array order and the first hit is returned.
 *
 * @param {Array<{ x: number, y: number }>} rects - Candidate checkbox rects.
 * @param {number} rowY - The row's `y` coordinate.
 * @param {Array<{ label: string, textX: number }>} options - Options with the
 *   `x` position of each label's text.
 * @param {number} [yTolerance=8] - Maximum (exclusive) vertical distance.
 * @returns {string} The selected option's label, or "" when none matches.
 */
function detectCheckboxSelection(rects, rowY, options, yTolerance = 8) {
  for (const candidate of rects) {
    const inRow = Math.abs(candidate.y - rowY) < yTolerance;
    if (!inRow) {
      continue;
    }
    const selected = options.find(
      (option) => candidate.x > option.textX - 20 && candidate.x < option.textX - 2,
    );
    if (selected) {
      return selected.label;
    }
  }
  return "";
}
254
/**
 * Pull the price and age values out of one row of the neighborhood grid.
 *
 * Only segments at x >= 350 are considered. The values may be embedded in
 * larger text such as "Over 6 mths 415 Low", "1,250 High" or
 * "550 Pred. 36 Other".
 *
 * @param {{ segments: Array<{ text: string, x: number }> } | undefined} line
 *   Row to inspect; a missing row yields nulls.
 * @returns {{ price: number | null, age: number | null, priceSeg?: object, ageSeg?: object }}
 *   Parsed values and the segments they were read from.
 */
function extractPriceAge(line) {
  const found = { price: null, age: null, priceSeg: undefined, ageSeg: undefined };
  if (!line) {
    return found;
  }
  for (const seg of line.segments) {
    if (seg.x < 350) {
      continue;
    }
    const text = seg.text.trim();

    // "number Low/High/Pred" is the price column (x below 470).
    const priceMatch = text.match(/([\d,]+)\s*(Low|High|Pred\.?)/i);
    if (priceMatch && seg.x < 470) {
      found.price = parseNum(priceMatch[1]);
      found.priceSeg = seg;
    }

    // A bare number at x >= 460 is the age column.
    const ageMatch = text.match(/^(\d+)\s*$/);
    if (ageMatch && seg.x >= 460) {
      found.age = parseNum(ageMatch[1]);
      found.ageSeg = seg;
    }

    // "550 Pred. 36 Other": price and age share one segment.
    const combinedMatch = text.match(/([\d,]+)\s+Pred\.?\s+(\d+)/i);
    if (combinedMatch) {
      found.price = parseNum(combinedMatch[1]);
      found.age = parseNum(combinedMatch[2]);
      found.priceSeg = seg;
      found.ageSeg = seg;
    }

    // "45 Commercial" / "45 Other": the age is fused with a land-use label.
    // Compare against null explicitly so an already-parsed age of 0 is kept.
    if (found.age === null) {
      const ageLabelMatch = text.match(/^(\d+)\s+(Commercial|Other|Multi|One)/i);
      if (ageLabelMatch && seg.x >= 460) {
        found.age = parseNum(ageLabelMatch[1]);
        found.ageSeg = seg;
      }
    }
  }
  return found;
}

/**
 * Extract the Neighborhood section fields from the text lines of page 1.
 *
 * Checkbox answers are resolved from `checkboxRects` using fixed label x
 * positions; when no rects are supplied those fields stay "". Price/age
 * values, land-use percentages and the free-text fields are read from the
 * text lines.
 *
 * @param {Array<{ y: number, fullText: string, segments: Array<{ text: string, x: number }> }>} lines
 *   Text lines of the page.
 * @param {Array<{ x: number, y: number }>} [checkboxRects] - Checkbox rects;
 *   presumably only the filled ones — confirm against the caller.
 * @returns {object} Neighborhood fields plus `boundingBoxes`. Numeric fields
 *   are `null` when not found.
 */
export function parseNeighborhoodSection(lines, checkboxRects) {
  const bb = {};
  const growthLine = findLine(lines, /^Growth/i);
  const boundaryLine = findLine(lines, /^Neighborhood Boundaries/i);

  // ── Checkbox rows ──
  let location = "";
  let builtUp = "";
  let growth = "";
  let propertyValues = "";
  let demandSupply = "";
  let marketingTime = "";
  if (checkboxRects && checkboxRects.length > 0) {
    // Label x positions of the left and right option groups in each row.
    const leftColumns = [58, 105, 155];
    const rightColumns = [259, 317, 367];
    const pick = (row, labels, columns) =>
      detectCheckboxSelection(
        checkboxRects,
        row.y,
        labels.map((label, i) => ({ label, textX: columns[i] })),
      );

    const locationLine = findLine(lines, /^Location/i);
    if (locationLine) {
      location = pick(locationLine, ["Urban", "Suburban", "Rural"], leftColumns);
      propertyValues = pick(locationLine, ["Increasing", "Stable", "Declining"], rightColumns);
    }
    const builtUpLine = findLine(lines, /^Built-Up/i);
    if (builtUpLine) {
      builtUp = pick(builtUpLine, ["Over 75%", "25-75%", "Under 25%"], leftColumns);
      demandSupply = pick(builtUpLine, ["Shortage", "In Balance", "Over Supply"], rightColumns);
    }
    if (growthLine) {
      growth = pick(growthLine, ["Rapid", "Stable", "Slow"], leftColumns);
      marketingTime = pick(growthLine, ["Under 3 mths", "3-6 mths", "Over 6 mths"], rightColumns);
    }
  }

  // ── Price / age grid ──
  // Low values sit on the Growth row, High on the Neighborhood Boundaries row
  // and Predominant on the first later row that mentions "Pred".
  const recordBoxes = (result, row, priceKey, ageKey) => {
    if (result.priceSeg) {
      bb[priceKey] = toBBox(result.priceSeg, row);
    }
    if (result.ageSeg) {
      bb[ageKey] = toBBox(result.ageSeg, row);
    }
  };

  const lowResult = extractPriceAge(growthLine);
  const priceLow = lowResult.price;
  const ageLow = lowResult.age;
  recordBoxes(lowResult, growthLine, "priceLow", "ageLow");

  const highResult = extractPriceAge(boundaryLine);
  const priceHigh = highResult.price;
  const ageHigh = highResult.age;
  recordBoxes(highResult, boundaryLine, "priceHigh", "ageHigh");

  const predFloorY = boundaryLine?.y ?? growthLine?.y ?? 0;
  const predLine = lines.find((l) => l.y > predFloorY && /Pred\.?/i.test(l.fullText));
  const predResult = extractPriceAge(predLine);
  const pricePredominant = predResult.price;
  const agePredominant = predResult.age;
  recordBoxes(predResult, predLine, "pricePredominant", "agePredominant");

  // ── Land use percentages ──
  // Only rows with 310 <= y <= 370 and segments at x >= 540 are considered;
  // the row's text decides which land-use bucket the percentage belongs to.
  let landUseOneUnit = null;
  let landUseTwoFourUnit = null;
  let landUseMultiFamily = null;
  let landUseCommercial = null;
  let landUseOther = null;
  for (const l of lines) {
    if (l.y < 310 || l.y > 370) {
      continue;
    }
    for (const seg of l.segments) {
      if (seg.x < 540) {
        continue;
      }
      const pct = seg.text.match(/(\d+)\s*%/);
      if (!pct) {
        continue;
      }
      const val = parseNum(pct[1]);
      if (/One-Unit/i.test(l.fullText)) {
        landUseOneUnit = val;
        break;
      }
      if (/2-4 Unit/i.test(l.fullText)) {
        landUseTwoFourUnit = val;
        break;
      }
      if (/Multi/i.test(l.fullText)) {
        landUseMultiFamily = val;
        break;
      }
      if (/Commercial/i.test(l.fullText)) {
        landUseCommercial = val;
        break;
      }
      if (/Other/i.test(l.fullText) && !/Other \(describe\)/i.test(l.fullText)) {
        landUseOther = val;
        break;
      }
    }
  }

  // ── Boundaries ──
  let boundaries = "";
  if (boundaryLine) {
    const valueSeg = boundaryLine.segments.find((s) => s.x > 100 && s.x < 410);
    if (valueSeg) {
      boundaries = valueSeg.text.trim();
      bb.boundaries = toBBox(valueSeg, boundaryLine);
    }
    // The text may wrap onto the next line when it is less than 20 units below.
    const idx = lines.indexOf(boundaryLine);
    const nextLine = idx >= 0 ? lines[idx + 1] : undefined;
    if (nextLine && nextLine.y < boundaryLine.y + 20) {
      const nextSeg = nextLine.segments.find((s) => s.x < 410);
      if (nextSeg) {
        boundaries += " " + nextSeg.text.trim();
      }
    }
  }

  // ── Description ──
  let description = "";
  const descLine = findLine(lines, /^Neighborhood Description/i);
  if (descLine) {
    const valueSeg = descLine.segments.find((s) => s.x > 100);
    if (valueSeg) {
      description = valueSeg.text.trim();
      bb.description = toBBox(valueSeg, descLine);
    }
  }

  // ── Market conditions ──
  // Value segment plus every following line up to the Project Site section.
  let marketConditions = "";
  const mcLine = findLine(lines, /^Market Conditions\s*\(/i);
  if (mcLine) {
    const idx = lines.indexOf(mcLine);
    const parts = [];
    const valueSeg = mcLine.segments.find((s) => s.x > 200);
    if (valueSeg) {
      parts.push(valueSeg.text.trim());
      bb.marketConditions = toBBox(valueSeg, mcLine);
    }
    for (let i = idx + 1; i < lines.length; i++) {
      if (/^Topography|^Specific Zoning/i.test(lines[i].fullText)) {
        break;
      }
      parts.push(lines[i].fullText.trim());
    }
    marketConditions = parts.join(" ").trim();
  }

  return {
    location,
    builtUp,
    growth,
    propertyValues,
    demandSupply,
    marketingTime,
    priceLow,
    priceHigh,
    pricePredominant,
    ageLow,
    ageHigh,
    agePredominant,
    landUseOneUnit,
    landUseTwoFourUnit,
    landUseMultiFamily,
    landUseCommercial,
    landUseOther,
    boundaries,
    description,
    marketConditions,
    boundingBoxes: bb,
  };
}
461
+ // ── Project Site ─────────────────────────────────────────────────────────
462
/**
 * Extract the Project Site section fields from the text lines of page 1.
 *
 * @param {Array<{ fullText: string, segments: Array<{ text: string }> }>} lines
 *   Text lines of the page.
 * @returns {object} Site fields (all strings, "" when not found) plus
 *   `boundingBoxes` for topography, view and FEMA map number.
 */
export function parseProjectSiteSection(lines) {
  const boxes = {};

  // Row: Topography / Size / Density / View
  let topography = "";
  let size = "";
  let density = "";
  let view = "";
  const topographyRow = findLine(lines, /^Topography/i);
  if (topographyRow) {
    for (const part of topographyRow.segments) {
      const text = part.text.trim();
      if (/^Topography\s/i.test(text)) {
        topography = extractAfterLabel(part, /^Topography\s+/i);
        boxes.topography = toBBox(part, topographyRow);
        continue;
      }
      if (/^Size\s/i.test(text)) {
        size = extractAfterLabel(part, /^Size\s+/i);
        continue;
      }
      if (/^Density\s/i.test(text)) {
        density = extractAfterLabel(part, /^Density\s+/i);
        continue;
      }
      if (/^View\s/i.test(text)) {
        view = extractAfterLabel(part, /^View\s+/i);
        boxes.view = toBBox(part, topographyRow);
      }
    }
  }

  // Row: Specific Zoning Classification / Zoning Description
  let zoningClassification = "";
  let zoningDescription = "";
  const zoningRow = findLine(lines, /^Specific Zoning Classification/i);
  if (zoningRow) {
    for (const part of zoningRow.segments) {
      const text = part.text.trim();
      if (/^Specific Zoning Classification/i.test(text)) {
        zoningClassification = extractAfterLabel(part, /^Specific Zoning Classification\s+/i);
        continue;
      }
      if (/^Zoning Description/i.test(text)) {
        zoningDescription = extractAfterLabel(part, /^Zoning Description\s+/i);
      }
    }
  }

  // Zoning Compliance: the value is the segment right after the label.
  let zoningCompliance = "";
  const complianceRow = findLine(lines, /^Zoning Compliance/i);
  if (complianceRow) {
    const labelAt = complianceRow.segments.findIndex((s) => /^Zoning Compliance/i.test(s.text.trim()));
    const valuePart = labelAt >= 0 ? complianceRow.segments[labelAt + 1] : undefined;
    if (valuePart) {
      zoningCompliance = valuePart.text.trim();
    }
  }

  // Highest and best use: whole-word "Yes" is checked before whole-word "No".
  let highestAndBestUse = "";
  const bestUseRow = findLine(lines, /highest and best use/i);
  if (bestUseRow) {
    const answer = bestUseRow.fullText;
    if (/\bYes\b/.test(answer)) {
      highestAndBestUse = "Yes";
    } else if (/\bNo\b/.test(answer)) {
      highestAndBestUse = "No";
    }
  }

  // Row: FEMA Flood Zone / FEMA Map # / FEMA Map Date
  let femaFloodZone = "";
  let femaMapNumber = "";
  let femaMapDate = "";
  const femaRow = findLine(lines, /FEMA Special Flood/i);
  if (femaRow) {
    for (const part of femaRow.segments) {
      const text = part.text.trim();
      if (/FEMA Flood Zone\s/i.test(text)) {
        // Everything up to and including the label is dropped, so leading
        // text in the same segment is discarded.
        femaFloodZone = extractAfterLabel(part, /.*?FEMA Flood Zone\s+/i);
        continue;
      }
      if (/FEMA Map #\s/i.test(text)) {
        femaMapNumber = extractAfterLabel(part, /^FEMA Map #\s+/i);
        boxes.femaMapNumber = toBBox(part, femaRow);
        continue;
      }
      if (/FEMA Map Date\s/i.test(text)) {
        femaMapDate = extractAfterLabel(part, /^FEMA Map Date\s+/i);
      }
    }
  }

  // Adverse conditions: the non-empty lines between the question and the
  // start of the project-information section.
  let adverseConditions = "";
  const adverseRow = findLine(lines, /adverse site conditions/i);
  if (adverseRow) {
    const collected = [];
    for (const following of lines.slice(lines.indexOf(adverseRow) + 1)) {
      if (/^Data source\(s\) for project/i.test(following.fullText)) {
        break;
      }
      const trimmed = following.fullText.trim();
      if (trimmed) {
        collected.push(trimmed);
      }
    }
    adverseConditions = collected.join(" ").trim();
  }

  return {
    topography,
    size,
    density,
    view,
    zoningClassification,
    zoningDescription,
    zoningCompliance,
    highestAndBestUse,
    femaFloodZone,
    femaMapNumber,
    femaMapDate,
    adverseConditions,
    boundingBoxes: boxes,
  };
}
543
+ // ── Project Information ──────────────────────────────────────────────────
544
/**
 * Extract the Project Information section fields from the text lines of page 1.
 *
 * @param {Array<{ y: number, fullText: string, segments: Array<{ text: string, x: number }> }>} lines
 *   Text lines of the page.
 * @returns {object} Project fields. Text fields default to "", numeric fields
 *   to `null`, Yes/No questions to "Yes" | "No" | ""; `boundingBoxes` covers
 *   the project description and year built.
 */
export function parseProjectInfoSection(lines) {
  const boxes = {};

  // Segment immediately following `part` within `row` (undefined at the end).
  const segmentAfter = (row, part) => row.segments[row.segments.indexOf(part) + 1];

  // Yes/No question: "Yes" anywhere in the line wins, then "No", else "".
  const answerOf = (row) => {
    if (row == null) {
      return "";
    }
    const text = row.fullText;
    if (text.includes("Yes")) {
      return "Yes";
    }
    return text.includes("No") ? "No" : "";
  };

  // Data source(s) for project information
  let dataSourcesForProjectInfo = "";
  const sourceRow = findLine(lines, /^Data source\(s\) for project information/i);
  if (sourceRow) {
    const valuePart = sourceRow.segments.find((s) => s.x > 130);
    if (valuePart) {
      dataSourcesForProjectInfo = valuePart.text.trim();
    }
  }

  // Project Description: the last segment usually holds the selected type.
  let projectDescription = "";
  const descriptionRow = findLine(lines, /^Project Description/i);
  if (descriptionRow) {
    const tail = descriptionRow.segments[descriptionRow.segments.length - 1];
    if (tail && !/^Project Description/i.test(tail.text)) {
      const described = tail.text.match(/Other \(describe\)\s*(.*)/i);
      if (described) {
        projectDescription = described[1].trim();
      } else {
        projectDescription = tail.text.replace(/^Other \(describe\)\s*/i, "").trim();
      }
      boxes.projectDescription = toBBox(tail, descriptionRow);
    }
  }

  // Row: # of Stories / Exterior Walls
  let numberOfStories = null;
  let exteriorWalls = "";
  const storiesRow = findLine(lines, /^# of Stories/i);
  if (storiesRow) {
    for (const part of storiesRow.segments) {
      const text = part.text.trim();
      if (/^# of Stories\s/i.test(text)) {
        numberOfStories = parseNum(extractAfterLabel(part, /^# of Stories\s+/i));
        continue;
      }
      if (/^Exterior Walls$/i.test(text)) {
        const valuePart = segmentAfter(storiesRow, part);
        if (valuePart) {
          exteriorWalls = valuePart.text.trim();
        }
      }
    }
  }

  // Row: # of Elevators / Roof Surface
  let numberOfElevators = null;
  let roofSurface = "";
  const elevatorRow = findLine(lines, /^# of Elevators/i);
  if (elevatorRow) {
    for (const part of elevatorRow.segments) {
      const text = part.text.trim();
      if (/^# of Elevators\s/i.test(text)) {
        numberOfElevators = parseNum(extractAfterLabel(part, /^# of Elevators\s+/i));
        continue;
      }
      if (/^Roof Surface$/i.test(text)) {
        const valuePart = segmentAfter(elevatorRow, part);
        if (valuePart) {
          roofSurface = valuePart.text.trim();
        }
      }
    }
  }

  // Existing / Proposed / Under Construction (only rows at y >= 660)
  let existingOrProposed = "";
  const statusRow = findLine(lines, /^Existing|^Under Construction/i, { minY: 660 });
  if (statusRow) {
    const statusText = statusRow.fullText;
    if (/Existing/.test(statusText)) {
      existingOrProposed = "Existing";
    } else if (/Proposed/.test(statusText)) {
      existingOrProposed = "Proposed";
    } else if (/Under Construction/.test(statusText)) {
      existingOrProposed = "Under Construction";
    }
  }

  // Total # Parking: prefer a row that starts with the label.
  let totalParking = null;
  const parkingRow = findLine(lines, /^Total # Parking/i) ?? findLine(lines, /Total # Parking/i);
  if (parkingRow) {
    for (const part of parkingRow.segments) {
      if (!/^Total # Parking/i.test(part.text)) {
        continue;
      }
      const valuePart = segmentAfter(parkingRow, part);
      if (valuePart) {
        totalParking = parseNum(valuePart.text);
      }
    }
  }

  // Ratio (spaces/units)
  let parkingRatio = "";
  const ratioRow = findLine(lines, /Ratio \(spaces\/units\)/i);
  if (ratioRow) {
    for (const part of ratioRow.segments) {
      if (/Ratio \(spaces\/units\)/i.test(part.text)) {
        parkingRatio = extractAfterLabel(part, /^Ratio \(spaces\/units\)\s*/i);
      }
    }
  }

  // Row: Year Built / (parking) Type — only rows at y >= 680
  let yearBuilt = null;
  let parkingType = "";
  const yearRow = findLine(lines, /^Year Built/i, { minY: 680 });
  if (yearRow) {
    for (const part of yearRow.segments) {
      if (/^Year Built/i.test(part.text)) {
        const valuePart = segmentAfter(yearRow, part);
        if (valuePart) {
          yearBuilt = parseNum(valuePart.text);
          boxes.yearBuilt = toBBox(valuePart, yearRow);
        }
        continue;
      }
      if (/^Type/i.test(part.text)) {
        const valuePart = segmentAfter(yearRow, part);
        if (valuePart) {
          parkingType = valuePart.text.trim();
        }
      }
    }
  }

  // Row: Effective Age / Guest Parking — only rows at y >= 690
  let effectiveAge = null;
  let guestParking = null;
  const effectiveAgeRow = findLine(lines, /^Effective Age/i, { minY: 690 });
  if (effectiveAgeRow) {
    for (const part of effectiveAgeRow.segments) {
      if (/^Effective Age\s/i.test(part.text)) {
        effectiveAge = parseNum(extractAfterLabel(part, /^Effective Age\s+/i));
        continue;
      }
      if (/^Guest Parking\s/i.test(part.text)) {
        guestParking = parseNum(extractAfterLabel(part, /^Guest Parking\s+/i));
      }
    }
  }

  // Unit-count grid: for every line matching `label`, take the first
  // non-label segment with 200 <= x < 320 that starts with digits.
  const getUnitCount = (label) => {
    for (const row of lines) {
      if (!label.test(row.fullText)) {
        continue;
      }
      for (const part of row.segments) {
        if (label.test(part.text)) {
          continue;
        }
        if (part.x < 200 || part.x >= 320) {
          continue;
        }
        const digits = part.text.match(/^(\d+)/);
        if (digits) {
          return parseNum(digits[1]);
        }
      }
    }
    return null;
  };
  const numberOfUnits = getUnitCount(/# of Units\b(?! (Completed|For Sale|Sold|Rented))/i);
  const numberOfPhases = getUnitCount(/# of Phases\b/i);
  const unitsForSale = getUnitCount(/# of Units For Sale/i);
  const unitsSold = getUnitCount(/# of Units Sold/i);
  const unitsRented = getUnitCount(/# of Units Rented/i);
  const ownerOccupiedUnits = getUnitCount(/# of Owner Occupied/i);

  // Project Primary Occupancy: value is the segment after the label.
  let projectPrimaryOccupancy = "";
  const occupancyRow = findLine(lines, /^Project Primary Occupancy/i);
  if (occupancyRow) {
    const labelAt = occupancyRow.segments.findIndex((s) => /^Project Primary Occupancy/i.test(s.text));
    const valuePart = labelAt >= 0 ? occupancyRow.segments[labelAt + 1] : undefined;
    if (valuePart) {
      projectPrimaryOccupancy = valuePart.text.trim();
    }
  }

  const hoaControl = answerOf(findLine(lines, /developer\/builder in control.*HOA/i));

  // Management Group: value is the segment after the label.
  let managementGroup = "";
  const managementRow = findLine(lines, /^Management Group/i);
  if (managementRow) {
    const labelAt = managementRow.segments.findIndex((s) => /^Management Group/i.test(s.text));
    const valuePart = labelAt >= 0 ? managementRow.segments[labelAt + 1] : undefined;
    if (valuePart) {
      managementGroup = valuePart.text.trim();
    }
  }

  const singleEntityOwnership = answerOf(findLine(lines, /single entity.*own more than 10%/i));
  const conversionFromExisting = answerOf(findLine(lines, /conversion of existing building/i));
  const unitsComplete = answerOf(findLine(lines, /units, common elements.*complete/i));
  const commercialSpace = answerOf(findLine(lines, /commercial space in the project/i));

  return {
    dataSourcesForProjectInfo,
    projectDescription,
    numberOfStories,
    exteriorWalls,
    numberOfElevators,
    roofSurface,
    existingOrProposed,
    totalParking,
    parkingRatio,
    yearBuilt,
    parkingType,
    effectiveAge,
    guestParking,
    numberOfUnits,
    numberOfPhases,
    unitsForSale,
    unitsSold,
    unitsRented,
    ownerOccupiedUnits,
    projectPrimaryOccupancy,
    hoaControl,
    managementGroup,
    singleEntityOwnership,
    conversionFromExisting,
    unitsComplete,
    commercialSpace,
    boundingBoxes: boxes,
  };
}