@parseo/appraisals 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +35 -0
  2. package/dist/form-1004mc/extract-checkboxes.d.ts +43 -0
  3. package/dist/form-1004mc/extract-checkboxes.d.ts.map +1 -0
  4. package/dist/form-1004mc/extract-checkboxes.js +145 -0
  5. package/dist/form-1004mc/index.d.ts +3 -0
  6. package/dist/form-1004mc/index.d.ts.map +1 -0
  7. package/dist/form-1004mc/index.js +1 -0
  8. package/dist/form-1004mc/parse-page1.d.ts +8 -0
  9. package/dist/form-1004mc/parse-page1.d.ts.map +1 -0
  10. package/dist/form-1004mc/parse-page1.js +760 -0
  11. package/dist/form-1004mc/parse-sales.d.ts +6 -0
  12. package/dist/form-1004mc/parse-sales.d.ts.map +1 -0
  13. package/dist/form-1004mc/parse-sales.js +505 -0
  14. package/dist/form-1004mc/parser.d.ts +5 -0
  15. package/dist/form-1004mc/parser.d.ts.map +1 -0
  16. package/dist/form-1004mc/parser.js +437 -0
  17. package/dist/form-1004mc/types.d.ts +302 -0
  18. package/dist/form-1004mc/types.d.ts.map +1 -0
  19. package/dist/form-1004mc/types.js +1 -0
  20. package/dist/form-1073/index.d.ts +3 -0
  21. package/dist/form-1073/index.d.ts.map +1 -0
  22. package/dist/form-1073/index.js +1 -0
  23. package/dist/form-1073/parse-page1.d.ts +8 -0
  24. package/dist/form-1073/parse-page1.d.ts.map +1 -0
  25. package/dist/form-1073/parse-page1.js +704 -0
  26. package/dist/form-1073/parse-page2.d.ts +6 -0
  27. package/dist/form-1073/parse-page2.d.ts.map +1 -0
  28. package/dist/form-1073/parse-page2.js +438 -0
  29. package/dist/form-1073/parse-sales.d.ts +7 -0
  30. package/dist/form-1073/parse-sales.d.ts.map +1 -0
  31. package/dist/form-1073/parse-sales.js +477 -0
  32. package/dist/form-1073/parser.d.ts +5 -0
  33. package/dist/form-1073/parser.d.ts.map +1 -0
  34. package/dist/form-1073/parser.js +102 -0
  35. package/dist/form-1073/types.d.ts +300 -0
  36. package/dist/form-1073/types.d.ts.map +1 -0
  37. package/dist/form-1073/types.js +1 -0
  38. package/dist/index.d.ts +13 -0
  39. package/dist/index.d.ts.map +1 -0
  40. package/dist/index.js +9 -0
  41. package/dist/richer-values/index.d.ts +3 -0
  42. package/dist/richer-values/index.d.ts.map +1 -0
  43. package/dist/richer-values/index.js +1 -0
  44. package/dist/richer-values/parser.d.ts +5 -0
  45. package/dist/richer-values/parser.d.ts.map +1 -0
  46. package/dist/richer-values/parser.js +1067 -0
  47. package/dist/richer-values/types.d.ts +225 -0
  48. package/dist/richer-values/types.d.ts.map +1 -0
  49. package/dist/richer-values/types.js +1 -0
  50. package/package.json +24 -0
@@ -0,0 +1,477 @@
1
+ import { toBBox } from "@parseo/shared";
2
+ // ── Utilities ────────────────────────────────────────────────────────────
3
/**
 * Parse a number the way it is printed on the form, e.g. "$1,234.50", "12%" or "-5,000".
 *
 * @param {string | null | undefined} raw - Text to parse.
 * @returns {number | null} The parsed finite number, or null when the text is
 *   empty, reads "N/A" / "NA" (any case), or is not a finite number.
 */
function parseNum(raw) {
    if (!raw)
        return null;
    // A single pass removes currency signs, thousands separators and percent signs.
    const cleaned = raw.replace(/[$,%]/g, "").trim();
    if (!cleaned || /^n\/?a$/i.test(cleaned))
        return null;
    const n = Number(cleaned);
    // Number("Infinity") and overflowing literals are not NaN, so require a finite value.
    return Number.isFinite(n) ? n : null;
}
12
/**
 * Pull the first currency-looking amount (optional "$", digits with commas,
 * optional decimals) out of a piece of text.
 *
 * @param {string} raw - Text that may contain an amount.
 * @returns {number | null} The amount as parsed by parseNum, or null when no amount is present.
 */
function parseCurrency(raw) {
    const found = raw.match(/\$?\s*([\d,]+(?:\.\d+)?)/);
    if (!found)
        return null;
    return parseNum(found[1]);
}
16
// Column x-boundaries for the Form 1073 sales comparison grid.
// The numbers were derived from segment positions in the sample PDF.
const SUBJECT_X = { min: 114, max: 180 };
// One row per comparable: [description start, adjustment start, adjustment end].
// The description sub-column ends exactly where the adjustment sub-column begins.
const COMP_COLS = [
    [180, 260, 313],
    [313, 392, 444],
    [444, 524, 580],
].map(([descStart, adjStart, adjEnd]) => ({
    desc: { min: descStart, max: adjStart },
    adj: { min: adjStart, max: adjEnd },
}));
24
/**
 * Concatenate the text of every segment whose x lies in [min, max).
 *
 * @param {{segments: Array<{x: number, text: string}>}} line - Line to read from.
 * @param {number} min - Inclusive lower x bound.
 * @param {number} max - Exclusive upper x bound.
 * @returns {string} Trimmed segment texts joined by single spaces ("" when none match).
 */
function segInRange(line, min, max) {
    const pieces = [];
    for (const segment of line.segments) {
        if (segment.x >= min && segment.x < max) {
            pieces.push(segment.text.trim());
        }
    }
    return pieces.join(" ").trim();
}
28
/**
 * Locate the first segment whose x lies in [min, max).
 *
 * @param {{segments: Array<{x: number, text: string}>}} line - Line to search.
 * @param {number} min - Inclusive lower x bound.
 * @param {number} max - Exclusive upper x bound.
 * @returns {{text: string, seg: object | null}} The segment and its trimmed text,
 *   or `{ text: "", seg: null }` when no segment falls in the range.
 */
function segWithBBox(line, min, max) {
    const hit = line.segments.find(({ x }) => x >= min && x < max);
    if (!hit) {
        return { text: "", seg: null };
    }
    return { text: hit.text.trim(), seg: hit };
}
32
/**
 * Return the first line whose `fullText` matches `pattern`.
 *
 * @param {Array<{fullText: string}>} lines - Lines to scan in order.
 * @param {RegExp} pattern - Pattern tested against each line's fullText.
 * @returns {object | undefined} The matching line, or undefined when none matches.
 */
function findLine(lines, pattern) {
    for (const candidate of lines) {
        if (pattern.test(candidate.fullText)) {
            return candidate;
        }
    }
    return undefined;
}
35
+ // ── Parse single comparable from a column ────────────────────────────────
36
/**
 * Parse one comparable-sale column of the sales comparison grid.
 *
 * Each grid row is located by matching its label at the start of a line's
 * `fullText`; the comparable's values are then read from the segments whose
 * `x` falls inside this column's description / adjustment ranges.
 *
 * @param {Array<object>} lines - Page lines, each with `fullText` and `segments` (`x`, `text`).
 * @param {number} compNum - Reported as `number` and used to prefix bounding-box keys (`comp<N>_…`).
 * @param {{desc: {min: number, max: number}, adj: {min: number, max: number}}} col - Column x-ranges.
 * @returns {object} The comparable's fields; rows that are missing yield "" (text) or null (numbers).
 */
function parseComparable(lines, compNum, col) {
    const bb = {};
    const prefix = `comp${compNum}`;
    // The description sub-column runs from desc.min up to where the adjustment sub-column starts.
    const descMin = col.desc.min;
    const descMax = col.adj.min;
    const row = (pattern) => findLine(lines, pattern);
    const getDesc = (line) => (line ? segInRange(line, descMin, descMax) : "");
    // Bounded by adj.max (not adj.min + 60): the next comparable's description
    // column starts at adj.max, and its text must not leak into this adjustment.
    const getAdj = (line) => (line ? parseNum(segInRange(line, col.adj.min, col.adj.max)) : null);
    // Records the bounding box of the first description segment and returns its text.
    const captureDesc = (line, key) => {
        const hit = segWithBBox(line, descMin, descMax);
        if (hit.seg)
            bb[`${prefix}_${key}`] = toBBox(hit.seg, line);
        return hit.text;
    };
    const getDescWithBB = (line, key) => (line ? captureDesc(line, key) : "");

    const addrLine = row(/^Address/i);
    const street = getDescWithBB(addrLine, "address");
    // The line directly after the address row is treated as its continuation
    // (presumably city/state/zip — confirm against the form layout).
    const cityLine = addrLine ? lines[lines.indexOf(addrLine) + 1] : undefined;
    const cityVal = getDesc(cityLine);
    const projectNamePhase = getDesc(row(/^Project Name/i));
    const proximityToSubject = getDescWithBB(row(/^Proximity to Subject/i), "proximity");

    const priceLine = row(/^Sale Price\b/i);
    let salePrice = null;
    if (priceLine) {
        // The text window starts 30 units left of the description column.
        salePrice = parseCurrency(segInRange(priceLine, descMin - 30, descMax));
        captureDesc(priceLine, "salePrice");
    }
    const ppsLine = row(/^Sale Price\/Gross Liv/i);
    let salePricePerSqft = null;
    if (ppsLine) {
        const m = segInRange(ppsLine, descMin - 30, descMax).match(/([\d,.]+)\s*sq\.?\s*ft/i);
        if (m)
            salePricePerSqft = parseNum(m[1]);
    }

    const dataSources = getDescWithBB(row(/^Data Source\(s\)/i), "dataSources");
    const verificationSources = getDesc(row(/^Verification Source/i));
    const salesOrFinancing = getDesc(row(/^Sales or Financing/i));
    const concessions = getDesc(row(/^Concessions/i));
    const dateOfSaleTime = getDescWithBB(row(/^Date of Sale\/Time/i), "dateOfSale");
    const location = getDesc(row(/^Location\b/i));
    const leaseholdFeeSimple = getDesc(row(/^Leasehold\/Fee Simple/i));
    // A missing row gives "" which parseNum maps to null.
    const hoaMoAssessment = parseNum(getDesc(row(/^HOA Mo\. Assessment/i)));
    const commonElements = getDesc(row(/^Common Elements/i));
    const floorLocation = getDesc(row(/^Floor Location/i));
    const view = getDesc(row(/^View\b/i));
    const designStyle = getDesc(row(/^Design \(Style\)/i));
    const qualityOfConstruction = getDesc(row(/^Quality of Construction/i));
    const actualAge = parseNum(getDesc(row(/^Actual Age\b/i)));

    const condLine = row(/^Condition\b/i);
    const condition = getDescWithBB(condLine, "condition");
    const conditionAdjustment = getAdj(condLine);

    const rcLine = row(/^Room Count/i);
    let roomCountTotal = null;
    let roomCountBedrooms = null;
    let roomCountBaths = null;
    let roomCountAdjustment = null;
    if (rcLine) {
        // Keep only the numeric tokens; the first three are total / bedrooms / baths.
        const counts = segInRange(rcLine, descMin, descMax)
            .split(/\s+/)
            .map((token) => parseNum(token))
            .filter((n) => n !== null);
        if (counts.length >= 3) {
            [roomCountTotal, roomCountBedrooms, roomCountBaths] = counts;
        }
        roomCountAdjustment = getAdj(rcLine);
    }

    const glaLine = row(/^Gross Living Area/i);
    let grossLivingArea = null;
    let grossLivingAreaAdjustment = null;
    if (glaLine) {
        const m = segInRange(glaLine, descMin - 30, descMax).match(/([\d,]+)\s*sq/i);
        if (m)
            grossLivingArea = parseNum(m[1]);
        grossLivingAreaAdjustment = getAdj(glaLine);
        captureDesc(glaLine, "gla");
    }

    const basementFinished = getDesc(row(/^Basement & Finished/i));
    const functionalUtility = getDesc(row(/^Functional Utility/i));
    const heatingCooling = getDesc(row(/^Heating\/Cooling/i));
    const energyEfficientItems = getDesc(row(/^Energy Efficient/i));
    const garageCarport = getDesc(row(/^Garage\/Carport/i));
    const porchPatioDeck = getDesc(row(/^Porch\/Patio\/Deck/i));

    // Net/Gross adjustment rows use their own x windows, kept as originally tuned.
    const netLine = row(/^Net Adjustment \(Total\)/i);
    let netAdjustmentTotal = null;
    if (netLine) {
        netAdjustmentTotal = parseNum(segInRange(netLine, col.adj.min - 10, col.adj.min + 60));
    }
    const adjPriceLine = row(/^Adjusted Sale Price/i);
    let netAdjustmentPercent = null;
    if (adjPriceLine) {
        const pctMatch = segInRange(adjPriceLine, descMin, col.adj.min + 20).match(/([\d.]+)\s*%/);
        if (pctMatch)
            netAdjustmentPercent = parseNum(pctMatch[1]);
    }
    const grossLine = row(/^of Comparables/i);
    let grossAdjustmentPercent = null;
    let adjustedSalePrice = null;
    if (grossLine) {
        const text = segInRange(grossLine, descMin, col.adj.min + 60);
        const pctMatch = text.match(/([\d.]+)\s*%/);
        if (pctMatch)
            grossAdjustmentPercent = parseNum(pctMatch[1]);
        const priceMatch = text.match(/\$\s*([\d,]+)/);
        if (priceMatch)
            adjustedSalePrice = parseNum(priceMatch[1]);
    }

    return {
        number: compNum,
        // Join only the parts that exist so an empty street cell does not yield ", City".
        address: [street, cityVal].filter(Boolean).join(", "),
        projectNamePhase, proximityToSubject, salePrice, salePricePerSqft,
        dataSources, verificationSources, salesOrFinancing, concessions, dateOfSaleTime,
        location, leaseholdFeeSimple, hoaMoAssessment, commonElements, floorLocation,
        view, designStyle, qualityOfConstruction, actualAge,
        condition, conditionAdjustment,
        roomCountTotal, roomCountBedrooms, roomCountBaths, roomCountAdjustment,
        grossLivingArea, grossLivingAreaAdjustment, basementFinished,
        functionalUtility, heatingCooling, energyEfficientItems, garageCarport, porchPatioDeck,
        netAdjustmentTotal, netAdjustmentPercent, grossAdjustmentPercent, adjustedSalePrice,
        boundingBoxes: bb,
    };
}
190
+ // ── Sales Comparison Section ─────────────────────────────────────────────
191
/**
 * Parse the sales comparison grid of one page: listing/sales counts and price
 * ranges, the subject column, three comparable columns, the free-text summary
 * and the indicated value.
 *
 * @param {Array<object>} lines - Lines of the page, each with `fullText` and `segments`.
 * @param {number} [compStartNum=1] - Number assigned to the first comparable column.
 * @returns {object} Parsed section; values that cannot be found are null or "".
 */
export function parseSalesComparisonSection(lines, compStartNum = 1) {
    const bb = {};
    const row = (pattern) => findLine(lines, pattern);

    // "There are N comparable … ranging in price from $ X to $ Y" style sentences.
    const readCountAndRange = (line, countPattern) => {
        const result = { count: null, low: null, high: null };
        if (!line) {
            return result;
        }
        const countHit = line.fullText.match(countPattern);
        if (countHit) {
            result.count = parseNum(countHit[1]);
        }
        const rangeHit = line.fullText.match(/\$\s*([\d,]+).*?to\s*\$\s*([\d,]+)/i);
        if (rangeHit) {
            result.low = parseNum(rangeHit[1]);
            result.high = parseNum(rangeHit[2]);
        }
        return result;
    };
    const listings = readCountAndRange(row(/comparable properties currently offered/i), /are\s+(\d+)\s+comparable properties/i);
    const sales = readCountAndRange(row(/comparable sales in the subject/i), /are\s+(\d+)\s+comparable sales/i);

    // Subject column helpers
    const subBB = {};
    const subjectText = (line) => (line ? segInRange(line, SUBJECT_X.min, SUBJECT_X.max) : "");
    const subjectTextWithBB = (line, key) => {
        if (!line) {
            return "";
        }
        const hit = segWithBBox(line, SUBJECT_X.min, SUBJECT_X.max);
        if (hit.seg) {
            subBB[key] = toBBox(hit.seg, line);
        }
        return hit.text;
    };
    // First capture of `pattern` in the subject cell, parsed as a number.
    const subjectNumber = (line, pattern) => {
        if (!line) {
            return null;
        }
        const hit = segInRange(line, SUBJECT_X.min, SUBJECT_X.max).match(pattern);
        return hit ? parseNum(hit[1]) : null;
    };

    const subjAddress = subjectTextWithBB(row(/^Address/i), "address");
    const subjProject = subjectText(row(/^Project Name/i));

    const priceLine = row(/^Sale Price\b/i);
    let subjPrice = null;
    if (priceLine) {
        // The price window starts 20 units left of the subject column.
        subjPrice = parseCurrency(segInRange(priceLine, SUBJECT_X.min - 20, SUBJECT_X.max));
    }
    const subjPPS = subjectNumber(row(/^Sale Price\/Gross Liv/i), /([\d,.]+)/);

    const hoaLine = row(/^HOA Mo\. Assessment/i);
    const subjHOA = hoaLine ? parseNum(subjectText(hoaLine)) : null;

    let subjRCTotal = null;
    let subjRCBed = null;
    let subjRCBath = null;
    const rcLine = row(/^Room Count/i);
    if (rcLine) {
        const counts = [];
        for (const token of segInRange(rcLine, SUBJECT_X.min, SUBJECT_X.max).split(/\s+/)) {
            const value = parseNum(token);
            if (value !== null) {
                counts.push(value);
            }
        }
        if (counts.length >= 3) {
            subjRCTotal = counts[0];
            subjRCBed = counts[1];
            subjRCBath = counts[2];
        }
    }
    const subjGLA = subjectNumber(row(/^Gross Living Area/i), /([\d,]+)/);

    const subject = {
        address: subjAddress,
        projectNamePhase: subjProject,
        salePrice: subjPrice,
        salePricePerSqft: subjPPS,
        location: subjectText(row(/^Location\b/i)),
        leaseholdFeeSimple: subjectText(row(/^Leasehold\/Fee/i)),
        hoaMoAssessment: subjHOA,
        commonElements: subjectText(row(/^Common Elements/i)),
        floorLocation: subjectText(row(/^Floor Location/i)),
        view: subjectText(row(/^View\b/i)),
        designStyle: subjectText(row(/^Design \(Style\)/i)),
        qualityOfConstruction: subjectText(row(/^Quality of Construction/i)),
        actualAge: parseNum(subjectText(row(/^Actual Age/i))),
        condition: subjectTextWithBB(row(/^Condition\b/i), "condition"),
        roomCountTotal: subjRCTotal,
        roomCountBedrooms: subjRCBed,
        roomCountBaths: subjRCBath,
        grossLivingArea: subjGLA,
        basementFinished: subjectText(row(/^Basement & Finished/i)),
        functionalUtility: subjectText(row(/^Functional Utility/i)),
        heatingCooling: subjectText(row(/^Heating\/Cooling/i)),
        energyEfficientItems: subjectText(row(/^Energy Efficient/i)),
        garageCarport: subjectText(row(/^Garage\/Carport/i)),
        porchPatioDeck: subjectText(row(/^Porch\/Patio/i)),
        boundingBoxes: subBB,
    };

    // Three comparable columns per page, numbered from compStartNum.
    const comparables = Array.from({ length: 3 }, (_, i) => parseComparable(lines, compStartNum + i, COMP_COLS[i]));

    // Summary of Sales Comparison: text right of the label plus every following
    // non-empty line up to the "Indicated Value" / "INCOME" rows.
    let summaryOfSalesComparison = "";
    const sumLine = row(/^Summary of Sales Comparison/i);
    if (sumLine) {
        const collected = [];
        const valueSeg = sumLine.segments.find((s) => s.x > 150);
        if (valueSeg) {
            collected.push(valueSeg.text.trim());
        }
        let cursor = lines.indexOf(sumLine) + 1;
        while (cursor < lines.length && !/^Indicated Value|^INCOME/i.test(lines[cursor].fullText)) {
            const text = lines[cursor].fullText.trim();
            if (text) {
                collected.push(text);
            }
            cursor += 1;
        }
        summaryOfSalesComparison = collected.join(" ").trim();
        if (sumLine.segments[0]) {
            bb.summaryOfSalesComparison = toBBox(sumLine.segments[0], sumLine);
        }
    }

    // Indicated Value by Sales Comparison: first digit-leading segment right of x = 150.
    let indicatedValueBySalesComparison = null;
    const indLine = row(/^Indicated Value by Sales Comparison Approach\s*\$/i);
    if (indLine) {
        const amountSeg = indLine.segments.find((s) => s.x > 150 && /^\d/.test(s.text.trim()));
        if (amountSeg) {
            indicatedValueBySalesComparison = parseNum(amountSeg.text);
            bb.indicatedBySales = toBBox(amountSeg, indLine);
        }
    }

    return {
        activeListingsCount: listings.count,
        activeListingsLow: listings.low,
        activeListingsHigh: listings.high,
        comparableSalesCount: sales.count,
        comparableSalesLow: sales.low,
        comparableSalesHigh: sales.high,
        subject,
        comparables,
        summaryOfSalesComparison,
        indicatedValueBySalesComparison,
        boundingBoxes: bb,
    };
}
333
+ // ── Reconciliation ───────────────────────────────────────────────────────
334
/**
 * Parse the reconciliation block: indicated values, reconciliation comments,
 * the "This appraisal is made …" basis text, and the final value with its
 * effective date.
 *
 * @param {Array<object>} lines - Lines of the page, each with `fullText` and `segments`.
 * @returns {object} Parsed values; anything not found is null (numbers) or "" (text).
 */
export function parseReconciliationSection(lines) {
    const bb = {};

    // Joins `first` with every following non-empty line until `stop` matches.
    const collectFrom = (startLine, first, stop) => {
        const parts = [first];
        for (let i = lines.indexOf(startLine) + 1; i < lines.length; i++) {
            if (stop.test(lines[i].fullText))
                break;
            const t = lines[i].fullText.trim();
            if (t)
                parts.push(t);
        }
        return parts.join(" ").trim();
    };

    const indAllLine = findLine(lines, /^Indicated Value by: Sales Comparison/i);
    let indicatedValueBySalesComparison = null;
    let indicatedValueByIncomeApproach = null;
    if (indAllLine) {
        const salesSeg = indAllLine.segments.find((s) => s.x > 180 && s.x < 260 && /^\d/.test(s.text.trim()));
        if (salesSeg) {
            indicatedValueBySalesComparison = parseNum(salesSeg.text);
            bb.indicatedBySales = toBBox(salesSeg, indAllLine);
        }
        for (const seg of indAllLine.segments) {
            // Capture the amount that FOLLOWS "Income Approach"; taking the first
            // "$" amount in the segment would pick up the sales comparison value
            // whenever both share one segment.
            const m = seg.text.match(/Income Approach.*?\$\s*([\d,]+)/i);
            if (m)
                indicatedValueByIncomeApproach = parseNum(m[1]);
        }
    }

    // Reconciliation comments
    const recLine = findLine(lines, /The Direct Sales Comparison|ALL WEIGHT GIVEN|reconciliation/i);
    const reconciliationComments = recLine
        ? collectFrom(recLine, recLine.fullText.trim(), /^This appraisal is made|^RECONCILIATION/i)
        : "";

    // Appraisal basis. Blank lines are skipped here as well, matching the other
    // collectors, so the joined text contains no double spaces.
    const basisLine = findLine(lines, /This appraisal is made/i);
    const appraisalBasis = basisLine
        ? collectFrom(basisLine, basisLine.fullText.replace(/^RECONCILIATION/i, "").trim(), /^Based on a complete|^\$/i)
        : "";

    // Final value and effective date
    const valueLine = findLine(lines, /^\$\s*[\d,]+\s*,\s*as of/i);
    let finalValue = null;
    let effectiveDate = "";
    if (valueLine) {
        for (const seg of valueLine.segments) {
            const text = seg.text.trim();
            if (/^\d[\d,]*$/.test(text)) {
                finalValue = parseNum(seg.text);
                bb.finalValue = toBBox(seg, valueLine);
            }
            if (/^\d{2}\/\d{2}\/\d{4}$/.test(text)) {
                effectiveDate = text;
                bb.effectiveDate = toBBox(seg, valueLine);
            }
        }
    }

    return { indicatedValueBySalesComparison, indicatedValueByIncomeApproach, reconciliationComments, appraisalBasis, finalValue, effectiveDate, boundingBoxes: bb };
}
397
+ // ── Appraiser Info (Page 6) ──────────────────────────────────────────────
398
/**
 * Parse the appraiser (left half) or supervisory appraiser (right half)
 * signature block.
 *
 * @param {Array<object>} lines - Lines of the page, each with `segments` (`x`, `text`).
 * @param {boolean} supervisory - true reads segments with x in [290, 600), false x in [0, 290).
 * @returns {object | null} The parsed fields, or null when `supervisory` is set
 *   and no name could be found (no supervisory appraiser).
 */
export function parseAppraiserInfo(lines, supervisory) {
    const bb = {};
    const xMin = supervisory ? 290 : 0;
    const xMax = supervisory ? 600 : 290;
    const inColumn = (s) => s.x >= xMin && s.x < xMax;

    // Value of the first labelled field in this column: the text after the label
    // inside the same segment, else the next segment to its right on that line.
    function findFieldInRange(label) {
        for (const l of lines) {
            const seg = l.segments.find((s) => inColumn(s) && label.test(s.text));
            if (!seg)
                continue;
            const val = seg.text.replace(label, "").trim();
            if (val)
                return val;
            const next = l.segments.find((s) => s.x > seg.x && s.x < xMax && !label.test(s.text));
            if (next)
                return next.text.trim();
        }
        return "";
    }

    const name = findFieldInRange(/^(Supervisory )?Appraiser\s*$|^Name\s+/i);
    // For supervisory: if name is empty, there is no supervisory appraiser
    if (supervisory && !name)
        return null;

    // Prefer an explicit "Name …" segment, which also provides the bounding box.
    const isNameSeg = (s) => inColumn(s) && /^Name\s/i.test(s.text);
    let parsedName = "";
    const nameLine = lines.find((l) => l.segments.some(isNameSeg));
    if (nameLine) {
        const seg = nameLine.segments.find(isNameSeg);
        parsedName = seg.text.replace(/^Name\s+/i, "").trim();
        bb.name = toBBox(seg, nameLine);
    }

    return {
        name: parsedName || name,
        companyName: findFieldInRange(/^Company Name\s*/i),
        companyAddress: findFieldInRange(/^Company Address\s*/i),
        telephoneNumber: findFieldInRange(/^Telephone Number\s*/i),
        emailAddress: findFieldInRange(/^Email Address\s*/i),
        dateOfSignature: findFieldInRange(/^Date of Signature( and Report)?\s*/i),
        effectiveDateOfAppraisal: findFieldInRange(/^Effective Date of Appraisal\s*/i),
        stateCertification: findFieldInRange(/^State Certification #\s*/i),
        stateOrLicense: findFieldInRange(/^or State License #\s*/i),
        // The lookahead sits before the whitespace so "\s+" cannot backtrack past
        // it: "State  Certification # …" with extra spaces is not read as the state.
        state: findFieldInRange(/^State(?!\s+(?:Certification|License))\s+/i),
        expirationDate: findFieldInRange(/^Expiration Date of Certification or License\s*/i),
        boundingBoxes: bb,
    };
}
444
+ // ── Lender/Client Info (Page 6) ──────────────────────────────────────────
445
/**
 * Parse the LENDER/CLIENT block: labelled fields found below the
 * "LENDER/CLIENT" header in segments left of x = 290.
 *
 * @param {Array<object>} lines - Lines of the page, each with `fullText`, `y` and `segments`.
 * @returns {{name: string, companyName: string, companyAddress: string, emailAddress: string, boundingBoxes: object}}
 *   The parsed fields; every field is "" when the header or the field is absent.
 */
export function parseLenderClientInfo(lines) {
    const bb = {};
    const header = findLine(lines, /^LENDER\/CLIENT$/i);
    if (!header) {
        return { name: "", companyName: "", companyAddress: "", emailAddress: "", boundingBoxes: bb };
    }
    // Only lines with a larger y than the header that have a segment in the left half.
    const headerY = header.y;
    const candidates = lines.filter((l) => l.y > headerY && l.segments.some((s) => s.x < 290));

    // Text after the label in the same segment, else the next left-half segment on that line.
    const getField = (label) => {
        for (const line of candidates) {
            for (const seg of line.segments) {
                if (seg.x >= 290 || !label.test(seg.text)) {
                    continue;
                }
                const inline = seg.text.replace(label, "").trim();
                if (inline) {
                    return inline;
                }
                const neighbour = line.segments.find((s) => s.x > seg.x && s.x < 290 && !label.test(s.text));
                if (neighbour) {
                    return neighbour.text.trim();
                }
            }
        }
        return "";
    };

    const name = getField(/^Name\s*/i);
    const companyName = getField(/^Company Name\s*/i);
    const companyAddress = getField(/^Company Address\s*/i);
    const emailAddress = getField(/^Email Address\s*/i);
    return { name, companyName, companyAddress, emailAddress, boundingBoxes: bb };
}
@@ -0,0 +1,5 @@
1
+ import type { TextLine } from "@parseo/shared";
2
+ import type { Form1073Report } from "./types.js";
3
/** Extracts text lines from the PDF `buffer` and parses them as a Form 1073 report. */
+ export declare function parseForm1073(buffer: Buffer): Promise<Form1073Report>;
4
/**
 * Parses already-extracted text lines as a Form 1073 report.
 * `buffer` is optional and, when given, is used to extract filled checkbox rectangles;
 * `pageOffset` is added when computing which PDF page to read those rectangles from.
 */
+ export declare function parseForm1073FromLines(lines: TextLine[], buffer?: Buffer, pageOffset?: number): Promise<Form1073Report>;
5
+ //# sourceMappingURL=parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../src/form-1073/parser.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAc,MAAM,gBAAgB,CAAC;AAC3D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAKjD,wBAAsB,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC,CAG3E;AAED,wBAAsB,sBAAsB,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE,MAAM,CAAC,EAAE,MAAM,EAAE,UAAU,SAAI,GAAG,OAAO,CAAC,cAAc,CAAC,CAyGxH"}
@@ -0,0 +1,102 @@
1
+ import { extractLines, extractFilledRects, UnrecognizedFormatError } from "@parseo/shared";
2
+ import { parseSubjectSection, parseContractSection, parseNeighborhoodSection, parseProjectSiteSection, parseProjectInfoSection } from "./parse-page1.js";
3
+ import { parseProjectAnalysisSection, parseUnitDescriptionSection, parsePriorSaleHistorySection } from "./parse-page2.js";
4
+ import { parseSalesComparisonSection, parseReconciliationSection, parseAppraiserInfo, parseLenderClientInfo } from "./parse-sales.js";
5
/**
 * Parse a Form 1073 PDF: extract its text lines, then hand them (together
 * with the buffer) to parseForm1073FromLines.
 *
 * @param {Buffer} buffer - PDF file contents.
 * @returns {Promise<object>} The parsed report.
 */
export async function parseForm1073(buffer) {
    return parseForm1073FromLines(await extractLines(buffer), buffer);
}
9
/**
 * Parse a Form 1073 report from already-extracted text lines.
 *
 * @param {Array<object>} lines - Text lines, each with `fullText` and `page`.
 * @param {Buffer} [buffer] - PDF contents; when given, filled rectangles of form
 *   page 1 are extracted and passed to the neighborhood parser as checkbox marks.
 * @param {number} [pageOffset=0] - Added to the internal form offset when working
 *   out which real PDF page holds form page 1.
 * @returns {Promise<object>} The parsed report sections.
 * @throws {UnrecognizedFormatError} When the first 30 lines carry no Form 1073 signature.
 */
export async function parseForm1073FromLines(lines, buffer, pageOffset = 0) {
    // Format fingerprint: must have "Individual Condominium Unit Appraisal Report"
    // or "Form 1073" in the first 30 lines
    const head = lines.slice(0, 30).map((l) => l.fullText).join("\n");
    if (!/Individual Condominium Unit Appraisal Report|Form 1073/i.test(head)) {
        throw new UnrecognizedFormatError("Form1073", "first 30 lines do not contain a Form 1073 / Individual Condominium Unit Appraisal Report signature");
    }
    // Find the page that starts the main form (has the title); default to page 1.
    const titleLine = lines.find((l) => /Individual Condominium Unit Appraisal Report/i.test(l.fullText));
    const formStartPage = titleLine ? titleLine.page : 1;
    // If the form doesn't start on page 1, drop earlier pages and renumber the rest from 1.
    let internalOffset = 0;
    let workLines = lines;
    if (formStartPage > 1) {
        internalOffset = formStartPage - 1;
        workLines = lines
            .filter((l) => l.page >= formStartPage)
            .map((l) => ({ ...l, page: l.page - internalOffset }));
    }
    const linesOnPage = (pageNum) => workLines.filter((l) => l.page === pageNum);
    // ── Extract checkbox rects from the PDF graphics layer ──
    let checkboxRects = [];
    if (buffer) {
        // The real PDF page accounts for both server-level skipped pages and internal form offset
        const pdfPage1 = pageOffset + internalOffset + 1;
        checkboxRects = await extractFilledRects(buffer, [pdfPage1], { minSize: 3, maxSize: 10 });
    }
    // ── Page 1: Subject, Contract, Neighborhood, Project Site, Project Info ──
    const page1 = linesOnPage(1);
    const subject = parseSubjectSection(page1);
    const contract = parseContractSection(page1);
    const neighborhood = parseNeighborhoodSection(page1, checkboxRects);
    const projectSite = parseProjectSiteSection(page1);
    const projectInfo = parseProjectInfoSection(page1);
    // ── Page 2: Project Analysis, Unit Description, Prior Sale History ──
    const page2 = linesOnPage(2);
    const projectAnalysis = parseProjectAnalysisSection(page2);
    const unitDescription = parseUnitDescriptionSection(page2);
    const priorSaleHistory = parsePriorSaleHistorySection(page2);
    // ── Page 3: Sales Comparison (comps 1-3), Reconciliation ──
    const page3 = linesOnPage(3);
    const salesComparison = parseSalesComparisonSection(page3);
    const reconciliation = parseReconciliationSection(page3);
    // ── Pages 4-5: Definitions/Certifications (boilerplate, skip) ──
    // ── Page 6: Appraiser info ──
    const page6 = linesOnPage(6);
    const appraiser = parseAppraiserInfo(page6, false) ?? {
        name: "", companyName: "", companyAddress: "", telephoneNumber: "",
        emailAddress: "", dateOfSignature: "", effectiveDateOfAppraisal: "",
        stateCertification: "", stateOrLicense: "", state: "", expirationDate: "",
        boundingBoxes: {},
    };
    const supervisoryAppraiser = parseAppraiserInfo(page6, true);
    const lenderClient = parseLenderClientInfo(page6);
    // ── Additional comparables pages (7, 8, ...) ──
    // Computed once with reduce: recomputing Math.max(...pages) on every iteration
    // is wasted work, and spreading a very large array as arguments can throw a RangeError.
    const lastPage = workLines.reduce((max, l) => Math.max(max, l.page), -Infinity);
    for (let pageNum = 7; pageNum <= lastPage; pageNum++) {
        const pageLines = linesOnPage(pageNum);
        if (pageLines.length === 0)
            continue;
        // Check if this page has a comparable sales grid
        const hasCompGrid = pageLines.some((l) => /^FEATURE\s+SUBJECT\s+COMPARABLE SALE/i.test(l.fullText));
        if (!hasCompGrid)
            continue;
        // Determine the comp start number from the header, else continue the numbering.
        let compStart = salesComparison.comparables.length + 1;
        const headerLine = pageLines.find((l) => /COMPARABLE SALE #\s*(\d+)/i.test(l.fullText));
        if (headerLine) {
            const m = headerLine.fullText.match(/COMPARABLE SALE #\s*(\d+)/i);
            if (m)
                compStart = parseInt(m[1], 10);
        }
        const additionalComps = parseSalesComparisonSection(pageLines, compStart);
        salesComparison.comparables.push(...additionalComps.comparables);
    }
    return {
        subject,
        contract,
        neighborhood,
        projectSite,
        projectInfo,
        projectAnalysis,
        unitDescription,
        priorSaleHistory,
        salesComparison,
        reconciliation,
        appraiser,
        supervisoryAppraiser,
        lenderClient,
    };
}