@parseo/appraisals 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +35 -0
  2. package/dist/form-1004mc/extract-checkboxes.d.ts +43 -0
  3. package/dist/form-1004mc/extract-checkboxes.d.ts.map +1 -0
  4. package/dist/form-1004mc/extract-checkboxes.js +145 -0
  5. package/dist/form-1004mc/index.d.ts +3 -0
  6. package/dist/form-1004mc/index.d.ts.map +1 -0
  7. package/dist/form-1004mc/index.js +1 -0
  8. package/dist/form-1004mc/parse-page1.d.ts +8 -0
  9. package/dist/form-1004mc/parse-page1.d.ts.map +1 -0
  10. package/dist/form-1004mc/parse-page1.js +760 -0
  11. package/dist/form-1004mc/parse-sales.d.ts +6 -0
  12. package/dist/form-1004mc/parse-sales.d.ts.map +1 -0
  13. package/dist/form-1004mc/parse-sales.js +505 -0
  14. package/dist/form-1004mc/parser.d.ts +5 -0
  15. package/dist/form-1004mc/parser.d.ts.map +1 -0
  16. package/dist/form-1004mc/parser.js +437 -0
  17. package/dist/form-1004mc/types.d.ts +302 -0
  18. package/dist/form-1004mc/types.d.ts.map +1 -0
  19. package/dist/form-1004mc/types.js +1 -0
  20. package/dist/form-1073/index.d.ts +3 -0
  21. package/dist/form-1073/index.d.ts.map +1 -0
  22. package/dist/form-1073/index.js +1 -0
  23. package/dist/form-1073/parse-page1.d.ts +8 -0
  24. package/dist/form-1073/parse-page1.d.ts.map +1 -0
  25. package/dist/form-1073/parse-page1.js +704 -0
  26. package/dist/form-1073/parse-page2.d.ts +6 -0
  27. package/dist/form-1073/parse-page2.d.ts.map +1 -0
  28. package/dist/form-1073/parse-page2.js +438 -0
  29. package/dist/form-1073/parse-sales.d.ts +7 -0
  30. package/dist/form-1073/parse-sales.d.ts.map +1 -0
  31. package/dist/form-1073/parse-sales.js +477 -0
  32. package/dist/form-1073/parser.d.ts +5 -0
  33. package/dist/form-1073/parser.d.ts.map +1 -0
  34. package/dist/form-1073/parser.js +102 -0
  35. package/dist/form-1073/types.d.ts +300 -0
  36. package/dist/form-1073/types.d.ts.map +1 -0
  37. package/dist/form-1073/types.js +1 -0
  38. package/dist/index.d.ts +13 -0
  39. package/dist/index.d.ts.map +1 -0
  40. package/dist/index.js +9 -0
  41. package/dist/richer-values/index.d.ts +3 -0
  42. package/dist/richer-values/index.d.ts.map +1 -0
  43. package/dist/richer-values/index.js +1 -0
  44. package/dist/richer-values/parser.d.ts +5 -0
  45. package/dist/richer-values/parser.d.ts.map +1 -0
  46. package/dist/richer-values/parser.js +1067 -0
  47. package/dist/richer-values/types.d.ts +225 -0
  48. package/dist/richer-values/types.d.ts.map +1 -0
  49. package/dist/richer-values/types.js +1 -0
  50. package/package.json +24 -0
package/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # @parseo/appraisals
2
+
3
+ Deterministic PDF parsers for appraisal reports. Supports Richer Values, Form 1004-MC (URAR), and Form 1073 (Condo).
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @parseo/appraisals
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```typescript
14
+ import { richerValues, form1004MC, form1073 } from "@parseo/appraisals";
15
+
16
+ const richerReport = await richerValues(buffer); // RicherValuesReport
17
+ const urarReport = await form1004MC(buffer); // Form1004MCReport
18
+ const condoReport = await form1073(buffer); // Form1073Report
19
+ ```
20
+
21
+ | Format | Import name |
22
+ |---|---|
23
+ | Richer Values | `richerValues` |
24
+ | Form 1004-MC (URAR) | `form1004MC` |
25
+ | Form 1073 (Condo) | `form1073` |
26
+
27
+ ## Data conventions
28
+
29
+ - **Dates**: ISO 8601 strings (`"2024-08-31"`) or `null`
30
+ - **Currency**: Plain numbers (`54961.89`, not `"$54,961.89"`)
31
+ - **Bounding boxes**: `{ x, y, width, height, pageNumber }` on every extracted field
32
+
33
+ ## License
34
+
35
+ MIT
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Checkbox extraction from PDF vector graphics.
3
+ *
4
+ * In flattened TOTAL-generated PDFs, checkbox marks are rendered as
5
+ * constructPath operations (small ~8.4×8.4 pt shapes). An unchecked
6
+ * checkbox produces 1 path (the empty square outline). A checked
7
+ * checkbox produces 5 paths (square outline + 4 X-mark line segments).
8
+ *
9
+ * We exploit this by counting how many path shapes fall at each
10
+ * position — ≥ 3 means checked.
11
+ */
12
+ export interface CheckedPosition {
13
+ /** x of the checkbox square (left edge) */
14
+ x: number;
15
+ /** y in top-down text coordinates (matches TextLine.y) */
16
+ y: number;
17
+ }
18
+ /**
19
+ * Extract all checked checkbox positions from a given PDF page.
20
+ *
21
+ * @returns Array of {x, y} positions where a checkbox is checked,
22
+ * in the same coordinate space as extracted TextLine objects.
23
+ */
24
+ export declare function extractCheckedBoxes(buffer: Buffer, pageNum: number): Promise<CheckedPosition[]>;
25
+ /**
26
+ * Given a list of checked positions and a set of checkbox options
27
+ * at known x-positions on a given text-y row, return which option is checked.
28
+ *
29
+ * The checkbox square is rendered ~7pt above the text label (lower y value
30
+ * in top-down coordinates) and ~12pt to its left. Since rows are only
31
+ * ~11pt apart, we use a directional y-match: the checkbox must be above
32
+ * the text line (yDiff in [2, 12]) to avoid cross-row false positives.
33
+ *
34
+ * @param checked - Array of checked checkbox positions
35
+ * @param textY - The y-coordinate of the text row (from TextLine.y)
36
+ * @param options - Array of `{ x, label }` entries: the x-position of each option's label text and the label to return
37
+ * @param xTolerance - x-axis matching tolerance in points (default 6)
38
+ */
39
+ export declare function resolveCheckbox(checked: CheckedPosition[], textY: number, options: {
40
+ x: number;
41
+ label: string;
42
+ }[], xTolerance?: number): string;
43
+ //# sourceMappingURL=extract-checkboxes.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extract-checkboxes.d.ts","sourceRoot":"","sources":["../../src/form-1004mc/extract-checkboxes.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAqBH,MAAM,WAAW,eAAe;IAC9B,2CAA2C;IAC3C,CAAC,EAAE,MAAM,CAAC;IACV,0DAA0D;IAC1D,CAAC,EAAE,MAAM,CAAC;CACX;AAED;;;;;GAKG;AACH,wBAAsB,mBAAmB,CACvC,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,eAAe,EAAE,CAAC,CAgG5B;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,eAAe,CAC7B,OAAO,EAAE,eAAe,EAAE,EAC1B,KAAK,EAAE,MAAM,EACb,OAAO,EAAE;IAAE,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,EAAE,EACvC,UAAU,SAAI,GACb,MAAM,CAWR"}
@@ -0,0 +1,145 @@
1
+ /**
2
+ * Checkbox extraction from PDF vector graphics.
3
+ *
4
+ * In flattened TOTAL-generated PDFs, checkbox marks are rendered as
5
+ * constructPath operations (small ~8.4×8.4 pt shapes). An unchecked
6
+ * checkbox produces 1 path (the empty square outline). A checked
7
+ * checkbox produces 5 paths (square outline + 4 X-mark line segments).
8
+ *
9
+ * We exploit this by counting how many path shapes fall at each
10
+ * position — ≥ 3 means checked.
11
+ */
12
+ import { OPS, getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
13
/**
 * Compose two 2D affine transforms given as six-element arrays
 * `[a, b, c, d, e, f]`. The result maps a point through `b` first and
 * then through `a`.
 */
function multiply(a, b) {
    // Linear (2×2) part: row `row` of `a` against the column of `b` that starts at index `col`.
    const linear = (row, col) => a[row] * b[col] + a[row + 2] * b[col + 1];
    const scaleX = linear(0, 0);
    const skewY = linear(1, 0);
    const skewX = linear(0, 2);
    const scaleY = linear(1, 2);
    // The translation column additionally picks up the translation of `a`.
    const shiftX = linear(0, 4) + a[4];
    const shiftY = linear(1, 4) + a[5];
    return [scaleX, skewY, skewX, scaleY, shiftX, shiftY];
}
23
/**
 * Map the point (x, y) through the affine transform `m`
 * (`[a, b, c, d, e, f]` layout) and return the result as `[x', y']`.
 */
function applyM(m, x, y) {
    const mappedX = m[0] * x + m[2] * y + m[4];
    const mappedY = m[1] * x + m[3] * y + m[5];
    return [mappedX, mappedY];
}
26
/**
 * Number of path operands consumed by one constructPath sub-operation.
 * Operations without operands (closePath) and unknown opcodes report 0.
 */
function pathOpArgCount(op) {
    switch (op) {
        case OPS.moveTo:
        case OPS.lineTo:
            return 2;
        case OPS.curveTo:
            return 6;
        case OPS.curveTo2:
        case OPS.curveTo3:
            return 4;
        case OPS.rectangle:
            return 4;
        default:
            return 0;
    }
}
/**
 * Walk an operator list and collect every path whose page-space bounding
 * box is checkbox-sized (6–11 pt on both axes). Returns the top-left
 * corner of each such path in top-down coordinates, rounded to 0.1 pt.
 */
function collectCheckboxSizedShapes(fnArray, argsArray, pageHeight) {
    // Track current transformation matrix with save/restore stack
    let ctm = [1, 0, 0, 1, 0, 0];
    const stack = [];
    const shapes = [];
    for (let i = 0; i < fnArray.length; i++) {
        const fn = fnArray[i];
        const args = argsArray[i];
        if (fn === OPS.save) {
            stack.push([...ctm]);
            continue;
        }
        if (fn === OPS.restore) {
            // An unbalanced restore falls back to the identity matrix.
            ctm = stack.pop() ?? [1, 0, 0, 1, 0, 0];
            continue;
        }
        if (fn === OPS.transform) {
            ctm = multiply(ctm, args);
            continue;
        }
        if (fn !== OPS.constructPath) {
            continue;
        }
        const [opcodes, pathArgs] = args;
        let ai = 0;
        const points = [];
        for (const op of opcodes) {
            const argc = pathOpArgCount(op);
            if (argc === 0) {
                continue;
            }
            if (op === OPS.rectangle) {
                const rx = pathArgs[ai];
                const ry = pathArgs[ai + 1];
                const rw = pathArgs[ai + 2];
                const rh = pathArgs[ai + 3];
                // All four corners, so the bounding box stays right under rotation/skew.
                points.push(applyM(ctm, rx, ry), applyM(ctm, rx + rw, ry), applyM(ctm, rx + rw, ry + rh), applyM(ctm, rx, ry + rh));
            }
            else {
                // moveTo/lineTo carry a single point; curve operations end on
                // their last operand pair. Control points are skipped, but
                // their operands must still be consumed so that the following
                // operations read the right coordinates.
                points.push(applyM(ctm, pathArgs[ai + argc - 2], pathArgs[ai + argc - 1]));
            }
            ai += argc;
        }
        if (points.length < 2) {
            continue;
        }
        // Running min/max instead of Math.min(...xs): no argument-count limit.
        let minX = Infinity;
        let maxX = -Infinity;
        let minY = Infinity;
        let maxY = -Infinity;
        for (const p of points) {
            minX = Math.min(minX, p[0]);
            maxX = Math.max(maxX, p[0]);
            minY = Math.min(minY, p[1]);
            maxY = Math.max(maxY, p[1]);
        }
        const w = maxX - minX;
        const h = maxY - minY;
        // Checkbox squares are ~8.4×8.4 pt; allow 6-11 range
        if (w >= 6 && w <= 11 && h >= 6 && h <= 11) {
            // Convert from PDF bottom-up to top-down text coordinates
            const textY = pageHeight - maxY;
            shapes.push({ x: Math.round(minX * 10) / 10, y: Math.round(textY * 10) / 10 });
        }
    }
    return shapes;
}
/**
 * Extract all checked checkbox positions from a given PDF page.
 *
 * Checkbox-sized paths are bucketed on a 2 pt grid; a bucket holding at
 * least three paths (outline plus X-mark strokes) is reported as checked.
 *
 * @param buffer - Raw PDF bytes. They are copied before being handed to
 * pdf.js, so the caller's buffer is never transferred or detached.
 * @param pageNum - Page number forwarded to pdf.js `getPage` (1-based per
 * the pdf.js API).
 * @returns Array of {x, y} positions where a checkbox is checked,
 * in the same coordinate space as extracted TextLine objects. The values
 * are the 2 pt grid coordinates of the bucket, not raw path coordinates.
 */
export async function extractCheckedBoxes(buffer, pageNum) {
    // pdf.js may take ownership of the typed array it receives, so pass a copy
    // rather than a view over the caller's memory.
    const data = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength).slice();
    const loadingTask = getDocument({ data, useSystemFonts: true });
    try {
        const pdf = await loadingTask.promise;
        const page = await pdf.getPage(pageNum);
        const pageHeight = page.getViewport({ scale: 1 }).height;
        const ops = await page.getOperatorList();
        // Collect all small (~8×8 pt) path shapes and their page coordinates
        const shapes = collectCheckboxSizedShapes(ops.fnArray, ops.argsArray, pageHeight);
        // Group shapes by position: snap both coordinates to a 2 pt grid
        const groups = new Map();
        for (const s of shapes) {
            const key = `${Math.round(s.x / 2) * 2},${Math.round(s.y / 2) * 2}`;
            groups.set(key, (groups.get(key) ?? 0) + 1);
        }
        // Checked checkboxes have >= 3 shapes at the same position
        const checked = [];
        for (const [key, count] of groups) {
            if (count >= 3) {
                const [x, y] = key.split(",").map(Number);
                checked.push({ x, y });
            }
        }
        return checked;
    }
    finally {
        // Release the document and its worker even when parsing fails.
        await loadingTask.destroy();
    }
}
119
/**
 * Pick the label of the first option whose checkbox is ticked on a text row.
 *
 * Each option carries the x-position of its label text; the box itself is
 * expected 12.3 pt to the left of that. A ticked box counts for the row only
 * when it lies 2–12 pt above the row's y (smaller y in top-down
 * coordinates). Rows are only ~11 pt apart, so this one-sided window keeps
 * boxes belonging to neighbouring rows from matching.
 *
 * @param checked - Array of checked checkbox positions
 * @param textY - The y-coordinate of the text row (from TextLine.y)
 * @param options - Candidate options, tried in order; each entry gives the
 * x-position of the label text and the label to return
 * @param xTolerance - Exclusive upper bound, in points, on the x distance
 * between a ticked box and the expected box position (default 6)
 * @returns The label of the first option with a matching ticked box, or ""
 * when no option matches
 */
export function resolveCheckbox(checked, textY, options, xTolerance = 6) {
    // Horizontal distance from the label text back to its checkbox square.
    const labelToBoxOffset = 12.3;
    const winner = options.find((candidate) => {
        const expectedBoxX = candidate.x - labelToBoxOffset;
        return checked.some((box) => {
            const rise = textY - box.y; // positive when the box sits above the text
            const withinX = Math.abs(box.x - expectedBoxX) < xTolerance;
            const withinY = rise >= 2 && rise <= 12;
            return withinX && withinY;
        });
    });
    return winner ? winner.label : "";
}
@@ -0,0 +1,3 @@
1
+ export { parseForm1004MC, parseForm1004MCFromLines } from "./parser.js";
2
+ export type { Form1004MCReport, SubjectSection, ContractSection, NeighborhoodSection, SiteSection, ImprovementsSection, ComparableSale, SalesComparisonSection, ReconciliationSection, CostApproachSection, MarketConditionsAddendum, Form1004MCHeader, InventoryAnalysis, MedianSaleListData, MarketAnalysisText, CondoCoopProjects, AppraiserInfo, TimePeriodRow, Trend, } from "./types.js";
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/form-1004mc/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,wBAAwB,EAAE,MAAM,aAAa,CAAC;AACxE,YAAY,EACV,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,mBAAmB,EACnB,WAAW,EACX,mBAAmB,EACnB,cAAc,EACd,sBAAsB,EACtB,qBAAqB,EACrB,mBAAmB,EACnB,wBAAwB,EACxB,gBAAgB,EAChB,iBAAiB,EACjB,kBAAkB,EAClB,kBAAkB,EAClB,iBAAiB,EACjB,aAAa,EACb,aAAa,EACb,KAAK,GACN,MAAM,YAAY,CAAC"}
@@ -0,0 +1 @@
1
+ export { parseForm1004MC, parseForm1004MCFromLines } from "./parser.js";
@@ -0,0 +1,8 @@
1
+ import type { TextLine } from "@parseo/shared";
2
+ import type { SubjectSection, ContractSection, NeighborhoodSection, SiteSection, ImprovementsSection } from "./types.js";
3
+ export declare function parseSubjectSection(lines: TextLine[]): SubjectSection;
4
+ export declare function parseContractSection(lines: TextLine[]): ContractSection;
5
+ export declare function parseNeighborhoodSection(lines: TextLine[]): NeighborhoodSection;
6
+ export declare function parseSiteSection(lines: TextLine[]): SiteSection;
7
+ export declare function parseImprovementsSection(lines: TextLine[]): ImprovementsSection;
8
+ //# sourceMappingURL=parse-page1.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parse-page1.d.ts","sourceRoot":"","sources":["../../src/form-1004mc/parse-page1.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAe,MAAM,gBAAgB,CAAC;AAC5D,OAAO,KAAK,EACV,cAAc,EACd,eAAe,EACf,mBAAmB,EACnB,WAAW,EACX,mBAAmB,EACpB,MAAM,YAAY,CAAC;AAiDpB,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,cAAc,CAuGrE;AAID,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,eAAe,CA6CvE;AAID,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,mBAAmB,CA0I/E;AAID,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,WAAW,CAkE/D;AAID,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,mBAAmB,CAgN/E"}