@parseo/core 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,32 @@
1
+ # @parseo/core
2
+
3
+ Universal document parser for underwriting PDFs. Auto-classifies the document and routes to the correct parser.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @parseo/core
9
+ ```
10
+
11
+ This installs all Parseo parsers (credit reports, background checks, appraisals, bank statements).
12
+
13
+ ## Usage
14
+
15
+ ```typescript
16
+ import { parse } from "@parseo/core";
17
+
18
+ const result = await parse(buffer);
19
+
20
+ if (result) {
21
+ result.format; // "chase", "credit-report", "smartlinx", etc.
22
+ result.data; // Parsed document (type depends on format)
23
+ result.confidence; // Classifier confidence score
24
+ result.skippedPages; // Number of intro pages stripped
25
+ }
26
+ ```
27
+
28
+ `parse()` handles text extraction, classification, intro-page skipping, and correction of bounding-box page numbers to account for the skipped pages. It resolves to `null` if no known format is detected.
29
+
30
+ ## License
31
+
32
+ MIT
@@ -0,0 +1,3 @@
1
+ export { parse } from "./parse.js";
2
+ export type { ParseResult } from "./parse.js";
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,YAAY,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1 @@
1
+ export { parse } from "./parse.js";
@@ -0,0 +1,19 @@
1
+ import type { FormatName } from "@parseo/shared";
2
/**
 * Outcome of a successful `parse()` call.
 */
export interface ParseResult {
    /** Name of the document format that was detected. */
    format: FormatName;
    /** Parser output; its shape depends on `format`, so narrow it before use. */
    data: unknown;
    /** How many intro pages were skipped before parsing. */
    skippedPages: number;
    /** Confidence score reported by the classifier. */
    confidence: number;
}
12
/**
 * Universal entry point: extracts text from the PDF, classifies the
 * document, and hands it to the parser that matches the detected format.
 *
 * @param buffer - The PDF to parse.
 * @returns A promise for the parse result, or for `null` when no known
 *   format is detected.
 */
export declare function parse(buffer: Buffer): Promise<ParseResult | null>;
19
+ //# sourceMappingURL=parse.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parse.d.ts","sourceRoot":"","sources":["../src/parse.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAY,UAAU,EAAkB,MAAM,gBAAgB,CAAC;AA4B3E,MAAM,WAAW,WAAW;IAC1B,gCAAgC;IAChC,MAAM,EAAE,UAAU,CAAC;IACnB,2CAA2C;IAC3C,IAAI,EAAE,OAAO,CAAC;IACd,8CAA8C;IAC9C,YAAY,EAAE,MAAM,CAAC;IACrB,kCAAkC;IAClC,UAAU,EAAE,MAAM,CAAC;CACpB;AA4BD;;;;;GAKG;AACH,wBAAsB,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAgFvE"}
package/dist/parse.js ADDED
@@ -0,0 +1,111 @@
1
+ import { extractLines, classifyDocument } from "@parseo/shared";
2
+ import { parseSmartLinxReportFromLines } from "@parseo/background-checks";
3
+ import { parseCreditReportFromLines } from "@parseo/credit-reports";
4
+ import { parseRicherValuesReportFromLines, parseForm1004MCFromLines, parseForm1073FromLines, } from "@parseo/appraisals";
5
+ import { parseWellsFargoFromLines, parseTDBankFromLines, parseChaseFromLines, parseBankOfAmericaFromLines, parseNavyFederalFromLines, parseThirdFederalFromLines, parseCitibankFromLines, parseRelayFromLines, parseGroveBankFromLines, parseCapitalOneFromLines, parseTruistFromLines, parsePNCFromLines, parseDiscoverFromLines, parseSynovusFromLines, } from "@parseo/bank-statements";
6
+ // ── Page helpers ─────────────────────────────────────────────
7
/**
 * Drops the first `pagesToSkip` pages and renumbers the remaining lines so
 * that their `page` values are shifted down by `pagesToSkip`.
 *
 * The first page is taken to be the smallest `page` value found on any line,
 * so the result does not depend on the lines being sorted by page. The input
 * array and its line objects are not mutated; kept lines are shallow copies.
 *
 * @param lines - Line objects carrying a numeric `page` property.
 * @param pagesToSkip - Number of leading pages to remove.
 * @returns New line objects for the kept pages, with `page` renumbered.
 */
function skipPages(lines, pagesToSkip) {
    // Scan for the real minimum instead of trusting lines[0] to be on the
    // first page; lines without a comparable page number are ignored here.
    let minPage = Infinity;
    for (const line of lines) {
        if (line.page < minPage) {
            minPage = line.page;
        }
    }
    if (minPage === Infinity) {
        // No line supplied a usable page number (e.g. empty input).
        minPage = 1;
    }
    const firstKeptPage = minPage + pagesToSkip;
    return lines
        .filter((line) => line.page >= firstKeptPage)
        .map((line) => ({ ...line, page: line.page - pagesToSkip }));
}
14
/**
 * Adds `offset` to the `pageNumber` of every bounding box reachable from
 * `obj`, mutating the objects in place.
 *
 * A bounding box is any object with a numeric `pageNumber` plus `x` and `y`
 * properties; such objects are updated and not searched any further. Every
 * other object or array is walked through its own enumerable values.
 *
 * Each object is visited at most once, so cyclic structures terminate and a
 * bounding box referenced from several places is offset exactly once.
 *
 * @param obj - Root value to walk; non-objects and `null`/`undefined` are ignored.
 * @param offset - Amount to add to each `pageNumber`; `0` is a no-op.
 */
function offsetBoundingBoxPages(obj, offset) {
    if (offset === 0 || obj == null || typeof obj !== "object")
        return;
    const visited = new WeakSet();
    // Explicit stack instead of recursion: deep trees cannot overflow the call stack.
    const pending = [obj];
    while (pending.length > 0) {
        const node = pending.pop();
        if (visited.has(node))
            continue;
        visited.add(node);
        if (typeof node.pageNumber === "number" && "x" in node && "y" in node) {
            node.pageNumber = node.pageNumber + offset;
            continue;
        }
        for (const child of Object.values(node)) {
            if (child != null && typeof child === "object") {
                pending.push(child);
            }
        }
    }
}
28
+ // ── Main ─────────────────────────────────────────────────────
29
+ /**
30
+ * Universal parser. Extracts text from the PDF, classifies the document,
31
+ * and routes to the correct parser.
32
+ *
33
+ * Returns `null` if no known format is detected.
34
+ */
35
+ export async function parse(buffer) {
36
+ const allLines = await extractLines(buffer);
37
+ const classification = classifyDocument(allLines);
38
+ if (!classification)
39
+ return null;
40
+ const { format, skip, confidence } = classification;
41
+ const lines = skip > 0 ? skipPages(allLines, skip) : allLines;
42
+ let data;
43
+ switch (format) {
44
+ case "smartlinx":
45
+ data = parseSmartLinxReportFromLines(lines);
46
+ break;
47
+ case "credit-report": {
48
+ const cr = parseCreditReportFromLines(lines);
49
+ data = {
50
+ format: cr.format,
51
+ report: cr.report,
52
+ ...(cr.creditXpert ? { creditXpert: cr.creditXpert } : {}),
53
+ };
54
+ break;
55
+ }
56
+ case "richer-values":
57
+ data = parseRicherValuesReportFromLines(lines);
58
+ break;
59
+ case "form-1004mc":
60
+ data = await parseForm1004MCFromLines(lines, buffer, skip);
61
+ break;
62
+ case "form-1073":
63
+ data = await parseForm1073FromLines(lines, buffer, skip);
64
+ break;
65
+ case "wells-fargo":
66
+ data = parseWellsFargoFromLines(lines);
67
+ break;
68
+ case "td-bank":
69
+ data = parseTDBankFromLines(lines);
70
+ break;
71
+ case "chase":
72
+ data = parseChaseFromLines(lines);
73
+ break;
74
+ case "bank-of-america":
75
+ data = parseBankOfAmericaFromLines(lines);
76
+ break;
77
+ case "navy-federal":
78
+ data = parseNavyFederalFromLines(lines);
79
+ break;
80
+ case "third-federal":
81
+ data = parseThirdFederalFromLines(lines);
82
+ break;
83
+ case "citibank":
84
+ data = parseCitibankFromLines(lines);
85
+ break;
86
+ case "relay":
87
+ data = parseRelayFromLines(lines);
88
+ break;
89
+ case "grove-bank":
90
+ data = parseGroveBankFromLines(lines);
91
+ break;
92
+ case "capital-one":
93
+ data = parseCapitalOneFromLines(lines);
94
+ break;
95
+ case "truist":
96
+ data = parseTruistFromLines(lines);
97
+ break;
98
+ case "pnc":
99
+ data = parsePNCFromLines(lines);
100
+ break;
101
+ case "discover":
102
+ data = parseDiscoverFromLines(lines);
103
+ break;
104
+ case "synovus":
105
+ data = parseSynovusFromLines(lines);
106
+ break;
107
+ }
108
+ if (skip > 0)
109
+ offsetBoundingBoxPages(data, skip);
110
+ return { format, data, skippedPages: skip, confidence };
111
+ }
package/package.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "name": "@parseo/core",
3
+ "version": "1.0.0",
4
+ "type": "module",
5
+ "main": "./dist/index.js",
6
+ "types": "./dist/index.d.ts",
7
+ "exports": {
8
+ ".": {
9
+ "types": "./dist/index.d.ts",
10
+ "import": "./dist/index.js"
11
+ }
12
+ },
13
+ "license": "MIT",
14
+ "publishConfig": {
15
+ "access": "public"
16
+ },
17
+ "files": ["dist"],
18
+ "scripts": {
19
+ "build": "tsc"
20
+ },
21
+ "dependencies": {
22
+ "@parseo/shared": "1.0.0",
23
+ "@parseo/credit-reports": "1.0.0",
24
+ "@parseo/background-checks": "1.0.0",
25
+ "@parseo/appraisals": "1.0.0",
26
+ "@parseo/bank-statements": "1.0.0"
27
+ }
28
+ }