@parseo/core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1 -0
- package/dist/parse.d.ts +19 -0
- package/dist/parse.d.ts.map +1 -0
- package/dist/parse.js +111 -0
- package/package.json +28 -0
package/README.md
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# @parseo/core
|
|
2
|
+
|
|
3
|
+
Universal document parser for underwriting PDFs. Auto-classifies the document and routes to the correct parser.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @parseo/core
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
This installs all Parseo parsers (credit reports, background checks, appraisals, bank statements).
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```typescript
|
|
16
|
+
import { parse } from "@parseo/core";
|
|
17
|
+
|
|
18
|
+
const result = await parse(buffer);
|
|
19
|
+
|
|
20
|
+
if (result) {
|
|
21
|
+
result.format; // "chase", "credit-report", "smartlinx", etc.
|
|
22
|
+
result.data; // Parsed document (type depends on format)
|
|
23
|
+
result.confidence; // Classifier confidence score
|
|
24
|
+
result.skippedPages; // Number of intro pages stripped
|
|
25
|
+
}
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
`parse()` handles text extraction, classification, page skipping, and bounding box offset correction. Returns `null` if no known format is detected.
|
|
29
|
+
|
|
30
|
+
## License
|
|
31
|
+
|
|
32
|
+
MIT
|
package/dist/index.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,YAAY,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { parse } from "./parse.js";
|
package/dist/parse.d.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { FormatName } from "@parseo/shared";
|
|
2
|
+
/** Result returned by `parse()` when the document is classified as a known format. */
export interface ParseResult {
|
|
3
|
+
/** Format name reported by the classifier; it determines which parser produced `data`. */
|
|
4
|
+
format: FormatName;
|
|
5
|
+
/** Output of the format-specific parser. Typed `unknown` because its shape depends on `format`; narrow before use. */
|
|
6
|
+
data: unknown;
|
|
7
|
+
/** Number of intro pages that were skipped before the document was parsed. */
|
|
8
|
+
skippedPages: number;
|
|
9
|
+
/** Confidence score reported by the classifier. NOTE(review): scale/range is not visible here; confirm in @parseo/shared. */
|
|
10
|
+
confidence: number;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Universal parser. Extracts text from the PDF, classifies the document,
|
|
14
|
+
* and routes to the correct parser.
|
|
15
|
+
*
|
|
16
|
+
* Resolves to `null` if no known format is detected.
*
* @param buffer - Contents of the PDF file to parse.
* @returns The detected format together with its parsed data, or `null`.
|
|
17
|
+
*/
|
|
18
|
+
export declare function parse(buffer: Buffer): Promise<ParseResult | null>;
|
|
19
|
+
//# sourceMappingURL=parse.d.ts.map
|
package/dist/parse.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parse.d.ts","sourceRoot":"","sources":["../src/parse.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAY,UAAU,EAAkB,MAAM,gBAAgB,CAAC;AA4B3E,MAAM,WAAW,WAAW;IAC1B,gCAAgC;IAChC,MAAM,EAAE,UAAU,CAAC;IACnB,2CAA2C;IAC3C,IAAI,EAAE,OAAO,CAAC;IACd,8CAA8C;IAC9C,YAAY,EAAE,MAAM,CAAC;IACrB,kCAAkC;IAClC,UAAU,EAAE,MAAM,CAAC;CACpB;AA4BD;;;;;GAKG;AACH,wBAAsB,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAgFvE"}
|
package/dist/parse.js
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { extractLines, classifyDocument } from "@parseo/shared";
|
|
2
|
+
import { parseSmartLinxReportFromLines } from "@parseo/background-checks";
|
|
3
|
+
import { parseCreditReportFromLines } from "@parseo/credit-reports";
|
|
4
|
+
import { parseRicherValuesReportFromLines, parseForm1004MCFromLines, parseForm1073FromLines, } from "@parseo/appraisals";
|
|
5
|
+
import { parseWellsFargoFromLines, parseTDBankFromLines, parseChaseFromLines, parseBankOfAmericaFromLines, parseNavyFederalFromLines, parseThirdFederalFromLines, parseCitibankFromLines, parseRelayFromLines, parseGroveBankFromLines, parseCapitalOneFromLines, parseTruistFromLines, parsePNCFromLines, parseDiscoverFromLines, parseSynovusFromLines, } from "@parseo/bank-statements";
|
|
6
|
+
// ── Page helpers ─────────────────────────────────────────────
|
|
7
|
+
/**
 * Drops the first `pagesToSkip` pages of a document and renumbers the rest.
 *
 * The first page is taken to be the lowest `page` value present in `lines`,
 * so the result does not depend on the order of the input. Kept lines are
 * shallow-copied with `page` reduced by `pagesToSkip`; the input array and
 * its elements are left untouched.
 *
 * @param lines Extracted lines; each must carry a numeric `page` property.
 * @param pagesToSkip Number of leading pages to remove.
 * @returns A new array containing only the lines on the remaining pages, in
 *   their original relative order.
 */
function skipPages(lines, pagesToSkip) {
    // Scan for the true minimum instead of trusting that lines[0] is on the
    // first page. A plain loop avoids the argument-count limit of
    // Math.min(...pages) on very large documents.
    let minPage = Infinity;
    for (const line of lines) {
        if (line.page < minPage) {
            minPage = line.page;
        }
    }
    // For empty input minPage stays Infinity and the filter below simply
    // yields an empty array.
    const firstKeptPage = minPage + pagesToSkip;
    return lines
        .filter((line) => line.page >= firstKeptPage)
        .map((line) => ({ ...line, page: line.page - pagesToSkip }));
}
|
|
14
|
+
/**
 * Walks a parsed result in place and adds `offset` to the `pageNumber` of
 * every bounding-box-like object it finds.
 *
 * An object counts as a bounding box when it has a numeric `pageNumber` and
 * both an `x` and a `y` key. Such an object is updated and not descended
 * into. Every other object or array is searched recursively through its own
 * enumerable values.
 *
 * Each object is visited at most once, so a bounding box that is referenced
 * from several places is offset exactly once, and cyclic structures
 * terminate instead of overflowing the stack.
 *
 * @param obj Value to walk; anything that is not a non-null object is ignored.
 * @param offset Amount to add to each `pageNumber`; `0` makes the call a no-op.
 * @param seen Internal bookkeeping of already-visited objects. Callers
 *   normally omit it.
 */
function offsetBoundingBoxPages(obj, offset, seen = new WeakSet()) {
    if (offset === 0 || obj == null || typeof obj !== "object")
        return;
    // Guard against shared references (double offset) and cycles.
    if (seen.has(obj))
        return;
    seen.add(obj);
    const record = obj;
    if (typeof record.pageNumber === "number" && "x" in record && "y" in record) {
        record.pageNumber = record.pageNumber + offset;
        return;
    }
    for (const value of Object.values(record)) {
        if (value != null && typeof value === "object") {
            offsetBoundingBoxPages(value, offset, seen);
        }
    }
}
|
|
28
|
+
// ── Main ─────────────────────────────────────────────────────
|
|
29
|
+
/**
 * Universal parser. Extracts text lines from the PDF, classifies the
 * document, and routes the lines to the matching format-specific parser.
 *
 * When the classifier reports intro pages to skip, those pages are removed
 * before parsing and the skip count is added back to the page number of every
 * bounding box in the parsed result.
 *
 * @param buffer PDF file contents. Passed to `extractLines` and, for the
 *   form-1004mc and form-1073 parsers, to the parser itself.
 * @returns `{ format, data, skippedPages, confidence }`, or `null` if no
 *   known format is detected, including when the classifier reports a format
 *   that this module has no parser for.
 */
export async function parse(buffer) {
    const allLines = await extractLines(buffer);
    const classification = classifyDocument(allLines);
    if (!classification)
        return null;
    const { format, skip, confidence } = classification;
    // Parsers receive the document as if it started after the intro pages.
    const lines = skip > 0 ? skipPages(allLines, skip) : allLines;
    let data;
    switch (format) {
        case "smartlinx":
            data = parseSmartLinxReportFromLines(lines);
            break;
        case "credit-report": {
            const cr = parseCreditReportFromLines(lines);
            // Only expose creditXpert when the parser actually produced it.
            data = {
                format: cr.format,
                report: cr.report,
                ...(cr.creditXpert ? { creditXpert: cr.creditXpert } : {}),
            };
            break;
        }
        case "richer-values":
            data = parseRicherValuesReportFromLines(lines);
            break;
        // NOTE(review): the two form parsers below receive `skip` and their
        // result is also passed through offsetBoundingBoxPages further down.
        // Confirm they return page numbers relative to the trimmed document,
        // otherwise bounding boxes would be offset twice.
        case "form-1004mc":
            data = await parseForm1004MCFromLines(lines, buffer, skip);
            break;
        case "form-1073":
            data = await parseForm1073FromLines(lines, buffer, skip);
            break;
        case "wells-fargo":
            data = parseWellsFargoFromLines(lines);
            break;
        case "td-bank":
            data = parseTDBankFromLines(lines);
            break;
        case "chase":
            data = parseChaseFromLines(lines);
            break;
        case "bank-of-america":
            data = parseBankOfAmericaFromLines(lines);
            break;
        case "navy-federal":
            data = parseNavyFederalFromLines(lines);
            break;
        case "third-federal":
            data = parseThirdFederalFromLines(lines);
            break;
        case "citibank":
            data = parseCitibankFromLines(lines);
            break;
        case "relay":
            data = parseRelayFromLines(lines);
            break;
        case "grove-bank":
            data = parseGroveBankFromLines(lines);
            break;
        case "capital-one":
            data = parseCapitalOneFromLines(lines);
            break;
        case "truist":
            data = parseTruistFromLines(lines);
            break;
        case "pnc":
            data = parsePNCFromLines(lines);
            break;
        case "discover":
            data = parseDiscoverFromLines(lines);
            break;
        case "synovus":
            data = parseSynovusFromLines(lines);
            break;
        default:
            // The classifier named a format this module cannot parse. Treat
            // it as "no known format" rather than returning a result whose
            // `data` is undefined.
            return null;
    }
    // Restore original-document page numbers on every bounding box.
    if (skip > 0)
        offsetBoundingBoxPages(data, skip);
    return { format, data, skippedPages: skip, confidence };
}
|
package/package.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@parseo/core",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"main": "./dist/index.js",
|
|
6
|
+
"types": "./dist/index.d.ts",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": {
|
|
9
|
+
"types": "./dist/index.d.ts",
|
|
10
|
+
"import": "./dist/index.js"
|
|
11
|
+
}
|
|
12
|
+
},
|
|
13
|
+
"license": "MIT",
|
|
14
|
+
"publishConfig": {
|
|
15
|
+
"access": "public"
|
|
16
|
+
},
|
|
17
|
+
"files": ["dist"],
|
|
18
|
+
"scripts": {
|
|
19
|
+
"build": "tsc"
|
|
20
|
+
},
|
|
21
|
+
"dependencies": {
|
|
22
|
+
"@parseo/shared": "1.0.0",
|
|
23
|
+
"@parseo/credit-reports": "1.0.0",
|
|
24
|
+
"@parseo/background-checks": "1.0.0",
|
|
25
|
+
"@parseo/appraisals": "1.0.0",
|
|
26
|
+
"@parseo/bank-statements": "1.0.0"
|
|
27
|
+
}
|
|
28
|
+
}
|