@pdfvector/instance-contract 0.0.26 → 0.0.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.tsc/lib/router/admin/get-usage-records.d.ts +1 -0
- package/.tsc/lib/router/admin/get-usage-records.js +1 -0
- package/.tsc/lib/router/bankStatement/ask.js +7 -19
- package/.tsc/lib/router/bankStatement/extract.js +9 -21
- package/.tsc/lib/router/bankStatement/parse.js +6 -16
- package/.tsc/lib/router/document/ask.js +7 -23
- package/.tsc/lib/router/document/extract.js +9 -25
- package/.tsc/lib/router/document/parse.js +7 -23
- package/.tsc/lib/router/free/bank-statement-parse.js +4 -11
- package/.tsc/lib/router/identity/ask.js +7 -19
- package/.tsc/lib/router/identity/extract.js +9 -21
- package/.tsc/lib/router/identity/parse.js +6 -16
- package/.tsc/lib/router/invoice/ask.js +7 -19
- package/.tsc/lib/router/invoice/extract.js +9 -21
- package/.tsc/lib/router/invoice/parse.js +6 -16
- package/.tsc/lib/supported-mimes.d.ts +29 -0
- package/.tsc/lib/supported-mimes.js +122 -0
- package/CHANGELOG.md +14 -0
- package/package.json +1 -1
|
@@ -17,6 +17,7 @@ export declare const getUsageRecords: import("@orpc/contract").ContractProcedure
|
|
|
17
17
|
operation: z.ZodString;
|
|
18
18
|
pageCount: z.ZodNullable<z.ZodNumber>;
|
|
19
19
|
requestedModel: z.ZodString;
|
|
20
|
+
actualModel: z.ZodNullable<z.ZodString>;
|
|
20
21
|
apiKey: z.ZodNullable<z.ZodString>;
|
|
21
22
|
workspaceId: z.ZodNullable<z.ZodNumber>;
|
|
22
23
|
createdAt: z.ZodNumber;
|
|
@@ -14,6 +14,7 @@ const usageRecordSchema = z.object({
|
|
|
14
14
|
operation: z.string(),
|
|
15
15
|
pageCount: z.number().nullable(),
|
|
16
16
|
requestedModel: z.string(),
|
|
17
|
+
actualModel: z.string().nullable(),
|
|
17
18
|
apiKey: z.string().nullable(),
|
|
18
19
|
workspaceId: z.number().nullable(),
|
|
19
20
|
createdAt: z.number(),
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { oc } from "@orpc/contract";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
|
|
4
|
+
import { outputAskModelDescription, specializedAskModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
|
|
4
5
|
import { getDefaultSpec } from "./get-default-spec";
|
|
5
6
|
const specializedModelSchema = z
|
|
6
7
|
.enum(["auto", ...pdfvectorModelSchema.options])
|
|
@@ -12,17 +13,9 @@ const askInputSchema = z.object({
|
|
|
12
13
|
.describe("URL of the bank statement file to fetch and parse"),
|
|
13
14
|
file: z
|
|
14
15
|
.file()
|
|
15
|
-
.mime([
|
|
16
|
-
"application/pdf",
|
|
17
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
18
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
19
|
-
"text/csv",
|
|
20
|
-
"application/csv",
|
|
21
|
-
"image/png",
|
|
22
|
-
"image/jpeg",
|
|
23
|
-
])
|
|
16
|
+
.mime([...supportedFileMimes])
|
|
24
17
|
.optional()
|
|
25
|
-
.describe(
|
|
18
|
+
.describe(`Bank statement file upload via multipart form-data (${supportedFileFormatsDescription})`),
|
|
26
19
|
base64: z
|
|
27
20
|
.string()
|
|
28
21
|
.optional()
|
|
@@ -31,12 +24,7 @@ const askInputSchema = z.object({
|
|
|
31
24
|
.string()
|
|
32
25
|
.min(4, "question must be at least 4 characters")
|
|
33
26
|
.describe("The question to answer about the bank statement"),
|
|
34
|
-
model: specializedModelSchema.describe(
|
|
35
|
-
"'auto' (default): Automatically selects the best tier. " +
|
|
36
|
-
"'nano': Uses lightweight models. " +
|
|
37
|
-
"'mini': Uses mid-range models. " +
|
|
38
|
-
"'pro': Uses capable models. " +
|
|
39
|
-
"'max': Uses the most powerful models."),
|
|
27
|
+
model: specializedModelSchema.describe(specializedAskModelDescription),
|
|
40
28
|
callback: z
|
|
41
29
|
.object({
|
|
42
30
|
url: z
|
|
@@ -56,11 +44,11 @@ const askInputSchema = z.object({
|
|
|
56
44
|
const askOutputSchema = z.object({
|
|
57
45
|
markdown: z.string().describe("The answer to the question"),
|
|
58
46
|
pageCount: z.number().int().describe("Total number of pages in the document"),
|
|
59
|
-
model: pdfvectorModelSchema.describe(
|
|
47
|
+
model: pdfvectorModelSchema.describe(outputAskModelDescription),
|
|
60
48
|
credits: z
|
|
61
49
|
.number()
|
|
62
50
|
.int()
|
|
63
|
-
.describe("Number of credits consumed by this API call"),
|
|
51
|
+
.describe("Number of credits consumed by this API call. Cost per page: nano=6, mini=10, pro=14, max=18."),
|
|
64
52
|
requestId: z
|
|
65
53
|
.number()
|
|
66
54
|
.int()
|
|
@@ -82,7 +70,7 @@ const requestExamples = {
|
|
|
82
70
|
export const ask = oc
|
|
83
71
|
.route({
|
|
84
72
|
summary: "Ask a question about a bank statement",
|
|
85
|
-
description:
|
|
73
|
+
description: `Parse a bank statement and answer a question about its content using AI. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
|
|
86
74
|
tags: ["Bank Statement"],
|
|
87
75
|
spec: (op) => getDefaultSpec(op, requestExamples),
|
|
88
76
|
})
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { oc } from "@orpc/contract";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
|
|
4
|
+
import { outputExtractModelDescription, specializedExtractModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
|
|
4
5
|
import { getDefaultSpec } from "./get-default-spec";
|
|
5
6
|
const specializedModelSchema = z
|
|
6
7
|
.enum(["auto", ...pdfvectorModelSchema.options])
|
|
@@ -12,17 +13,9 @@ const extractInputSchema = z.object({
|
|
|
12
13
|
.describe("URL of the bank statement file to fetch and parse"),
|
|
13
14
|
file: z
|
|
14
15
|
.file()
|
|
15
|
-
.mime([
|
|
16
|
-
"application/pdf",
|
|
17
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
18
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
19
|
-
"text/csv",
|
|
20
|
-
"application/csv",
|
|
21
|
-
"image/png",
|
|
22
|
-
"image/jpeg",
|
|
23
|
-
])
|
|
16
|
+
.mime([...supportedFileMimes])
|
|
24
17
|
.optional()
|
|
25
|
-
.describe(
|
|
18
|
+
.describe(`Bank statement file upload via multipart form-data (${supportedFileFormatsDescription})`),
|
|
26
19
|
base64: z
|
|
27
20
|
.string()
|
|
28
21
|
.optional()
|
|
@@ -40,7 +33,7 @@ const extractInputSchema = z.object({
|
|
|
40
33
|
const parsed = JSON.parse(trimmed);
|
|
41
34
|
if (typeof parsed !== "object" || parsed === null) {
|
|
42
35
|
ctx.addIssue({
|
|
43
|
-
code:
|
|
36
|
+
code: "custom",
|
|
44
37
|
message: "Schema must be a JSON object",
|
|
45
38
|
});
|
|
46
39
|
return z.NEVER;
|
|
@@ -49,7 +42,7 @@ const extractInputSchema = z.object({
|
|
|
49
42
|
}
|
|
50
43
|
catch {
|
|
51
44
|
ctx.addIssue({
|
|
52
|
-
code:
|
|
45
|
+
code: "custom",
|
|
53
46
|
message: "Invalid JSON string for schema",
|
|
54
47
|
});
|
|
55
48
|
return z.NEVER;
|
|
@@ -57,12 +50,7 @@ const extractInputSchema = z.object({
|
|
|
57
50
|
}),
|
|
58
51
|
])
|
|
59
52
|
.describe("JSON Schema describing the structure of the data to extract from the bank statement. Can be a JSON object or a JSON string."),
|
|
60
|
-
model: specializedModelSchema.describe(
|
|
61
|
-
"'auto' (default): Automatically selects the best tier. " +
|
|
62
|
-
"'nano': Uses lightweight models. " +
|
|
63
|
-
"'mini': Uses mid-range models. " +
|
|
64
|
-
"'pro': Uses capable models. " +
|
|
65
|
-
"'max': Uses the most powerful models."),
|
|
53
|
+
model: specializedModelSchema.describe(specializedExtractModelDescription),
|
|
66
54
|
callback: z
|
|
67
55
|
.object({
|
|
68
56
|
url: z
|
|
@@ -86,11 +74,11 @@ const extractOutputSchema = z.object({
|
|
|
86
74
|
(typeof val !== "object" || Object.keys(val).length > 0), { message: "Extracted data must not be empty" })
|
|
87
75
|
.describe("Extracted structured data matching the provided JSON Schema"),
|
|
88
76
|
pageCount: z.number().int().describe("Total number of pages in the document"),
|
|
89
|
-
model: pdfvectorModelSchema.describe(
|
|
77
|
+
model: pdfvectorModelSchema.describe(outputExtractModelDescription),
|
|
90
78
|
credits: z
|
|
91
79
|
.number()
|
|
92
80
|
.int()
|
|
93
|
-
.describe("Number of credits consumed by this API call"),
|
|
81
|
+
.describe("Number of credits consumed by this API call. Cost per page: nano=6, mini=10, pro=14, max=18."),
|
|
94
82
|
requestId: z
|
|
95
83
|
.number()
|
|
96
84
|
.int()
|
|
@@ -131,7 +119,7 @@ const requestExamples = {
|
|
|
131
119
|
export const extract = oc
|
|
132
120
|
.route({
|
|
133
121
|
summary: "Extract structured data from a bank statement",
|
|
134
|
-
description:
|
|
122
|
+
description: `Parse a bank statement and extract structured data matching a provided JSON Schema using AI. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
|
|
135
123
|
tags: ["Bank Statement"],
|
|
136
124
|
spec: (op) => getDefaultSpec(op, requestExamples),
|
|
137
125
|
})
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { oc } from "@orpc/contract";
|
|
2
2
|
import { z } from "zod";
|
|
3
|
+
import { specializedParseModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
|
|
3
4
|
import { getDefaultSpec } from "./get-default-spec";
|
|
4
5
|
const specializedParseModelSchema = z
|
|
5
6
|
.enum(["pro", "max", "auto"], {
|
|
@@ -13,25 +14,14 @@ const parseInputSchema = z.object({
|
|
|
13
14
|
.describe("URL of the bank statement file to fetch and parse"),
|
|
14
15
|
file: z
|
|
15
16
|
.file()
|
|
16
|
-
.mime([
|
|
17
|
-
"application/pdf",
|
|
18
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
19
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
20
|
-
"text/csv",
|
|
21
|
-
"application/csv",
|
|
22
|
-
"image/png",
|
|
23
|
-
"image/jpeg",
|
|
24
|
-
])
|
|
17
|
+
.mime([...supportedFileMimes])
|
|
25
18
|
.optional()
|
|
26
|
-
.describe(
|
|
19
|
+
.describe(`Bank statement file upload via multipart form-data (${supportedFileFormatsDescription})`),
|
|
27
20
|
base64: z
|
|
28
21
|
.string()
|
|
29
22
|
.optional()
|
|
30
23
|
.describe("Base64-encoded bank statement file content"),
|
|
31
|
-
model: specializedParseModelSchema.describe("
|
|
32
|
-
"'auto' (default): Automatically selects the best parsing strategy with intelligent fallback. " +
|
|
33
|
-
"'pro': Extracts structured bank statement fields with standard accuracy. " +
|
|
34
|
-
"'max': Extracts structured bank statement fields with highest accuracy and fallback."),
|
|
24
|
+
model: specializedParseModelSchema.describe(specializedParseModelDescription("bank statement")),
|
|
35
25
|
callback: z
|
|
36
26
|
.object({
|
|
37
27
|
url: z
|
|
@@ -59,7 +49,7 @@ const parseOutputSchema = z.object({
|
|
|
59
49
|
credits: z
|
|
60
50
|
.number()
|
|
61
51
|
.int()
|
|
62
|
-
.describe("Number of credits consumed by this API call"),
|
|
52
|
+
.describe("Number of credits consumed by this API call. Cost per page: pro=6, max=10."),
|
|
63
53
|
requestId: z
|
|
64
54
|
.number()
|
|
65
55
|
.int()
|
|
@@ -97,7 +87,7 @@ const requestExamples = {
|
|
|
97
87
|
export const parse = oc
|
|
98
88
|
.route({
|
|
99
89
|
summary: "Parse a bank statement",
|
|
100
|
-
description:
|
|
90
|
+
description: `Extract text and structured data from a bank statement. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
|
|
101
91
|
tags: ["Bank Statement"],
|
|
102
92
|
spec: (op) => getDefaultSpec(op, requestExamples),
|
|
103
93
|
})
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { oc } from "@orpc/contract";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
|
|
4
|
+
import { documentAskModelDescription, outputAskModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
|
|
4
5
|
import { getDefaultSpec } from "./get-default-spec";
|
|
5
6
|
const askInputSchema = z.object({
|
|
6
7
|
url: z
|
|
@@ -9,17 +10,9 @@ const askInputSchema = z.object({
|
|
|
9
10
|
.describe("URL of the document file to fetch and parse"),
|
|
10
11
|
file: z
|
|
11
12
|
.file()
|
|
12
|
-
.mime([
|
|
13
|
-
"application/pdf",
|
|
14
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
15
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
16
|
-
"text/csv",
|
|
17
|
-
"application/csv",
|
|
18
|
-
"image/png",
|
|
19
|
-
"image/jpeg",
|
|
20
|
-
])
|
|
13
|
+
.mime([...supportedFileMimes])
|
|
21
14
|
.optional()
|
|
22
|
-
.describe(
|
|
15
|
+
.describe(`Document file upload via multipart form-data (${supportedFileFormatsDescription})`),
|
|
23
16
|
base64: z
|
|
24
17
|
.string()
|
|
25
18
|
.optional()
|
|
@@ -32,12 +25,7 @@ const askInputSchema = z.object({
|
|
|
32
25
|
.enum(["auto", ...pdfvectorModelSchema.options])
|
|
33
26
|
.optional()
|
|
34
27
|
.default("auto")
|
|
35
|
-
.describe(
|
|
36
|
-
"'auto' (default): Automatically selects the best tier based on document page count and document complexity. " +
|
|
37
|
-
"'nano': Uses lightweight models (likely better than GPT-5-nano). Supports PDF, Word, Excel, CSV. " +
|
|
38
|
-
"'mini': Uses mid-range models (likely better than GPT-5-mini). Supports PDF, Word, Excel, CSV. " +
|
|
39
|
-
"'pro': Uses capable models (likely better than GPT-5.2). Supports PDF, Word, Excel, CSV, Image. " +
|
|
40
|
-
"'max': Uses the most powerful models (likely better than Claude Opus 4.6). Supports PDF, Word, Excel, CSV, Image."),
|
|
28
|
+
.describe(documentAskModelDescription),
|
|
41
29
|
callback: z
|
|
42
30
|
.object({
|
|
43
31
|
url: z
|
|
@@ -61,15 +49,11 @@ const askOutputSchema = z
|
|
|
61
49
|
.number()
|
|
62
50
|
.int()
|
|
63
51
|
.describe("Total number of pages in the document"),
|
|
64
|
-
model: pdfvectorModelSchema.describe(
|
|
65
|
-
"'nano': Supports PDF, Word, Excel, CSV. " +
|
|
66
|
-
"'mini': Supports PDF, Word, Excel, CSV. " +
|
|
67
|
-
"'pro': Supports PDF, Word, Excel, CSV, Image. " +
|
|
68
|
-
"'max': Supports PDF, Word, Excel, CSV, Image."),
|
|
52
|
+
model: pdfvectorModelSchema.describe(outputAskModelDescription),
|
|
69
53
|
credits: z
|
|
70
54
|
.number()
|
|
71
55
|
.int()
|
|
72
|
-
.describe("Number of credits consumed by this API call"),
|
|
56
|
+
.describe("Number of credits consumed by this API call. Cost per page: nano=2, mini=4, pro=8, max=16."),
|
|
73
57
|
requestId: z
|
|
74
58
|
.number()
|
|
75
59
|
.int()
|
|
@@ -132,7 +116,7 @@ const requestExamples = {
|
|
|
132
116
|
export const ask = oc
|
|
133
117
|
.route({
|
|
134
118
|
summary: "Ask a question about a document",
|
|
135
|
-
description:
|
|
119
|
+
description: `Parse a document and answer a question about its content using AI. Supports ${supportedFileTypesLong}. Files up to 1000 pages and up to 500MB in size. Provide the document via file upload, a public URL, or a base64-encoded string.`,
|
|
136
120
|
tags: ["Document"],
|
|
137
121
|
spec: (op) => getDefaultSpec(op, requestExamples),
|
|
138
122
|
})
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { oc } from "@orpc/contract";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
|
|
4
|
+
import { documentExtractModelDescription, outputExtractModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
|
|
4
5
|
import { getDefaultSpec } from "./get-default-spec";
|
|
5
6
|
const extractInputSchema = z.object({
|
|
6
7
|
url: z
|
|
@@ -9,17 +10,9 @@ const extractInputSchema = z.object({
|
|
|
9
10
|
.describe("URL of the document file to fetch and parse"),
|
|
10
11
|
file: z
|
|
11
12
|
.file()
|
|
12
|
-
.mime([
|
|
13
|
-
"application/pdf",
|
|
14
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
15
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
16
|
-
"text/csv",
|
|
17
|
-
"application/csv",
|
|
18
|
-
"image/png",
|
|
19
|
-
"image/jpeg",
|
|
20
|
-
])
|
|
13
|
+
.mime([...supportedFileMimes])
|
|
21
14
|
.optional()
|
|
22
|
-
.describe(
|
|
15
|
+
.describe(`Document file upload via multipart form-data (${supportedFileFormatsDescription})`),
|
|
23
16
|
base64: z
|
|
24
17
|
.string()
|
|
25
18
|
.optional()
|
|
@@ -38,7 +31,7 @@ const extractInputSchema = z.object({
|
|
|
38
31
|
const parsed = JSON.parse(trimmed);
|
|
39
32
|
if (typeof parsed !== "object" || parsed === null) {
|
|
40
33
|
ctx.addIssue({
|
|
41
|
-
code:
|
|
34
|
+
code: "custom",
|
|
42
35
|
message: "Schema must be a JSON object",
|
|
43
36
|
});
|
|
44
37
|
return z.NEVER;
|
|
@@ -47,7 +40,7 @@ const extractInputSchema = z.object({
|
|
|
47
40
|
}
|
|
48
41
|
catch {
|
|
49
42
|
ctx.addIssue({
|
|
50
|
-
code:
|
|
43
|
+
code: "custom",
|
|
51
44
|
message: "Invalid JSON string for schema",
|
|
52
45
|
});
|
|
53
46
|
return z.NEVER;
|
|
@@ -59,12 +52,7 @@ const extractInputSchema = z.object({
|
|
|
59
52
|
.enum(["auto", ...pdfvectorModelSchema.options])
|
|
60
53
|
.optional()
|
|
61
54
|
.default("auto")
|
|
62
|
-
.describe(
|
|
63
|
-
"'auto' (default): Automatically selects the best tier based on document page count and document complexity. " +
|
|
64
|
-
"'nano': Uses lightweight models (likely better than GPT-5-nano). Supports PDF, Word, Excel, CSV. " +
|
|
65
|
-
"'mini': Uses mid-range models (likely better than GPT-5-mini). Supports PDF, Word, Excel, CSV. " +
|
|
66
|
-
"'pro': Uses capable models (likely better than GPT-5.2). Supports PDF, Word, Excel, CSV, Image. " +
|
|
67
|
-
"'max': Uses the most powerful models (likely better than Claude Opus 4.6). Supports PDF, Word, Excel, CSV, Image."),
|
|
55
|
+
.describe(documentExtractModelDescription),
|
|
68
56
|
callback: z
|
|
69
57
|
.object({
|
|
70
58
|
url: z
|
|
@@ -92,15 +80,11 @@ const extractOutputSchema = z
|
|
|
92
80
|
.number()
|
|
93
81
|
.int()
|
|
94
82
|
.describe("Total number of pages in the document"),
|
|
95
|
-
model: pdfvectorModelSchema.describe(
|
|
96
|
-
"'nano': Supports PDF, Word, Excel, CSV. " +
|
|
97
|
-
"'mini': Supports PDF, Word, Excel, CSV. " +
|
|
98
|
-
"'pro': Supports PDF, Word, Excel, CSV, Image. " +
|
|
99
|
-
"'max': Supports PDF, Word, Excel, CSV, Image."),
|
|
83
|
+
model: pdfvectorModelSchema.describe(outputExtractModelDescription),
|
|
100
84
|
credits: z
|
|
101
85
|
.number()
|
|
102
86
|
.int()
|
|
103
|
-
.describe("Number of credits consumed by this API call"),
|
|
87
|
+
.describe("Number of credits consumed by this API call. Cost per page: nano=2, mini=4, pro=8, max=16."),
|
|
104
88
|
requestId: z
|
|
105
89
|
.number()
|
|
106
90
|
.int()
|
|
@@ -175,7 +159,7 @@ const requestExamples = {
|
|
|
175
159
|
export const extract = oc
|
|
176
160
|
.route({
|
|
177
161
|
summary: "Extract structured data from a document",
|
|
178
|
-
description:
|
|
162
|
+
description: `Parse a document and extract structured data matching a provided JSON Schema using AI. Supports ${supportedFileTypesLong}. Files up to 1000 pages and up to 500MB in size. Provide the document via file upload, a public URL, or a base64-encoded string.`,
|
|
179
163
|
tags: ["Document"],
|
|
180
164
|
spec: (op) => getDefaultSpec(op, requestExamples),
|
|
181
165
|
})
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { oc } from "@orpc/contract";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
|
|
4
|
+
import { documentParseModelDescription, outputModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
|
|
4
5
|
import { getDefaultSpec } from "./get-default-spec";
|
|
5
6
|
const parseInputSchema = z.object({
|
|
6
7
|
url: z
|
|
@@ -9,17 +10,9 @@ const parseInputSchema = z.object({
|
|
|
9
10
|
.describe("URL of the document file to fetch and parse"),
|
|
10
11
|
file: z
|
|
11
12
|
.file()
|
|
12
|
-
.mime([
|
|
13
|
-
"application/pdf",
|
|
14
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
15
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
16
|
-
"text/csv",
|
|
17
|
-
"application/csv",
|
|
18
|
-
"image/png",
|
|
19
|
-
"image/jpeg",
|
|
20
|
-
])
|
|
13
|
+
.mime([...supportedFileMimes])
|
|
21
14
|
.optional()
|
|
22
|
-
.describe(
|
|
15
|
+
.describe(`Document file upload via multipart form-data (${supportedFileFormatsDescription})`),
|
|
23
16
|
base64: z
|
|
24
17
|
.string()
|
|
25
18
|
.optional()
|
|
@@ -28,12 +21,7 @@ const parseInputSchema = z.object({
|
|
|
28
21
|
.enum(["auto", ...pdfvectorModelSchema.options])
|
|
29
22
|
.optional()
|
|
30
23
|
.default("auto")
|
|
31
|
-
.describe(
|
|
32
|
-
"'auto' (default): Automatically selects the best parsing strategy with intelligent fallback. File up to 1000 pages, up to 500MB in size. Supports PDF, Word, Excel, CSV, Image. " +
|
|
33
|
-
"'nano': For simple documents with plain text content. File up to 30 pages, up to 10MB in size. Supports PDF, Word, Excel, CSV. " +
|
|
34
|
-
"'mini': For documents with tables and structured content. File up to 30 pages, up to 10MB in size. Supports PDF, Word, Excel, CSV. " +
|
|
35
|
-
"'pro': For documents up to 30 pages with tables, handwritten text, figures, math, and Arabic. File up to 30 pages, up to 40MB in size. Supports PDF, Word, Excel, CSV, Image. " +
|
|
36
|
-
"'max': For large documents with full Pro capabilities plus enhanced multilingual support. File up to 1000 pages, up to 500MB in size. Supports PDF, Word, Excel, CSV, Image."),
|
|
24
|
+
.describe(documentParseModelDescription),
|
|
37
25
|
callback: z
|
|
38
26
|
.object({
|
|
39
27
|
url: z
|
|
@@ -53,15 +41,11 @@ const parseInputSchema = z.object({
|
|
|
53
41
|
const parseOutputSchema = z.object({
|
|
54
42
|
markdown: z.string().describe("Extracted text content from the document"),
|
|
55
43
|
pageCount: z.number().int().describe("Total number of pages in the document"),
|
|
56
|
-
model: pdfvectorModelSchema.describe(
|
|
57
|
-
"'nano': Supports PDF, Word, Excel, CSV. " +
|
|
58
|
-
"'mini': Supports PDF, Word, Excel, CSV. " +
|
|
59
|
-
"'pro': Supports PDF, Word, Excel, CSV, Image. " +
|
|
60
|
-
"'max': Supports PDF, Word, Excel, CSV, Image."),
|
|
44
|
+
model: pdfvectorModelSchema.describe(outputModelDescription),
|
|
61
45
|
credits: z
|
|
62
46
|
.number()
|
|
63
47
|
.int()
|
|
64
|
-
.describe("Number of credits consumed by this API call"),
|
|
48
|
+
.describe("Number of credits consumed by this API call. Cost per page: nano=1, mini=2, pro=4, max=8."),
|
|
65
49
|
requestId: z
|
|
66
50
|
.number()
|
|
67
51
|
.int()
|
|
@@ -120,7 +104,7 @@ const requestExamples = {
|
|
|
120
104
|
export const parse = oc
|
|
121
105
|
.route({
|
|
122
106
|
summary: "Parse a document",
|
|
123
|
-
description:
|
|
107
|
+
description: `Extract text and page count from a document. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
|
|
124
108
|
tags: ["Document"],
|
|
125
109
|
spec: (op) => getDefaultSpec(op, requestExamples),
|
|
126
110
|
})
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { oc } from "@orpc/contract";
|
|
2
2
|
import { z } from "zod";
|
|
3
|
+
import { supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
|
|
3
4
|
const inputSchema = z.object({
|
|
4
5
|
url: z
|
|
5
6
|
.url()
|
|
@@ -7,17 +8,9 @@ const inputSchema = z.object({
|
|
|
7
8
|
.describe("URL of the bank statement file to fetch and parse"),
|
|
8
9
|
file: z
|
|
9
10
|
.file()
|
|
10
|
-
.mime([
|
|
11
|
-
"application/pdf",
|
|
12
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
13
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
14
|
-
"text/csv",
|
|
15
|
-
"application/csv",
|
|
16
|
-
"image/png",
|
|
17
|
-
"image/jpeg",
|
|
18
|
-
])
|
|
11
|
+
.mime([...supportedFileMimes])
|
|
19
12
|
.optional()
|
|
20
|
-
.describe(
|
|
13
|
+
.describe(`Bank statement file upload via multipart form-data (${supportedFileFormatsDescription})`),
|
|
21
14
|
base64: z
|
|
22
15
|
.string()
|
|
23
16
|
.optional()
|
|
@@ -70,7 +63,7 @@ const responseExample = {
|
|
|
70
63
|
export const bankStatementParse = oc
|
|
71
64
|
.route({
|
|
72
65
|
summary: "Free: Parse a bank statement",
|
|
73
|
-
description:
|
|
66
|
+
description: `Extract text and structured data from a bank statement. Supports ${supportedFileTypesLong}. Free tier: max 5 pages, max 5 MB. Rate limited to 5 requests per IP per day.`,
|
|
74
67
|
tags: ["Free"],
|
|
75
68
|
spec: (op) => {
|
|
76
69
|
op.security = [];
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { oc } from "@orpc/contract";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
|
|
4
|
+
import { outputAskModelDescription, specializedAskModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
|
|
4
5
|
import { getDefaultSpec } from "./get-default-spec";
|
|
5
6
|
const specializedModelSchema = z
|
|
6
7
|
.enum(["auto", ...pdfvectorModelSchema.options])
|
|
@@ -12,17 +13,9 @@ const askInputSchema = z.object({
|
|
|
12
13
|
.describe("URL of the identity document file to fetch and parse"),
|
|
13
14
|
file: z
|
|
14
15
|
.file()
|
|
15
|
-
.mime([
|
|
16
|
-
"application/pdf",
|
|
17
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
18
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
19
|
-
"text/csv",
|
|
20
|
-
"application/csv",
|
|
21
|
-
"image/png",
|
|
22
|
-
"image/jpeg",
|
|
23
|
-
])
|
|
16
|
+
.mime([...supportedFileMimes])
|
|
24
17
|
.optional()
|
|
25
|
-
.describe(
|
|
18
|
+
.describe(`Identity document file upload via multipart form-data (${supportedFileFormatsDescription})`),
|
|
26
19
|
base64: z
|
|
27
20
|
.string()
|
|
28
21
|
.optional()
|
|
@@ -31,12 +24,7 @@ const askInputSchema = z.object({
|
|
|
31
24
|
.string()
|
|
32
25
|
.min(4, "question must be at least 4 characters")
|
|
33
26
|
.describe("The question to answer about the identity document"),
|
|
34
|
-
model: specializedModelSchema.describe(
|
|
35
|
-
"'auto' (default): Automatically selects the best tier. " +
|
|
36
|
-
"'nano': Uses lightweight models. " +
|
|
37
|
-
"'mini': Uses mid-range models. " +
|
|
38
|
-
"'pro': Uses capable models. " +
|
|
39
|
-
"'max': Uses the most powerful models."),
|
|
27
|
+
model: specializedModelSchema.describe(specializedAskModelDescription),
|
|
40
28
|
callback: z
|
|
41
29
|
.object({
|
|
42
30
|
url: z
|
|
@@ -56,11 +44,11 @@ const askInputSchema = z.object({
|
|
|
56
44
|
const askOutputSchema = z.object({
|
|
57
45
|
markdown: z.string().describe("The answer to the question"),
|
|
58
46
|
pageCount: z.number().int().describe("Total number of pages in the document"),
|
|
59
|
-
model: pdfvectorModelSchema.describe(
|
|
47
|
+
model: pdfvectorModelSchema.describe(outputAskModelDescription),
|
|
60
48
|
credits: z
|
|
61
49
|
.number()
|
|
62
50
|
.int()
|
|
63
|
-
.describe("Number of credits consumed by this API call"),
|
|
51
|
+
.describe("Number of credits consumed by this API call. Cost per page: nano=6, mini=10, pro=14, max=18."),
|
|
64
52
|
requestId: z
|
|
65
53
|
.number()
|
|
66
54
|
.int()
|
|
@@ -89,7 +77,7 @@ const requestExamples = {
|
|
|
89
77
|
export const ask = oc
|
|
90
78
|
.route({
|
|
91
79
|
summary: "Ask a question about an identity document",
|
|
92
|
-
description:
|
|
80
|
+
description: `Parse an identity document and answer a question about its content using AI. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
|
|
93
81
|
tags: ["Identity"],
|
|
94
82
|
spec: (op) => getDefaultSpec(op, requestExamples),
|
|
95
83
|
})
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { oc } from "@orpc/contract";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
|
|
4
|
+
import { outputExtractModelDescription, specializedExtractModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
|
|
4
5
|
import { getDefaultSpec } from "./get-default-spec";
|
|
5
6
|
const specializedModelSchema = z
|
|
6
7
|
.enum(["auto", ...pdfvectorModelSchema.options])
|
|
@@ -12,17 +13,9 @@ const extractInputSchema = z.object({
|
|
|
12
13
|
.describe("URL of the identity document file to fetch and parse"),
|
|
13
14
|
file: z
|
|
14
15
|
.file()
|
|
15
|
-
.mime([
|
|
16
|
-
"application/pdf",
|
|
17
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
18
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
19
|
-
"text/csv",
|
|
20
|
-
"application/csv",
|
|
21
|
-
"image/png",
|
|
22
|
-
"image/jpeg",
|
|
23
|
-
])
|
|
16
|
+
.mime([...supportedFileMimes])
|
|
24
17
|
.optional()
|
|
25
|
-
.describe(
|
|
18
|
+
.describe(`Identity document file upload via multipart form-data (${supportedFileFormatsDescription})`),
|
|
26
19
|
base64: z
|
|
27
20
|
.string()
|
|
28
21
|
.optional()
|
|
@@ -40,7 +33,7 @@ const extractInputSchema = z.object({
|
|
|
40
33
|
const parsed = JSON.parse(trimmed);
|
|
41
34
|
if (typeof parsed !== "object" || parsed === null) {
|
|
42
35
|
ctx.addIssue({
|
|
43
|
-
code:
|
|
36
|
+
code: "custom",
|
|
44
37
|
message: "Schema must be a JSON object",
|
|
45
38
|
});
|
|
46
39
|
return z.NEVER;
|
|
@@ -49,7 +42,7 @@ const extractInputSchema = z.object({
|
|
|
49
42
|
}
|
|
50
43
|
catch {
|
|
51
44
|
ctx.addIssue({
|
|
52
|
-
code:
|
|
45
|
+
code: "custom",
|
|
53
46
|
message: "Invalid JSON string for schema",
|
|
54
47
|
});
|
|
55
48
|
return z.NEVER;
|
|
@@ -57,12 +50,7 @@ const extractInputSchema = z.object({
|
|
|
57
50
|
}),
|
|
58
51
|
])
|
|
59
52
|
.describe("JSON Schema describing the structure of the data to extract from the identity document. Can be a JSON object or a JSON string."),
|
|
60
|
-
model: specializedModelSchema.describe(
|
|
61
|
-
"'auto' (default): Automatically selects the best tier. " +
|
|
62
|
-
"'nano': Uses lightweight models. " +
|
|
63
|
-
"'mini': Uses mid-range models. " +
|
|
64
|
-
"'pro': Uses capable models. " +
|
|
65
|
-
"'max': Uses the most powerful models."),
|
|
53
|
+
model: specializedModelSchema.describe(specializedExtractModelDescription),
|
|
66
54
|
callback: z
|
|
67
55
|
.object({
|
|
68
56
|
url: z
|
|
@@ -86,11 +74,11 @@ const extractOutputSchema = z.object({
|
|
|
86
74
|
(typeof val !== "object" || Object.keys(val).length > 0), { message: "Extracted data must not be empty" })
|
|
87
75
|
.describe("Extracted structured data matching the provided JSON Schema"),
|
|
88
76
|
pageCount: z.number().int().describe("Total number of pages in the document"),
|
|
89
|
-
model: pdfvectorModelSchema.describe(
|
|
77
|
+
model: pdfvectorModelSchema.describe(outputExtractModelDescription),
|
|
90
78
|
credits: z
|
|
91
79
|
.number()
|
|
92
80
|
.int()
|
|
93
|
-
.describe("Number of credits consumed by this API call"),
|
|
81
|
+
.describe("Number of credits consumed by this API call. Cost per page: nano=6, mini=10, pro=14, max=18."),
|
|
94
82
|
requestId: z
|
|
95
83
|
.number()
|
|
96
84
|
.int()
|
|
@@ -121,7 +109,7 @@ const requestExamples = {
|
|
|
121
109
|
export const extract = oc
|
|
122
110
|
.route({
|
|
123
111
|
summary: "Extract structured data from an identity document",
|
|
124
|
-
description:
|
|
112
|
+
description: `Parse an identity document and extract structured data matching a provided JSON Schema using AI. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
|
|
125
113
|
tags: ["Identity"],
|
|
126
114
|
spec: (op) => getDefaultSpec(op, requestExamples),
|
|
127
115
|
})
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { oc } from "@orpc/contract";
|
|
2
2
|
import { z } from "zod";
|
|
3
|
+
import { specializedParseModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
|
|
3
4
|
import { getDefaultSpec } from "./get-default-spec";
|
|
4
5
|
const specializedParseModelSchema = z
|
|
5
6
|
.enum(["pro", "max", "auto"], {
|
|
@@ -13,25 +14,14 @@ const parseInputSchema = z.object({
|
|
|
13
14
|
.describe("URL of the identity document file to fetch and parse"),
|
|
14
15
|
file: z
|
|
15
16
|
.file()
|
|
16
|
-
.mime([
|
|
17
|
-
"application/pdf",
|
|
18
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
19
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
20
|
-
"text/csv",
|
|
21
|
-
"application/csv",
|
|
22
|
-
"image/png",
|
|
23
|
-
"image/jpeg",
|
|
24
|
-
])
|
|
17
|
+
.mime([...supportedFileMimes])
|
|
25
18
|
.optional()
|
|
26
|
-
.describe(
|
|
19
|
+
.describe(`Identity document file upload via multipart form-data (${supportedFileFormatsDescription})`),
|
|
27
20
|
base64: z
|
|
28
21
|
.string()
|
|
29
22
|
.optional()
|
|
30
23
|
.describe("Base64-encoded identity document file content"),
|
|
31
|
-
model: specializedParseModelSchema.describe("
|
|
32
|
-
"'auto' (default): Automatically selects the best parsing strategy with intelligent fallback. " +
|
|
33
|
-
"'pro': Extracts structured identity document fields with standard accuracy. " +
|
|
34
|
-
"'max': Extracts structured identity document fields with highest accuracy."),
|
|
24
|
+
model: specializedParseModelSchema.describe(specializedParseModelDescription("identity document")),
|
|
35
25
|
callback: z
|
|
36
26
|
.object({
|
|
37
27
|
url: z
|
|
@@ -68,7 +58,7 @@ const parseOutputSchema = z.object({
|
|
|
68
58
|
credits: z
|
|
69
59
|
.number()
|
|
70
60
|
.int()
|
|
71
|
-
.describe("Number of credits consumed by this API call"),
|
|
61
|
+
.describe("Number of credits consumed by this API call. Cost per page: pro=6, max=10."),
|
|
72
62
|
requestId: z
|
|
73
63
|
.number()
|
|
74
64
|
.int()
|
|
@@ -101,7 +91,7 @@ const requestExamples = {
|
|
|
101
91
|
export const parse = oc
|
|
102
92
|
.route({
|
|
103
93
|
summary: "Parse an identity document",
|
|
104
|
-
description:
|
|
94
|
+
description: `Extract text and structured data from an identity document. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
|
|
105
95
|
tags: ["Identity"],
|
|
106
96
|
spec: (op) => getDefaultSpec(op, requestExamples),
|
|
107
97
|
})
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { oc } from "@orpc/contract";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
|
|
4
|
+
import { outputAskModelDescription, specializedAskModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
|
|
4
5
|
import { getDefaultSpec } from "./get-default-spec";
|
|
5
6
|
const specializedModelSchema = z
|
|
6
7
|
.enum(["auto", ...pdfvectorModelSchema.options])
|
|
@@ -12,28 +13,15 @@ const askInputSchema = z.object({
|
|
|
12
13
|
.describe("URL of the invoice file to fetch and parse"),
|
|
13
14
|
file: z
|
|
14
15
|
.file()
|
|
15
|
-
.mime([
|
|
16
|
-
"application/pdf",
|
|
17
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
18
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
19
|
-
"text/csv",
|
|
20
|
-
"application/csv",
|
|
21
|
-
"image/png",
|
|
22
|
-
"image/jpeg",
|
|
23
|
-
])
|
|
16
|
+
.mime([...supportedFileMimes])
|
|
24
17
|
.optional()
|
|
25
|
-
.describe(
|
|
18
|
+
.describe(`Invoice file upload via multipart form-data (${supportedFileFormatsDescription})`),
|
|
26
19
|
base64: z.string().optional().describe("Base64-encoded invoice file content"),
|
|
27
20
|
question: z
|
|
28
21
|
.string()
|
|
29
22
|
.min(4, "question must be at least 4 characters")
|
|
30
23
|
.describe("The question to answer about the invoice"),
|
|
31
|
-
model: specializedModelSchema.describe(
|
|
32
|
-
"'auto' (default): Automatically selects the best tier. " +
|
|
33
|
-
"'nano': Uses lightweight models. " +
|
|
34
|
-
"'mini': Uses mid-range models. " +
|
|
35
|
-
"'pro': Uses capable models. " +
|
|
36
|
-
"'max': Uses the most powerful models."),
|
|
24
|
+
model: specializedModelSchema.describe(specializedAskModelDescription),
|
|
37
25
|
callback: z
|
|
38
26
|
.object({
|
|
39
27
|
url: z
|
|
@@ -53,11 +41,11 @@ const askInputSchema = z.object({
|
|
|
53
41
|
const askOutputSchema = z.object({
|
|
54
42
|
markdown: z.string().describe("The answer to the question"),
|
|
55
43
|
pageCount: z.number().int().describe("Total number of pages in the document"),
|
|
56
|
-
model: pdfvectorModelSchema.describe(
|
|
44
|
+
model: pdfvectorModelSchema.describe(outputAskModelDescription),
|
|
57
45
|
credits: z
|
|
58
46
|
.number()
|
|
59
47
|
.int()
|
|
60
|
-
.describe("Number of credits consumed by this API call"),
|
|
48
|
+
.describe("Number of credits consumed by this API call. Cost per page: nano=6, mini=10, pro=14, max=18."),
|
|
61
49
|
requestId: z
|
|
62
50
|
.number()
|
|
63
51
|
.int()
|
|
@@ -79,7 +67,7 @@ const requestExamples = {
|
|
|
79
67
|
export const ask = oc
|
|
80
68
|
.route({
|
|
81
69
|
summary: "Ask a question about an invoice",
|
|
82
|
-
description:
|
|
70
|
+
description: `Parse an invoice and answer a question about its content using AI. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
|
|
83
71
|
tags: ["Invoice"],
|
|
84
72
|
spec: (op) => getDefaultSpec(op, requestExamples),
|
|
85
73
|
})
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { oc } from "@orpc/contract";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
|
|
4
|
+
import { outputExtractModelDescription, specializedExtractModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
|
|
4
5
|
import { getDefaultSpec } from "./get-default-spec";
|
|
5
6
|
const specializedModelSchema = z
|
|
6
7
|
.enum(["auto", ...pdfvectorModelSchema.options])
|
|
@@ -12,17 +13,9 @@ const extractInputSchema = z.object({
|
|
|
12
13
|
.describe("URL of the invoice file to fetch and parse"),
|
|
13
14
|
file: z
|
|
14
15
|
.file()
|
|
15
|
-
.mime([
|
|
16
|
-
"application/pdf",
|
|
17
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
18
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
19
|
-
"text/csv",
|
|
20
|
-
"application/csv",
|
|
21
|
-
"image/png",
|
|
22
|
-
"image/jpeg",
|
|
23
|
-
])
|
|
16
|
+
.mime([...supportedFileMimes])
|
|
24
17
|
.optional()
|
|
25
|
-
.describe(
|
|
18
|
+
.describe(`Invoice file upload via multipart form-data (${supportedFileFormatsDescription})`),
|
|
26
19
|
base64: z.string().optional().describe("Base64-encoded invoice file content"),
|
|
27
20
|
prompt: z
|
|
28
21
|
.string()
|
|
@@ -37,7 +30,7 @@ const extractInputSchema = z.object({
|
|
|
37
30
|
const parsed = JSON.parse(trimmed);
|
|
38
31
|
if (typeof parsed !== "object" || parsed === null) {
|
|
39
32
|
ctx.addIssue({
|
|
40
|
-
code:
|
|
33
|
+
code: "custom",
|
|
41
34
|
message: "Schema must be a JSON object",
|
|
42
35
|
});
|
|
43
36
|
return z.NEVER;
|
|
@@ -46,7 +39,7 @@ const extractInputSchema = z.object({
|
|
|
46
39
|
}
|
|
47
40
|
catch {
|
|
48
41
|
ctx.addIssue({
|
|
49
|
-
code:
|
|
42
|
+
code: "custom",
|
|
50
43
|
message: "Invalid JSON string for schema",
|
|
51
44
|
});
|
|
52
45
|
return z.NEVER;
|
|
@@ -54,12 +47,7 @@ const extractInputSchema = z.object({
|
|
|
54
47
|
}),
|
|
55
48
|
])
|
|
56
49
|
.describe("JSON Schema describing the structure of the data to extract from the invoice. Can be a JSON object or a JSON string."),
|
|
57
|
-
model: specializedModelSchema.describe(
|
|
58
|
-
"'auto' (default): Automatically selects the best tier. " +
|
|
59
|
-
"'nano': Uses lightweight models. " +
|
|
60
|
-
"'mini': Uses mid-range models. " +
|
|
61
|
-
"'pro': Uses capable models. " +
|
|
62
|
-
"'max': Uses the most powerful models."),
|
|
50
|
+
model: specializedModelSchema.describe(specializedExtractModelDescription),
|
|
63
51
|
callback: z
|
|
64
52
|
.object({
|
|
65
53
|
url: z
|
|
@@ -83,11 +71,11 @@ const extractOutputSchema = z.object({
|
|
|
83
71
|
(typeof val !== "object" || Object.keys(val).length > 0), { message: "Extracted data must not be empty" })
|
|
84
72
|
.describe("Extracted structured data matching the provided JSON Schema"),
|
|
85
73
|
pageCount: z.number().int().describe("Total number of pages in the document"),
|
|
86
|
-
model: pdfvectorModelSchema.describe(
|
|
74
|
+
model: pdfvectorModelSchema.describe(outputExtractModelDescription),
|
|
87
75
|
credits: z
|
|
88
76
|
.number()
|
|
89
77
|
.int()
|
|
90
|
-
.describe("Number of credits consumed by this API call"),
|
|
78
|
+
.describe("Number of credits consumed by this API call. Cost per page: nano=6, mini=10, pro=14, max=18."),
|
|
91
79
|
requestId: z
|
|
92
80
|
.number()
|
|
93
81
|
.int()
|
|
@@ -127,7 +115,7 @@ const requestExamples = {
|
|
|
127
115
|
export const extract = oc
|
|
128
116
|
.route({
|
|
129
117
|
summary: "Extract structured data from an invoice",
|
|
130
|
-
description:
|
|
118
|
+
description: `Parse an invoice and extract structured data matching a provided JSON Schema using AI. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
|
|
131
119
|
tags: ["Invoice"],
|
|
132
120
|
spec: (op) => getDefaultSpec(op, requestExamples),
|
|
133
121
|
})
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { oc } from "@orpc/contract";
|
|
2
2
|
import { z } from "zod";
|
|
3
|
+
import { specializedParseModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
|
|
3
4
|
import { getDefaultSpec } from "./get-default-spec";
|
|
4
5
|
const specializedParseModelSchema = z
|
|
5
6
|
.enum(["pro", "max", "auto"], {
|
|
@@ -13,22 +14,11 @@ const parseInputSchema = z.object({
|
|
|
13
14
|
.describe("URL of the invoice file to fetch and parse"),
|
|
14
15
|
file: z
|
|
15
16
|
.file()
|
|
16
|
-
.mime([
|
|
17
|
-
"application/pdf",
|
|
18
|
-
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
19
|
-
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
20
|
-
"text/csv",
|
|
21
|
-
"application/csv",
|
|
22
|
-
"image/png",
|
|
23
|
-
"image/jpeg",
|
|
24
|
-
])
|
|
17
|
+
.mime([...supportedFileMimes])
|
|
25
18
|
.optional()
|
|
26
|
-
.describe(
|
|
19
|
+
.describe(`Invoice file upload via multipart form-data (${supportedFileFormatsDescription})`),
|
|
27
20
|
base64: z.string().optional().describe("Base64-encoded invoice file content"),
|
|
28
|
-
model: specializedParseModelSchema.describe("
|
|
29
|
-
"'auto' (default): Automatically selects the best parsing strategy with intelligent fallback. " +
|
|
30
|
-
"'pro': Extracts structured invoice fields with standard accuracy. " +
|
|
31
|
-
"'max': Extracts structured invoice fields with highest accuracy and fallback."),
|
|
21
|
+
model: specializedParseModelSchema.describe(specializedParseModelDescription("invoice")),
|
|
32
22
|
callback: z
|
|
33
23
|
.object({
|
|
34
24
|
url: z
|
|
@@ -54,7 +44,7 @@ const parseOutputSchema = z.object({
|
|
|
54
44
|
credits: z
|
|
55
45
|
.number()
|
|
56
46
|
.int()
|
|
57
|
-
.describe("Number of credits consumed by this API call"),
|
|
47
|
+
.describe("Number of credits consumed by this API call. Cost per page: pro=6, max=10."),
|
|
58
48
|
requestId: z
|
|
59
49
|
.number()
|
|
60
50
|
.int()
|
|
@@ -92,7 +82,7 @@ const requestExamples = {
|
|
|
92
82
|
export const parse = oc
|
|
93
83
|
.route({
|
|
94
84
|
summary: "Parse an invoice",
|
|
95
|
-
description:
|
|
85
|
+
description: `Extract text and structured data from an invoice. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
|
|
96
86
|
tags: ["Invoice"],
|
|
97
87
|
spec: (op) => getDefaultSpec(op, requestExamples),
|
|
98
88
|
})
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* All MIME types accepted for file uploads across all API endpoints.
|
|
3
|
+
* Single source of truth — imported by all contract schemas.
|
|
4
|
+
*/
|
|
5
|
+
export declare const supportedFileMimes: readonly ["application/pdf", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.openxmlformats-officedocument.presentationml.presentation", "text/csv", "application/csv", "image/png", "image/jpeg", "image/tiff", "image/bmp", "image/heif", "image/heic", "text/plain", "text/markdown", "text/tab-separated-values", "text/xml", "application/xml", "application/rtf", "text/rtf", "text/html", "application/epub+zip", "application/vnd.oasis.opendocument.text", "application/vnd.oasis.opendocument.spreadsheet", "application/vnd.oasis.opendocument.presentation", "application/x-bibtex"];
|
|
6
|
+
export declare const supportedFileFormatsDescription = "PDF, DOCX, XLSX, PPTX, CSV, PNG, JPG, TIFF, BMP, HEIF, TXT, MD, TSV, XML, RTF, HTML, ODT, ODS, ODP, EPUB, BIB, RIS, NBIB, ENW";
|
|
7
|
+
/**
|
|
8
|
+
* Human-readable description of supported file types with extensions.
|
|
9
|
+
* Used in route-level API descriptions.
|
|
10
|
+
*/
|
|
11
|
+
export declare const supportedFileTypesLong: string;
|
|
12
|
+
/** Model tier descriptions for document parse endpoints. */
|
|
13
|
+
export declare const documentParseModelDescription: string;
|
|
14
|
+
/** Model tier descriptions for document extract endpoints. */
|
|
15
|
+
export declare const documentExtractModelDescription: string;
|
|
16
|
+
/** Model tier descriptions for document ask endpoints. */
|
|
17
|
+
export declare const documentAskModelDescription: string;
|
|
18
|
+
/** Model tier descriptions for invoice/identity/bankStatement parse endpoints (pro/max/auto only). */
|
|
19
|
+
export declare const specializedParseModelDescription: (type: string) => string;
|
|
20
|
+
/** Model tier descriptions for invoice/identity/bankStatement extract endpoints. */
|
|
21
|
+
export declare const specializedExtractModelDescription: string;
|
|
22
|
+
/** Model tier descriptions for invoice/identity/bankStatement ask endpoints. */
|
|
23
|
+
export declare const specializedAskModelDescription: string;
|
|
24
|
+
/** Output model description for parse results. */
|
|
25
|
+
export declare const outputModelDescription: string;
|
|
26
|
+
/** Output model description for extract results. */
|
|
27
|
+
export declare const outputExtractModelDescription: string;
|
|
28
|
+
/** Output model description for ask results. */
|
|
29
|
+
export declare const outputAskModelDescription: string;
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* All MIME types accepted for file uploads across all API endpoints.
|
|
3
|
+
* Single source of truth — imported by all contract schemas.
|
|
4
|
+
*/
|
|
5
|
+
export const supportedFileMimes = [
|
|
6
|
+
// PDF
|
|
7
|
+
"application/pdf",
|
|
8
|
+
// Office documents
|
|
9
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
10
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
11
|
+
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
12
|
+
// CSV
|
|
13
|
+
"text/csv",
|
|
14
|
+
"application/csv",
|
|
15
|
+
// Images
|
|
16
|
+
"image/png",
|
|
17
|
+
"image/jpeg",
|
|
18
|
+
"image/tiff",
|
|
19
|
+
"image/bmp",
|
|
20
|
+
"image/heif",
|
|
21
|
+
"image/heic",
|
|
22
|
+
// Plain text & structured text
|
|
23
|
+
"text/plain",
|
|
24
|
+
"text/markdown",
|
|
25
|
+
"text/tab-separated-values",
|
|
26
|
+
"text/xml",
|
|
27
|
+
"application/xml",
|
|
28
|
+
// RTF
|
|
29
|
+
"application/rtf",
|
|
30
|
+
"text/rtf",
|
|
31
|
+
// HTML
|
|
32
|
+
"text/html",
|
|
33
|
+
// OpenDocument & EPUB
|
|
34
|
+
"application/epub+zip",
|
|
35
|
+
"application/vnd.oasis.opendocument.text",
|
|
36
|
+
"application/vnd.oasis.opendocument.spreadsheet",
|
|
37
|
+
"application/vnd.oasis.opendocument.presentation",
|
|
38
|
+
// Bibliography / Academic
|
|
39
|
+
"application/x-bibtex",
|
|
40
|
+
];
|
|
41
|
+
export const supportedFileFormatsDescription = "PDF, DOCX, XLSX, PPTX, CSV, PNG, JPG, TIFF, BMP, HEIF, TXT, MD, TSV, XML, RTF, HTML, ODT, ODS, ODP, EPUB, BIB, RIS, NBIB, ENW";
|
|
42
|
+
/**
|
|
43
|
+
* Human-readable description of supported file types with extensions.
|
|
44
|
+
* Used in route-level API descriptions.
|
|
45
|
+
*/
|
|
46
|
+
export const supportedFileTypesLong = "PDF, Word (.docx), Excel (.xlsx), PowerPoint (.pptx), CSV, " +
|
|
47
|
+
"Image (.png, .jpg, .tiff, .bmp, .heif), " +
|
|
48
|
+
"Plain Text (.txt, .md, .tsv, .xml), RTF, HTML, " +
|
|
49
|
+
"OpenDocument (.odt, .ods, .odp), EPUB, " +
|
|
50
|
+
"and Bibliography (.bib, .ris, .nbib, .enw) files";
|
|
51
|
+
/**
|
|
52
|
+
* Per-tier file type support descriptions for model input fields.
|
|
53
|
+
* Format lists are stated once at the bottom to avoid wall-of-text in Scalar.
|
|
54
|
+
*/
|
|
55
|
+
const formatNote = "\n\n" +
|
|
56
|
+
"Supported formats by tier:\n" +
|
|
57
|
+
"- All tiers: PDF, Word, Excel, PowerPoint, CSV, Text, HTML, RTF, OpenDocument, EPUB, Bibliography.\n" +
|
|
58
|
+
"- Pro adds: Image (PNG, JPG).\n" +
|
|
59
|
+
"- Max/Auto adds: Image (PNG, JPG, TIFF, BMP, HEIF).";
|
|
60
|
+
/** Model tier descriptions for document parse endpoints. */
|
|
61
|
+
export const documentParseModelDescription = "Model tier for parsing.\n\n" +
|
|
62
|
+
"- auto (default): Intelligent fallback. Up to 1000 pages, 500MB.\n" +
|
|
63
|
+
"- nano: 1 credit/page. Simple plain text documents. Up to 30 pages, 10MB.\n" +
|
|
64
|
+
"- mini: 2 credits/page. Documents with tables and structured content. Up to 30 pages, 10MB.\n" +
|
|
65
|
+
"- pro: 4 credits/page. Tables, handwritten text, figures, math, Arabic. Up to 30 pages, 40MB.\n" +
|
|
66
|
+
"- max: 8 credits/page. Full Pro capabilities + enhanced multilingual. Up to 1000 pages, 500MB." +
|
|
67
|
+
formatNote;
|
|
68
|
+
/** Model tier descriptions for document extract endpoints. */
|
|
69
|
+
export const documentExtractModelDescription = "Model tier for extracting structured data.\n\n" +
|
|
70
|
+
"- auto (default): Automatically selects the best tier based on document complexity.\n" +
|
|
71
|
+
"- nano: 2 credits/page. Fastest. Best for simple documents with straightforward schemas.\n" +
|
|
72
|
+
"- mini: 4 credits/page. Balanced speed and accuracy. Moderately complex schemas.\n" +
|
|
73
|
+
"- pro: 8 credits/page. High accuracy for complex documents with large or nested schemas.\n" +
|
|
74
|
+
"- max: 16 credits/page. Maximum accuracy. Best for difficult extractions requiring deep reasoning." +
|
|
75
|
+
formatNote;
|
|
76
|
+
/** Model tier descriptions for document ask endpoints. */
|
|
77
|
+
export const documentAskModelDescription = "Model tier for answering the question.\n\n" +
|
|
78
|
+
"- auto (default): Automatically selects the best tier based on document complexity.\n" +
|
|
79
|
+
"- nano: 2 credits/page. Fastest. Best for simple questions about straightforward documents.\n" +
|
|
80
|
+
"- mini: 4 credits/page. Balanced speed and accuracy. Moderately complex questions.\n" +
|
|
81
|
+
"- pro: 8 credits/page. High accuracy for nuanced questions about complex documents.\n" +
|
|
82
|
+
"- max: 16 credits/page. Maximum accuracy. Best for difficult questions requiring deep reasoning." +
|
|
83
|
+
formatNote;
|
|
84
|
+
/** Model tier descriptions for invoice/identity/bankStatement parse endpoints (pro/max/auto only). */
|
|
85
|
+
export const specializedParseModelDescription = (type) => "Model tier for parsing.\n\n" +
|
|
86
|
+
"- auto (default): Intelligent fallback.\n" +
|
|
87
|
+
`- pro: 6 credits/page. Extracts structured ${type} fields with standard accuracy.\n` +
|
|
88
|
+
`- max: 10 credits/page. Extracts structured ${type} fields with highest accuracy and fallback.`;
|
|
89
|
+
/** Model tier descriptions for invoice/identity/bankStatement extract endpoints. */
|
|
90
|
+
export const specializedExtractModelDescription = "Model tier for extracting structured data.\n\n" +
|
|
91
|
+
"- auto (default): Automatically selects the best tier based on document complexity.\n" +
|
|
92
|
+
"- nano: 6 credits/page. Fastest. Best for simple documents with straightforward schemas.\n" +
|
|
93
|
+
"- mini: 10 credits/page. Balanced speed and accuracy. Moderately complex schemas.\n" +
|
|
94
|
+
"- pro: 14 credits/page. High accuracy for complex documents with large or nested schemas.\n" +
|
|
95
|
+
"- max: 18 credits/page. Maximum accuracy. Best for difficult extractions requiring deep reasoning." +
|
|
96
|
+
formatNote;
|
|
97
|
+
/** Model tier descriptions for invoice/identity/bankStatement ask endpoints. */
|
|
98
|
+
export const specializedAskModelDescription = "Model tier for answering the question.\n\n" +
|
|
99
|
+
"- auto (default): Automatically selects the best tier based on document complexity.\n" +
|
|
100
|
+
"- nano: 6 credits/page. Fastest. Best for simple questions about straightforward documents.\n" +
|
|
101
|
+
"- mini: 10 credits/page. Balanced speed and accuracy. Moderately complex questions.\n" +
|
|
102
|
+
"- pro: 14 credits/page. High accuracy for nuanced questions about complex documents.\n" +
|
|
103
|
+
"- max: 18 credits/page. Maximum accuracy. Best for difficult questions requiring deep reasoning." +
|
|
104
|
+
formatNote;
|
|
105
|
+
/** Output model description for parse results. */
|
|
106
|
+
export const outputModelDescription = "Model tier used to parse the document.\n\n" +
|
|
107
|
+
"- nano: Fastest, best for simple documents.\n" +
|
|
108
|
+
"- mini: Balanced speed and accuracy.\n" +
|
|
109
|
+
"- pro: High accuracy for complex documents.\n" +
|
|
110
|
+
"- max: Maximum accuracy with deep reasoning.";
|
|
111
|
+
/** Output model description for extract results. */
|
|
112
|
+
export const outputExtractModelDescription = "Model tier used to extract the data.\n\n" +
|
|
113
|
+
"- nano: Fastest, best for simple documents.\n" +
|
|
114
|
+
"- mini: Balanced speed and accuracy.\n" +
|
|
115
|
+
"- pro: High accuracy for complex documents.\n" +
|
|
116
|
+
"- max: Maximum accuracy with deep reasoning.";
|
|
117
|
+
/** Output model description for ask results. */
|
|
118
|
+
export const outputAskModelDescription = "Model tier used to answer the question.\n\n" +
|
|
119
|
+
"- nano: Fastest, best for simple questions.\n" +
|
|
120
|
+
"- mini: Balanced speed and accuracy.\n" +
|
|
121
|
+
"- pro: High accuracy for complex questions.\n" +
|
|
122
|
+
"- max: Maximum accuracy with deep reasoning.";
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# @pdfvector/instance-contract
|
|
2
2
|
|
|
3
|
+
## 0.0.28
|
|
4
|
+
### Patch Changes
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
- [#148](https://github.com/phuctm97/pdfvector/pull/148) [`9a8d292`](https://github.com/phuctm97/pdfvector/commit/9a8d2920d022837dd09eb297949c2a40acd04b68) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Add support for academic and additional file types
|
|
9
|
+
|
|
10
|
+
## 0.0.27
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
- [#146](https://github.com/phuctm97/pdfvector/pull/146) [`3f46b91`](https://github.com/phuctm97/pdfvector/commit/3f46b91bff72ba75616a5b529aa3e511ea4fb8a2) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Fix extract empty responses, track actual model, and reorder tier models
|
|
16
|
+
|
|
3
17
|
## 0.0.26
|
|
4
18
|
### Patch Changes
|
|
5
19
|
|