@pdfvector/instance-contract 0.0.17 → 0.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/.tsc/lib/router/bankStatement/ask.d.ts +24 -0
  2. package/.tsc/lib/router/bankStatement/ask.js +70 -0
  3. package/.tsc/lib/router/bankStatement/extract.d.ts +25 -0
  4. package/.tsc/lib/router/bankStatement/extract.js +117 -0
  5. package/.tsc/lib/router/bankStatement/get-default-spec.d.ts +1 -0
  6. package/.tsc/lib/router/bankStatement/get-default-spec.js +19 -0
  7. package/.tsc/lib/router/bankStatement/index.d.ts +3 -0
  8. package/.tsc/lib/router/bankStatement/index.js +3 -0
  9. package/.tsc/lib/router/bankStatement/parse.d.ts +21 -0
  10. package/.tsc/lib/router/bankStatement/parse.js +84 -0
  11. package/.tsc/lib/router/document/parse.d.ts +1 -0
  12. package/.tsc/lib/router/document/parse.js +5 -0
  13. package/.tsc/lib/router/identity/ask.d.ts +24 -0
  14. package/.tsc/lib/router/identity/ask.js +77 -0
  15. package/.tsc/lib/router/identity/extract.d.ts +25 -0
  16. package/.tsc/lib/router/identity/extract.js +107 -0
  17. package/.tsc/lib/router/identity/get-default-spec.d.ts +1 -0
  18. package/.tsc/lib/router/identity/get-default-spec.js +19 -0
  19. package/.tsc/lib/router/identity/index.d.ts +3 -0
  20. package/.tsc/lib/router/identity/index.js +3 -0
  21. package/.tsc/lib/router/identity/parse.d.ts +22 -0
  22. package/.tsc/lib/router/identity/parse.js +88 -0
  23. package/.tsc/lib/router/index.d.ts +3 -0
  24. package/.tsc/lib/router/index.js +3 -0
  25. package/.tsc/lib/router/invoice/ask.d.ts +24 -0
  26. package/.tsc/lib/router/invoice/ask.js +67 -0
  27. package/.tsc/lib/router/invoice/extract.d.ts +25 -0
  28. package/.tsc/lib/router/invoice/extract.js +113 -0
  29. package/.tsc/lib/router/invoice/get-default-spec.d.ts +1 -0
  30. package/.tsc/lib/router/invoice/get-default-spec.js +19 -0
  31. package/.tsc/lib/router/invoice/index.d.ts +3 -0
  32. package/.tsc/lib/router/invoice/index.js +3 -0
  33. package/.tsc/lib/router/invoice/parse.d.ts +21 -0
  34. package/.tsc/lib/router/invoice/parse.js +79 -0
  35. package/CHANGELOG.md +14 -0
  36. package/package.json +1 -1
@@ -0,0 +1,24 @@
1
+ import { z } from "zod";
2
+ export declare const ask: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.ZodObject<{
3
+ url: z.ZodOptional<z.ZodURL>;
4
+ file: z.ZodOptional<z.ZodFile>;
5
+ base64: z.ZodOptional<z.ZodString>;
6
+ question: z.ZodString;
7
+ model: z.ZodDefault<z.ZodEnum<{
8
+ nano: "nano";
9
+ mini: "mini";
10
+ pro: "pro";
11
+ max: "max";
12
+ auto: "auto";
13
+ }>>;
14
+ }, z.core.$strip>, z.ZodObject<{
15
+ markdown: z.ZodString;
16
+ model: z.ZodEnum<{
17
+ nano: "nano";
18
+ mini: "mini";
19
+ pro: "pro";
20
+ max: "max";
21
+ }>;
22
+ requestId: z.ZodNumber;
23
+ documentId: z.ZodOptional<z.ZodString>;
24
+ }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -0,0 +1,70 @@
1
+ import { oc } from "@orpc/contract";
2
+ import { z } from "zod";
3
+ import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
+ import { getDefaultSpec } from "./get-default-spec";
5
+ const specializedModelSchema = z
6
+ .enum(["auto", ...pdfvectorModelSchema.options])
7
+ .default("auto");
8
+ const askInputSchema = z.object({
9
+ url: z
10
+ .url()
11
+ .optional()
12
+ .describe("URL of the bank statement file to fetch and parse"),
13
+ file: z
14
+ .file()
15
+ .mime([
16
+ "application/pdf",
17
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
18
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
19
+ "text/csv",
20
+ "application/csv",
21
+ "image/png",
22
+ "image/jpeg",
23
+ ])
24
+ .optional()
25
+ .describe("Bank statement file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
26
+ base64: z
27
+ .string()
28
+ .optional()
29
+ .describe("Base64-encoded bank statement file content"),
30
+ question: z
31
+ .string()
32
+ .min(4)
33
+ .describe("The question to answer about the bank statement"),
34
+ model: specializedModelSchema.describe("Model tier for answering the question. " +
35
+ "'auto' (default): Automatically selects the best tier. " +
36
+ "'nano': Uses lightweight models. " +
37
+ "'mini': Uses mid-range models. " +
38
+ "'pro': Uses capable models. " +
39
+ "'max': Uses the most powerful models."),
40
+ });
41
+ const askOutputSchema = z.object({
42
+ markdown: z.string().describe("The answer to the question"),
43
+ model: pdfvectorModelSchema.describe("Model tier used to answer the question"),
44
+ requestId: z
45
+ .number()
46
+ .int()
47
+ .describe("Unique request identifier for this API call"),
48
+ documentId: z
49
+ .string()
50
+ .optional()
51
+ .describe("Document ID if provided via x-pdfvector-document-id header"),
52
+ });
53
+ const requestExamples = {
54
+ "Ask from URL": {
55
+ summary: "Ask from URL",
56
+ value: {
57
+ url: "https://example.com/bank-statement.pdf",
58
+ question: "What is the ending balance?",
59
+ },
60
+ },
61
+ };
62
+ export const ask = oc
63
+ .route({
64
+ summary: "Ask a question about a bank statement",
65
+ description: "Parse a bank statement and answer a question about its content using AI. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
66
+ tags: ["Bank Statement"],
67
+ spec: (op) => getDefaultSpec(op, requestExamples),
68
+ })
69
+ .input(askInputSchema)
70
+ .output(askOutputSchema);
@@ -0,0 +1,25 @@
1
+ import { z } from "zod";
2
+ export declare const extract: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.ZodObject<{
3
+ url: z.ZodOptional<z.ZodURL>;
4
+ file: z.ZodOptional<z.ZodFile>;
5
+ base64: z.ZodOptional<z.ZodString>;
6
+ prompt: z.ZodString;
7
+ schema: z.ZodUnion<readonly [z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodPipe<z.ZodString, z.ZodTransform<Record<string, unknown>, string>>]>;
8
+ model: z.ZodDefault<z.ZodEnum<{
9
+ nano: "nano";
10
+ mini: "mini";
11
+ pro: "pro";
12
+ max: "max";
13
+ auto: "auto";
14
+ }>>;
15
+ }, z.core.$strip>, z.ZodObject<{
16
+ data: z.ZodUnknown;
17
+ model: z.ZodEnum<{
18
+ nano: "nano";
19
+ mini: "mini";
20
+ pro: "pro";
21
+ max: "max";
22
+ }>;
23
+ requestId: z.ZodNumber;
24
+ documentId: z.ZodOptional<z.ZodString>;
25
+ }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -0,0 +1,117 @@
1
+ import { oc } from "@orpc/contract";
2
+ import { z } from "zod";
3
+ import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
+ import { getDefaultSpec } from "./get-default-spec";
5
+ const specializedModelSchema = z
6
+ .enum(["auto", ...pdfvectorModelSchema.options])
7
+ .default("auto");
8
+ const extractInputSchema = z.object({
9
+ url: z
10
+ .url()
11
+ .optional()
12
+ .describe("URL of the bank statement file to fetch and parse"),
13
+ file: z
14
+ .file()
15
+ .mime([
16
+ "application/pdf",
17
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
18
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
19
+ "text/csv",
20
+ "application/csv",
21
+ "image/png",
22
+ "image/jpeg",
23
+ ])
24
+ .optional()
25
+ .describe("Bank statement file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
26
+ base64: z
27
+ .string()
28
+ .optional()
29
+ .describe("Base64-encoded bank statement file content"),
30
+ prompt: z
31
+ .string()
32
+ .min(4)
33
+ .describe("The prompt instructing the AI how to extract data from the bank statement"),
34
+ schema: z
35
+ .union([
36
+ z.record(z.string(), z.unknown()),
37
+ z.string().transform((str, ctx) => {
38
+ try {
39
+ const trimmed = str.startsWith('"') && str.endsWith('"') ? str.slice(1, -1) : str;
40
+ const parsed = JSON.parse(trimmed);
41
+ if (typeof parsed !== "object" || parsed === null) {
42
+ ctx.addIssue({
43
+ code: z.ZodIssueCode.custom,
44
+ message: "Schema must be a JSON object",
45
+ });
46
+ return z.NEVER;
47
+ }
48
+ return parsed;
49
+ }
50
+ catch {
51
+ ctx.addIssue({
52
+ code: z.ZodIssueCode.custom,
53
+ message: "Invalid JSON string for schema",
54
+ });
55
+ return z.NEVER;
56
+ }
57
+ }),
58
+ ])
59
+ .describe("JSON Schema describing the structure of the data to extract from the bank statement. Can be a JSON object or a JSON string."),
60
+ model: specializedModelSchema.describe("Model tier for extracting structured data. " +
61
+ "'auto' (default): Automatically selects the best tier. " +
62
+ "'nano': Uses lightweight models. " +
63
+ "'mini': Uses mid-range models. " +
64
+ "'pro': Uses capable models. " +
65
+ "'max': Uses the most powerful models."),
66
+ });
67
+ const extractOutputSchema = z.object({
68
+ data: z
69
+ .unknown()
70
+ .describe("Extracted structured data matching the provided JSON Schema"),
71
+ model: pdfvectorModelSchema.describe("Model tier used to extract the data"),
72
+ requestId: z
73
+ .number()
74
+ .int()
75
+ .describe("Unique request identifier for this API call"),
76
+ documentId: z
77
+ .string()
78
+ .optional()
79
+ .describe("Document ID if provided via x-pdfvector-document-id header"),
80
+ });
81
+ const requestExamples = {
82
+ "Extract from URL": {
83
+ summary: "Extract from URL",
84
+ value: {
85
+ url: "https://example.com/bank-statement.pdf",
86
+ prompt: "Extract the account number, statement period, and transactions",
87
+ schema: JSON.stringify({
88
+ type: "object",
89
+ properties: {
90
+ accountNumber: { type: "string" },
91
+ statementPeriod: { type: "string" },
92
+ transactions: {
93
+ type: "array",
94
+ items: {
95
+ type: "object",
96
+ properties: {
97
+ date: { type: "string" },
98
+ description: { type: "string" },
99
+ amount: { type: "number" },
100
+ },
101
+ },
102
+ },
103
+ },
104
+ required: ["accountNumber", "statementPeriod"],
105
+ }),
106
+ },
107
+ },
108
+ };
109
+ export const extract = oc
110
+ .route({
111
+ summary: "Extract structured data from a bank statement",
112
+ description: "Parse a bank statement and extract structured data matching a provided JSON Schema using AI. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
113
+ tags: ["Bank Statement"],
114
+ spec: (op) => getDefaultSpec(op, requestExamples),
115
+ })
116
+ .input(extractInputSchema)
117
+ .output(extractOutputSchema);
@@ -0,0 +1 @@
1
+ export declare function getDefaultSpec(op: Record<string, unknown>, requestExamples: Record<string, unknown>): Record<string, unknown>;
@@ -0,0 +1,19 @@
1
+ export function getDefaultSpec(op, requestExamples) {
2
+ op.security = [{ bearerAuth: [] }];
3
+ const params = (op.parameters ?? []);
4
+ params.push({
5
+ name: "x-pdfvector-document-id",
6
+ in: "header",
7
+ required: false,
8
+ schema: { type: "string", default: "my-doc-123" },
9
+ description: "Optional document ID to associate with this request. Returned in the response and saved for usage tracking.",
10
+ });
11
+ op.parameters = params;
12
+ const reqBody = op.requestBody;
13
+ if (reqBody?.content) {
14
+ for (const mediaType of Object.values(reqBody.content)) {
15
+ mediaType.examples = requestExamples;
16
+ }
17
+ }
18
+ return op;
19
+ }
@@ -0,0 +1,3 @@
1
+ export * from "./ask";
2
+ export * from "./extract";
3
+ export * from "./parse";
@@ -0,0 +1,3 @@
1
+ export * from "./ask";
2
+ export * from "./extract";
3
+ export * from "./parse";
@@ -0,0 +1,21 @@
1
+ import { z } from "zod";
2
+ export declare const parse: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.ZodObject<{
3
+ url: z.ZodOptional<z.ZodURL>;
4
+ file: z.ZodOptional<z.ZodFile>;
5
+ base64: z.ZodOptional<z.ZodString>;
6
+ model: z.ZodDefault<z.ZodEnum<{
7
+ pro: "pro";
8
+ max: "max";
9
+ auto: "auto";
10
+ }>>;
11
+ }, z.core.$strip>, z.ZodObject<{
12
+ markdown: z.ZodString;
13
+ pageCount: z.ZodNumber;
14
+ model: z.ZodEnum<{
15
+ pro: "pro";
16
+ max: "max";
17
+ }>;
18
+ requestId: z.ZodNumber;
19
+ html: z.ZodOptional<z.ZodString>;
20
+ documentId: z.ZodOptional<z.ZodString>;
21
+ }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -0,0 +1,84 @@
1
+ import { oc } from "@orpc/contract";
2
+ import { z } from "zod";
3
+ import { getDefaultSpec } from "./get-default-spec";
4
+ const specializedParseModelSchema = z
5
+ .enum(["pro", "max", "auto"])
6
+ .default("auto");
7
+ const parseInputSchema = z.object({
8
+ url: z
9
+ .url()
10
+ .optional()
11
+ .describe("URL of the bank statement file to fetch and parse"),
12
+ file: z
13
+ .file()
14
+ .mime([
15
+ "application/pdf",
16
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
17
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
18
+ "text/csv",
19
+ "application/csv",
20
+ "image/png",
21
+ "image/jpeg",
22
+ ])
23
+ .optional()
24
+ .describe("Bank statement file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
25
+ base64: z
26
+ .string()
27
+ .optional()
28
+ .describe("Base64-encoded bank statement file content"),
29
+ model: specializedParseModelSchema.describe("Model tier for parsing. " +
30
+ "'auto' (default): Automatically selects the best parsing strategy with intelligent fallback. " +
31
+ "'pro': Extracts structured bank statement fields with standard accuracy. " +
32
+ "'max': Extracts structured bank statement fields with highest accuracy and fallback."),
33
+ });
34
+ const parseOutputSchema = z.object({
35
+ markdown: z
36
+ .string()
37
+ .describe("Extracted text content from the bank statement"),
38
+ pageCount: z.number().int().describe("Total number of pages in the document"),
39
+ model: z
40
+ .enum(["pro", "max"])
41
+ .describe("Model tier used to parse the bank statement"),
42
+ requestId: z
43
+ .number()
44
+ .int()
45
+ .describe("Unique request identifier for this API call"),
46
+ html: z
47
+ .string()
48
+ .optional()
49
+ .describe("Full HTML representation of the document content. Only available when using the 'max' model. " +
50
+ "Preserves rich formatting, tables, selection marks, and visual layout that cannot be fully represented in markdown."),
51
+ documentId: z
52
+ .string()
53
+ .optional()
54
+ .describe("Document ID if provided via x-pdfvector-document-id header"),
55
+ });
56
+ const requestExamples = {
57
+ "Parse from URL": {
58
+ summary: "Parse from URL",
59
+ value: {
60
+ url: "https://example.com/bank-statement.pdf",
61
+ },
62
+ },
63
+ "Parse from base64": {
64
+ summary: "Parse from base64",
65
+ value: {
66
+ base64: "JVBERi0xLjAKMSAwIG9iajw8L1R5cGUvQ2F0YWxvZy9QYWdlcyAyIDAgUj4+ZW5kb2JqIDIgMCBvYmo8PC9UeXBlL1BhZ2VzL0tpZHNbMyAwIFJdL0NvdW50IDE+PmVuZG9iaiAzIDAgb2JqPDwvVHlwZS9QYWdlL01lZGlhQm94WzAgMCAzIDNdL1BhcmVudCAyIDAgUj4+ZW5kb2JqCnhyZWYKMCA0CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDAwOSAwMDAwMCBuIAowMDAwMDAwMDU4IDAwMDAwIG4gCjAwMDAwMDAxMTUgMDAwMDAgbiAKdHJhaWxlcjw8L1NpemUgNC9Sb290IDEgMCBSPj4Kc3RhcnR4cmVmCjE5MAolJUVPRg==",
67
+ },
68
+ },
69
+ "Parse from file upload": {
70
+ summary: "Parse from file upload",
71
+ value: {
72
+ file: "(binary)",
73
+ },
74
+ },
75
+ };
76
+ export const parse = oc
77
+ .route({
78
+ summary: "Parse a bank statement",
79
+ description: "Extract text and structured data from a bank statement. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
80
+ tags: ["Bank Statement"],
81
+ spec: (op) => getDefaultSpec(op, requestExamples),
82
+ })
83
+ .input(parseInputSchema)
84
+ .output(parseOutputSchema);
@@ -20,5 +20,6 @@ export declare const parse: import("@orpc/contract").ContractProcedureBuilderWit
20
20
  max: "max";
21
21
  }>;
22
22
  requestId: z.ZodNumber;
23
+ html: z.ZodOptional<z.ZodString>;
23
24
  documentId: z.ZodOptional<z.ZodString>;
24
25
  }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -47,6 +47,11 @@ const parseOutputSchema = z.object({
47
47
  .number()
48
48
  .int()
49
49
  .describe("Unique request identifier for this API call"),
50
+ html: z
51
+ .string()
52
+ .optional()
53
+ .describe("Full HTML representation of the document content. Only available when using the 'max' model. " +
54
+ "Preserves rich formatting, tables, selection marks, and visual layout that cannot be fully represented in markdown."),
50
55
  documentId: z
51
56
  .string()
52
57
  .optional()
@@ -0,0 +1,24 @@
1
+ import { z } from "zod";
2
+ export declare const ask: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.ZodObject<{
3
+ url: z.ZodOptional<z.ZodURL>;
4
+ file: z.ZodOptional<z.ZodFile>;
5
+ base64: z.ZodOptional<z.ZodString>;
6
+ question: z.ZodString;
7
+ model: z.ZodDefault<z.ZodEnum<{
8
+ nano: "nano";
9
+ mini: "mini";
10
+ pro: "pro";
11
+ max: "max";
12
+ auto: "auto";
13
+ }>>;
14
+ }, z.core.$strip>, z.ZodObject<{
15
+ markdown: z.ZodString;
16
+ model: z.ZodEnum<{
17
+ nano: "nano";
18
+ mini: "mini";
19
+ pro: "pro";
20
+ max: "max";
21
+ }>;
22
+ requestId: z.ZodNumber;
23
+ documentId: z.ZodOptional<z.ZodString>;
24
+ }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -0,0 +1,77 @@
1
+ import { oc } from "@orpc/contract";
2
+ import { z } from "zod";
3
+ import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
+ import { getDefaultSpec } from "./get-default-spec";
5
+ const specializedModelSchema = z
6
+ .enum(["auto", ...pdfvectorModelSchema.options])
7
+ .default("auto");
8
+ const askInputSchema = z.object({
9
+ url: z
10
+ .url()
11
+ .optional()
12
+ .describe("URL of the identity document file to fetch and parse"),
13
+ file: z
14
+ .file()
15
+ .mime([
16
+ "application/pdf",
17
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
18
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
19
+ "text/csv",
20
+ "application/csv",
21
+ "image/png",
22
+ "image/jpeg",
23
+ ])
24
+ .optional()
25
+ .describe("Identity document file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
26
+ base64: z
27
+ .string()
28
+ .optional()
29
+ .describe("Base64-encoded identity document file content"),
30
+ question: z
31
+ .string()
32
+ .min(4)
33
+ .describe("The question to answer about the identity document"),
34
+ model: specializedModelSchema.describe("Model tier for answering the question. " +
35
+ "'auto' (default): Automatically selects the best tier. " +
36
+ "'nano': Uses lightweight models. " +
37
+ "'mini': Uses mid-range models. " +
38
+ "'pro': Uses capable models. " +
39
+ "'max': Uses the most powerful models."),
40
+ });
41
+ const askOutputSchema = z.object({
42
+ markdown: z.string().describe("The answer to the question"),
43
+ model: pdfvectorModelSchema.describe("Model tier used to answer the question"),
44
+ requestId: z
45
+ .number()
46
+ .int()
47
+ .describe("Unique request identifier for this API call"),
48
+ documentId: z
49
+ .string()
50
+ .optional()
51
+ .describe("Document ID if provided via x-pdfvector-document-id header"),
52
+ });
53
+ const requestExamples = {
54
+ "Ask from URL": {
55
+ summary: "Ask from URL",
56
+ value: {
57
+ url: "https://example.com/identity-document.pdf",
58
+ question: "What is the name on this identity document?",
59
+ },
60
+ },
61
+ "Ask from base64": {
62
+ summary: "Ask from base64",
63
+ value: {
64
+ base64: "JVBERi0xLjAKMSAwIG9iajw8L1R5cGUvQ2F0YWxvZy9QYWdlcyAyIDAgUj4+ZW5kb2JqIDIgMCBvYmo8PC9UeXBlL1BhZ2VzL0tpZHNbMyAwIFJdL0NvdW50IDE+PmVuZG9iaiAzIDAgb2JqPDwvVHlwZS9QYWdlL01lZGlhQm94WzAgMCAzIDNdL1BhcmVudCAyIDAgUj4+ZW5kb2JqCnhyZWYKMCA0CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDAwOSAwMDAwMCBuIAowMDAwMDAwMDU4IDAwMDAwIG4gCjAwMDAwMDAxMTUgMDAwMDAgbiAKdHJhaWxlcjw8L1NpemUgNC9Sb290IDEgMCBSPj4Kc3RhcnR4cmVmCjE5MAolJUVPRg==",
65
+ question: "What is the document number?",
66
+ },
67
+ },
68
+ };
69
+ export const ask = oc
70
+ .route({
71
+ summary: "Ask a question about an identity document",
72
+ description: "Parse an identity document and answer a question about its content using AI. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
73
+ tags: ["Identity"],
74
+ spec: (op) => getDefaultSpec(op, requestExamples),
75
+ })
76
+ .input(askInputSchema)
77
+ .output(askOutputSchema);
@@ -0,0 +1,25 @@
1
+ import { z } from "zod";
2
+ export declare const extract: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.ZodObject<{
3
+ url: z.ZodOptional<z.ZodURL>;
4
+ file: z.ZodOptional<z.ZodFile>;
5
+ base64: z.ZodOptional<z.ZodString>;
6
+ prompt: z.ZodString;
7
+ schema: z.ZodUnion<readonly [z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodPipe<z.ZodString, z.ZodTransform<Record<string, unknown>, string>>]>;
8
+ model: z.ZodDefault<z.ZodEnum<{
9
+ nano: "nano";
10
+ mini: "mini";
11
+ pro: "pro";
12
+ max: "max";
13
+ auto: "auto";
14
+ }>>;
15
+ }, z.core.$strip>, z.ZodObject<{
16
+ data: z.ZodUnknown;
17
+ model: z.ZodEnum<{
18
+ nano: "nano";
19
+ mini: "mini";
20
+ pro: "pro";
21
+ max: "max";
22
+ }>;
23
+ requestId: z.ZodNumber;
24
+ documentId: z.ZodOptional<z.ZodString>;
25
+ }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -0,0 +1,107 @@
1
+ import { oc } from "@orpc/contract";
2
+ import { z } from "zod";
3
+ import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
+ import { getDefaultSpec } from "./get-default-spec";
5
+ const specializedModelSchema = z
6
+ .enum(["auto", ...pdfvectorModelSchema.options])
7
+ .default("auto");
8
+ const extractInputSchema = z.object({
9
+ url: z
10
+ .url()
11
+ .optional()
12
+ .describe("URL of the identity document file to fetch and parse"),
13
+ file: z
14
+ .file()
15
+ .mime([
16
+ "application/pdf",
17
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
18
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
19
+ "text/csv",
20
+ "application/csv",
21
+ "image/png",
22
+ "image/jpeg",
23
+ ])
24
+ .optional()
25
+ .describe("Identity document file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
26
+ base64: z
27
+ .string()
28
+ .optional()
29
+ .describe("Base64-encoded identity document file content"),
30
+ prompt: z
31
+ .string()
32
+ .min(4)
33
+ .describe("The prompt instructing the AI how to extract data from the identity document"),
34
+ schema: z
35
+ .union([
36
+ z.record(z.string(), z.unknown()),
37
+ z.string().transform((str, ctx) => {
38
+ try {
39
+ const trimmed = str.startsWith('"') && str.endsWith('"') ? str.slice(1, -1) : str;
40
+ const parsed = JSON.parse(trimmed);
41
+ if (typeof parsed !== "object" || parsed === null) {
42
+ ctx.addIssue({
43
+ code: z.ZodIssueCode.custom,
44
+ message: "Schema must be a JSON object",
45
+ });
46
+ return z.NEVER;
47
+ }
48
+ return parsed;
49
+ }
50
+ catch {
51
+ ctx.addIssue({
52
+ code: z.ZodIssueCode.custom,
53
+ message: "Invalid JSON string for schema",
54
+ });
55
+ return z.NEVER;
56
+ }
57
+ }),
58
+ ])
59
+ .describe("JSON Schema describing the structure of the data to extract from the identity document. Can be a JSON object or a JSON string."),
60
+ model: specializedModelSchema.describe("Model tier for extracting structured data. " +
61
+ "'auto' (default): Automatically selects the best tier. " +
62
+ "'nano': Uses lightweight models. " +
63
+ "'mini': Uses mid-range models. " +
64
+ "'pro': Uses capable models. " +
65
+ "'max': Uses the most powerful models."),
66
+ });
67
+ const extractOutputSchema = z.object({
68
+ data: z
69
+ .unknown()
70
+ .describe("Extracted structured data matching the provided JSON Schema"),
71
+ model: pdfvectorModelSchema.describe("Model tier used to extract the data"),
72
+ requestId: z
73
+ .number()
74
+ .int()
75
+ .describe("Unique request identifier for this API call"),
76
+ documentId: z
77
+ .string()
78
+ .optional()
79
+ .describe("Document ID if provided via x-pdfvector-document-id header"),
80
+ });
81
+ const requestExamples = {
82
+ "Extract from URL": {
83
+ summary: "Extract from URL",
84
+ value: {
85
+ url: "https://example.com/identity-document.pdf",
86
+ prompt: "Extract the name, date of birth, and document number",
87
+ schema: JSON.stringify({
88
+ type: "object",
89
+ properties: {
90
+ name: { type: "string" },
91
+ dateOfBirth: { type: "string" },
92
+ documentNumber: { type: "string" },
93
+ },
94
+ required: ["name", "dateOfBirth", "documentNumber"],
95
+ }),
96
+ },
97
+ },
98
+ };
99
+ export const extract = oc
100
+ .route({
101
+ summary: "Extract structured data from an identity document",
102
+ description: "Parse an identity document and extract structured data matching a provided JSON Schema using AI. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
103
+ tags: ["Identity"],
104
+ spec: (op) => getDefaultSpec(op, requestExamples),
105
+ })
106
+ .input(extractInputSchema)
107
+ .output(extractOutputSchema);
@@ -0,0 +1 @@
1
+ export declare function getDefaultSpec(op: Record<string, unknown>, requestExamples: Record<string, unknown>): Record<string, unknown>;
@@ -0,0 +1,19 @@
1
+ export function getDefaultSpec(op, requestExamples) {
2
+ op.security = [{ bearerAuth: [] }];
3
+ const params = (op.parameters ?? []);
4
+ params.push({
5
+ name: "x-pdfvector-document-id",
6
+ in: "header",
7
+ required: false,
8
+ schema: { type: "string", default: "my-doc-123" },
9
+ description: "Optional document ID to associate with this request. Returned in the response and saved for usage tracking.",
10
+ });
11
+ op.parameters = params;
12
+ const reqBody = op.requestBody;
13
+ if (reqBody?.content) {
14
+ for (const mediaType of Object.values(reqBody.content)) {
15
+ mediaType.examples = requestExamples;
16
+ }
17
+ }
18
+ return op;
19
+ }
@@ -0,0 +1,3 @@
1
+ export * from "./ask";
2
+ export * from "./extract";
3
+ export * from "./parse";
@@ -0,0 +1,3 @@
1
+ export * from "./ask";
2
+ export * from "./extract";
3
+ export * from "./parse";
@@ -0,0 +1,22 @@
1
+ import { z } from "zod";
2
+ export declare const parse: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.ZodObject<{
3
+ url: z.ZodOptional<z.ZodURL>;
4
+ file: z.ZodOptional<z.ZodFile>;
5
+ base64: z.ZodOptional<z.ZodString>;
6
+ model: z.ZodDefault<z.ZodEnum<{
7
+ pro: "pro";
8
+ max: "max";
9
+ auto: "auto";
10
+ }>>;
11
+ }, z.core.$strip>, z.ZodObject<{
12
+ markdown: z.ZodString;
13
+ pageCount: z.ZodNumber;
14
+ model: z.ZodEnum<{
15
+ pro: "pro";
16
+ max: "max";
17
+ }>;
18
+ documentType: z.ZodOptional<z.ZodString>;
19
+ html: z.ZodOptional<z.ZodString>;
20
+ requestId: z.ZodNumber;
21
+ documentId: z.ZodOptional<z.ZodString>;
22
+ }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -0,0 +1,88 @@
1
+ import { oc } from "@orpc/contract";
2
+ import { z } from "zod";
3
+ import { getDefaultSpec } from "./get-default-spec";
4
+ const specializedParseModelSchema = z
5
+ .enum(["pro", "max", "auto"])
6
+ .default("auto");
7
+ const parseInputSchema = z.object({
8
+ url: z
9
+ .url()
10
+ .optional()
11
+ .describe("URL of the identity document file to fetch and parse"),
12
+ file: z
13
+ .file()
14
+ .mime([
15
+ "application/pdf",
16
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
17
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
18
+ "text/csv",
19
+ "application/csv",
20
+ "image/png",
21
+ "image/jpeg",
22
+ ])
23
+ .optional()
24
+ .describe("Identity document file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
25
+ base64: z
26
+ .string()
27
+ .optional()
28
+ .describe("Base64-encoded identity document file content"),
29
+ model: specializedParseModelSchema.describe("Model tier for parsing. " +
30
+ "'auto' (default): Automatically selects the best parsing strategy with intelligent fallback. " +
31
+ "'pro': Extracts structured identity document fields with standard accuracy. " +
32
+ "'max': Extracts structured identity document fields with highest accuracy."),
33
+ });
34
+ const parseOutputSchema = z.object({
35
+ markdown: z
36
+ .string()
37
+ .describe("Extracted text content from the identity document"),
38
+ pageCount: z.number().int().describe("Total number of pages in the document"),
39
+ model: z
40
+ .enum(["pro", "max"])
41
+ .describe("Model tier used to parse the identity document"),
42
+ documentType: z
43
+ .string()
44
+ .optional()
45
+ .describe("Detected identity document type (e.g., passport, driver's license)"),
46
+ html: z
47
+ .string()
48
+ .optional()
49
+ .describe("Full HTML representation of the document content. Only available when using the 'max' model. " +
50
+ "Preserves rich formatting, tables, selection marks, and visual layout that cannot be fully represented in markdown."),
51
+ requestId: z
52
+ .number()
53
+ .int()
54
+ .describe("Unique request identifier for this API call"),
55
+ documentId: z
56
+ .string()
57
+ .optional()
58
+ .describe("Document ID if provided via x-pdfvector-document-id header"),
59
+ });
60
+ const requestExamples = {
61
+ "Parse from URL": {
62
+ summary: "Parse from URL",
63
+ value: {
64
+ url: "https://example.com/identity-document.pdf",
65
+ },
66
+ },
67
+ "Parse from base64": {
68
+ summary: "Parse from base64",
69
+ value: {
70
+ base64: "JVBERi0xLjAKMSAwIG9iajw8L1R5cGUvQ2F0YWxvZy9QYWdlcyAyIDAgUj4+ZW5kb2JqIDIgMCBvYmo8PC9UeXBlL1BhZ2VzL0tpZHNbMyAwIFJdL0NvdW50IDE+PmVuZG9iaiAzIDAgb2JqPDwvVHlwZS9QYWdlL01lZGlhQm94WzAgMCAzIDNdL1BhcmVudCAyIDAgUj4+ZW5kb2JqCnhyZWYKMCA0CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDAwOSAwMDAwMCBuIAowMDAwMDAwMDU4IDAwMDAwIG4gCjAwMDAwMDAxMTUgMDAwMDAgbiAKdHJhaWxlcjw8L1NpemUgNC9Sb290IDEgMCBSPj4Kc3RhcnR4cmVmCjE5MAolJUVPRg==",
71
+ },
72
+ },
73
+ "Parse from file upload": {
74
+ summary: "Parse from file upload",
75
+ value: {
76
+ file: "(binary)",
77
+ },
78
+ },
79
+ };
80
+ export const parse = oc
81
+ .route({
82
+ summary: "Parse an identity document",
83
+ description: "Extract text and structured data from an identity document. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
84
+ tags: ["Identity"],
85
+ spec: (op) => getDefaultSpec(op, requestExamples),
86
+ })
87
+ .input(parseInputSchema)
88
+ .output(parseOutputSchema);
@@ -1,3 +1,6 @@
1
1
  export * as admin from "./admin";
2
2
  export * as authenticate from "./authenticate";
3
+ export * as bankStatement from "./bankStatement";
3
4
  export * as document from "./document";
5
+ export * as identity from "./identity";
6
+ export * as invoice from "./invoice";
@@ -1,3 +1,6 @@
1
1
  export * as admin from "./admin";
2
2
  export * as authenticate from "./authenticate";
3
+ export * as bankStatement from "./bankStatement";
3
4
  export * as document from "./document";
5
+ export * as identity from "./identity";
6
+ export * as invoice from "./invoice";
@@ -0,0 +1,24 @@
1
+ import { z } from "zod";
2
+ export declare const ask: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.ZodObject<{
3
+ url: z.ZodOptional<z.ZodURL>;
4
+ file: z.ZodOptional<z.ZodFile>;
5
+ base64: z.ZodOptional<z.ZodString>;
6
+ question: z.ZodString;
7
+ model: z.ZodDefault<z.ZodEnum<{
8
+ nano: "nano";
9
+ mini: "mini";
10
+ pro: "pro";
11
+ max: "max";
12
+ auto: "auto";
13
+ }>>;
14
+ }, z.core.$strip>, z.ZodObject<{
15
+ markdown: z.ZodString;
16
+ model: z.ZodEnum<{
17
+ nano: "nano";
18
+ mini: "mini";
19
+ pro: "pro";
20
+ max: "max";
21
+ }>;
22
+ requestId: z.ZodNumber;
23
+ documentId: z.ZodOptional<z.ZodString>;
24
+ }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -0,0 +1,67 @@
1
+ import { oc } from "@orpc/contract";
2
+ import { z } from "zod";
3
+ import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
+ import { getDefaultSpec } from "./get-default-spec";
5
// Model selector for this endpoint: "auto" plus every option of
// pdfvectorModelSchema. An omitted value defaults to "auto".
const specializedModelOptions = ["auto", ...pdfvectorModelSchema.options];
const specializedModelSchema = z.enum(specializedModelOptions).default("auto");
8
/** MIME types accepted for an invoice uploaded as multipart form-data. */
const askUploadMimeTypes = [
    "application/pdf",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "text/csv",
    "application/csv",
    "image/png",
    "image/jpeg",
];
/** Per-tier explanation of `model`; the sentences are joined with single spaces. */
const askModelDescription = [
    "Model tier for answering the question.",
    "'auto' (default): Automatically selects the best tier.",
    "'nano': Uses lightweight models.",
    "'mini': Uses mid-range models.",
    "'pro': Uses capable models.",
    "'max': Uses the most powerful models.",
].join(" ");
/**
 * Request body of the invoice "ask" endpoint.
 *
 * `url`, `file` and `base64` are each optional; this schema does not itself
 * require that one of them is present. `question` needs at least 4 characters.
 */
const askInputSchema = z.object({
    url: z.url().optional().describe("URL of the invoice file to fetch and parse"),
    file: z
        .file()
        .mime(askUploadMimeTypes)
        .optional()
        .describe("Invoice file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
    base64: z.string().optional().describe("Base64-encoded invoice file content"),
    question: z.string().min(4).describe("The question to answer about the invoice"),
    model: specializedModelSchema.describe(askModelDescription),
});
38
// Field schemas for the "ask" response, declared individually and assembled below.
const askAnswerMarkdown = z.string().describe("The answer to the question");
const askUsedModel = pdfvectorModelSchema.describe("Model tier used to answer the question");
const askRequestId = z.number().int().describe("Unique request identifier for this API call");
const askDocumentId = z
    .string()
    .optional()
    .describe("Document ID if provided via x-pdfvector-document-id header");
/** Response body of the invoice "ask" endpoint. */
const askOutputSchema = z.object({
    markdown: askAnswerMarkdown,
    model: askUsedModel,
    requestId: askRequestId,
    documentId: askDocumentId,
});
50
/** Single documented request example: asking a question about an invoice fetched by URL. */
const askFromUrlExample = {
    summary: "Ask from URL",
    value: {
        url: "https://example.com/invoice.pdf",
        question: "What is the total amount on this invoice?",
    },
};
// Examples are keyed by their display title, which equals the summary text.
const requestExamples = {
    [askFromUrlExample.summary]: askFromUrlExample,
};
59
/**
 * Contract for the invoice "ask" endpoint.
 *
 * The OpenAPI operation is passed through `getDefaultSpec` along with this
 * module's request examples.
 */
const askRouteOptions = {
    summary: "Ask a question about an invoice",
    description: "Parse an invoice and answer a question about its content using AI. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
    tags: ["Invoice"],
    spec: (operation) => getDefaultSpec(operation, requestExamples),
};
export const ask = oc
    .route(askRouteOptions)
    .input(askInputSchema)
    .output(askOutputSchema);
@@ -0,0 +1,25 @@
1
+ import { z } from "zod";
2
/**
 * Contract procedure for the invoice "extract" endpoint.
 *
 * Input object: optional `url`, `file` and `base64` document sources, a
 * required `prompt` string, a `schema` that is either a string-keyed record
 * or a string piped through a transform yielding `Record<string, unknown>`,
 * and a `model` tier (nano | mini | pro | max | auto) that carries a default.
 * Output object: `data` of unknown shape, the `model` tier reported back
 * (nano | mini | pro | max, never "auto"), a numeric `requestId`, and an
 * optional `documentId`.
 */
+ export declare const extract: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.ZodObject<{
3
+ url: z.ZodOptional<z.ZodURL>;
4
+ file: z.ZodOptional<z.ZodFile>;
5
+ base64: z.ZodOptional<z.ZodString>;
6
+ prompt: z.ZodString;
7
+ schema: z.ZodUnion<readonly [z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodPipe<z.ZodString, z.ZodTransform<Record<string, unknown>, string>>]>;
8
+ model: z.ZodDefault<z.ZodEnum<{
9
+ nano: "nano";
10
+ mini: "mini";
11
+ pro: "pro";
12
+ max: "max";
13
+ auto: "auto";
14
+ }>>;
15
+ }, z.core.$strip>, z.ZodObject<{
16
+ data: z.ZodUnknown;
17
+ model: z.ZodEnum<{
18
+ nano: "nano";
19
+ mini: "mini";
20
+ pro: "pro";
21
+ max: "max";
22
+ }>;
23
+ requestId: z.ZodNumber;
24
+ documentId: z.ZodOptional<z.ZodString>;
25
+ }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -0,0 +1,113 @@
1
+ import { oc } from "@orpc/contract";
2
+ import { z } from "zod";
3
+ import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
+ import { getDefaultSpec } from "./get-default-spec";
5
// Model selector for this endpoint: "auto" plus every option of
// pdfvectorModelSchema. An omitted value defaults to "auto".
const specializedModelOptions = ["auto", ...pdfvectorModelSchema.options];
const specializedModelSchema = z.enum(specializedModelOptions).default("auto");
8
/** MIME types accepted for an invoice uploaded as multipart form-data. */
const extractUploadMimeTypes = [
    "application/pdf",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "text/csv",
    "application/csv",
    "image/png",
    "image/jpeg",
];
/**
 * Zod transform callback that turns a string-encoded JSON Schema into an object.
 *
 * @param str Raw string value of the `schema` field.
 * @param ctx Zod refinement context used to report validation issues.
 * @returns The parsed object, or `z.NEVER` after recording an issue when the
 *   string is not valid JSON or does not decode to a plain JSON object.
 */
function parseSchemaString(str, ctx) {
    // A value wrapped in one extra pair of double quotes is unwrapped before parsing.
    const isQuoted = str.startsWith('"') && str.endsWith('"');
    const candidate = isQuoted ? str.slice(1, -1) : str;
    let parsed;
    try {
        parsed = JSON.parse(candidate);
    }
    catch {
        ctx.addIssue({
            code: z.ZodIssueCode.custom,
            message: "Invalid JSON string for schema",
        });
        return z.NEVER;
    }
    // `typeof` reports "object" for arrays as well, so they are rejected
    // explicitly: a top-level array is not a JSON object and would not match
    // the record branch of the union either.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
        ctx.addIssue({
            code: z.ZodIssueCode.custom,
            message: "Schema must be a JSON object",
        });
        return z.NEVER;
    }
    return parsed;
}
/**
 * Request body of the invoice "extract" endpoint.
 *
 * `url`, `file` and `base64` are each optional; this schema does not itself
 * require that one of them is present. `prompt` needs at least 4 characters.
 * `schema` may be supplied as an object or as a JSON string.
 */
const extractInputSchema = z.object({
    url: z
        .url()
        .optional()
        .describe("URL of the invoice file to fetch and parse"),
    file: z
        .file()
        .mime(extractUploadMimeTypes)
        .optional()
        .describe("Invoice file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
    base64: z.string().optional().describe("Base64-encoded invoice file content"),
    prompt: z
        .string()
        .min(4)
        .describe("The prompt instructing the AI how to extract data from the invoice"),
    schema: z
        .union([
            z.record(z.string(), z.unknown()),
            z.string().transform(parseSchemaString),
        ])
        .describe("JSON Schema describing the structure of the data to extract from the invoice. Can be a JSON object or a JSON string."),
    model: specializedModelSchema.describe("Model tier for extracting structured data. " +
        "'auto' (default): Automatically selects the best tier. " +
        "'nano': Uses lightweight models. " +
        "'mini': Uses mid-range models. " +
        "'pro': Uses capable models. " +
        "'max': Uses the most powerful models."),
});
64
// Field schemas for the "extract" response, declared individually and assembled below.
const extractedData = z
    .unknown()
    .describe("Extracted structured data matching the provided JSON Schema");
const extractUsedModel = pdfvectorModelSchema.describe("Model tier used to extract the data");
const extractRequestId = z.number().int().describe("Unique request identifier for this API call");
const extractDocumentId = z
    .string()
    .optional()
    .describe("Document ID if provided via x-pdfvector-document-id header");
/** Response body of the invoice "extract" endpoint. */
const extractOutputSchema = z.object({
    data: extractedData,
    model: extractUsedModel,
    requestId: extractRequestId,
    documentId: extractDocumentId,
});
78
+ const requestExamples = {
79
+ "Extract from URL": {
80
+ summary: "Extract from URL",
81
+ value: {
82
+ url: "https://example.com/invoice.pdf",
83
+ prompt: "Extract the vendor name, total amount, and line items",
84
+ schema: JSON.stringify({
85
+ type: "object",
86
+ properties: {
87
+ vendorName: { type: "string" },
88
+ totalAmount: { type: "number" },
89
+ lineItems: {
90
+ type: "array",
91
+ items: {
92
+ type: "object",
93
+ properties: {
94
+ description: { type: "string" },
95
+ amount: { type: "number" },
96
+ },
97
+ },
98
+ },
99
+ },
100
+ required: ["vendorName", "totalAmount"],
101
+ }),
102
+ },
103
+ },
104
+ };
105
/**
 * Contract for the invoice "extract" endpoint.
 *
 * The OpenAPI operation is passed through `getDefaultSpec` along with this
 * module's request examples.
 */
const extractRouteOptions = {
    summary: "Extract structured data from an invoice",
    description: "Parse an invoice and extract structured data matching a provided JSON Schema using AI. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
    tags: ["Invoice"],
    spec: (operation) => getDefaultSpec(operation, requestExamples),
};
export const extract = oc
    .route(extractRouteOptions)
    .input(extractInputSchema)
    .output(extractOutputSchema);
@@ -0,0 +1 @@
1
/**
 * Post-processes an operation object for the invoice routes.
 *
 * @param op Operation object to customize.
 * @param requestExamples Named request examples for the operation.
 * @returns The customized operation object.
 */
+ export declare function getDefaultSpec(op: Record<string, unknown>, requestExamples: Record<string, unknown>): Record<string, unknown>;
@@ -0,0 +1,19 @@
1
+ export function getDefaultSpec(op, requestExamples) {
2
+ op.security = [{ bearerAuth: [] }];
3
+ const params = (op.parameters ?? []);
4
+ params.push({
5
+ name: "x-pdfvector-document-id",
6
+ in: "header",
7
+ required: false,
8
+ schema: { type: "string", default: "my-doc-123" },
9
+ description: "Optional document ID to associate with this request. Returned in the response and saved for usage tracking.",
10
+ });
11
+ op.parameters = params;
12
+ const reqBody = op.requestBody;
13
+ if (reqBody?.content) {
14
+ for (const mediaType of Object.values(reqBody.content)) {
15
+ mediaType.examples = requestExamples;
16
+ }
17
+ }
18
+ return op;
19
+ }
@@ -0,0 +1,3 @@
1
+ export * from "./ask";
2
+ export * from "./extract";
3
+ export * from "./parse";
@@ -0,0 +1,3 @@
1
+ export * from "./ask";
2
+ export * from "./extract";
3
+ export * from "./parse";
@@ -0,0 +1,21 @@
1
+ import { z } from "zod";
2
/**
 * Contract procedure for the invoice "parse" endpoint.
 *
 * Input object: optional `url`, `file` and `base64` document sources and a
 * `model` tier (pro | max | auto) that carries a default.
 * Output object: `markdown` text, a numeric `pageCount`, the `model` tier
 * reported back (pro | max, never "auto"), a numeric `requestId`, and
 * optional `html` and `documentId` strings.
 */
+ export declare const parse: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.ZodObject<{
3
+ url: z.ZodOptional<z.ZodURL>;
4
+ file: z.ZodOptional<z.ZodFile>;
5
+ base64: z.ZodOptional<z.ZodString>;
6
+ model: z.ZodDefault<z.ZodEnum<{
7
+ pro: "pro";
8
+ max: "max";
9
+ auto: "auto";
10
+ }>>;
11
+ }, z.core.$strip>, z.ZodObject<{
12
+ markdown: z.ZodString;
13
+ pageCount: z.ZodNumber;
14
+ model: z.ZodEnum<{
15
+ pro: "pro";
16
+ max: "max";
17
+ }>;
18
+ requestId: z.ZodNumber;
19
+ html: z.ZodOptional<z.ZodString>;
20
+ documentId: z.ZodOptional<z.ZodString>;
21
+ }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -0,0 +1,79 @@
1
+ import { oc } from "@orpc/contract";
2
+ import { z } from "zod";
3
+ import { getDefaultSpec } from "./get-default-spec";
4
// Parsing accepts only the "pro" and "max" tiers plus "auto".
// An omitted value defaults to "auto".
const specializedParseModelOptions = ["pro", "max", "auto"];
const specializedParseModelSchema = z.enum(specializedParseModelOptions).default("auto");
7
/** MIME types accepted for an invoice uploaded as multipart form-data. */
const parseUploadMimeTypes = [
    "application/pdf",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "text/csv",
    "application/csv",
    "image/png",
    "image/jpeg",
];
/** Per-tier explanation of `model`; the sentences are joined with single spaces. */
const parseModelDescription = [
    "Model tier for parsing.",
    "'auto' (default): Automatically selects the best parsing strategy with intelligent fallback.",
    "'pro': Extracts structured invoice fields with standard accuracy.",
    "'max': Extracts structured invoice fields with highest accuracy and fallback.",
].join(" ");
/**
 * Request body of the invoice "parse" endpoint.
 *
 * `url`, `file` and `base64` are each optional; this schema does not itself
 * require that one of them is present.
 */
const parseInputSchema = z.object({
    url: z.url().optional().describe("URL of the invoice file to fetch and parse"),
    file: z
        .file()
        .mime(parseUploadMimeTypes)
        .optional()
        .describe("Invoice file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
    base64: z.string().optional().describe("Base64-encoded invoice file content"),
    model: specializedParseModelSchema.describe(parseModelDescription),
});
31
/** Description of the optional `html` field; the two sentences are joined with a single space. */
const parseHtmlDescription = [
    "Full HTML representation of the document content. Only available when using the 'max' model.",
    "Preserves rich formatting, tables, selection marks, and visual layout that cannot be fully represented in markdown.",
].join(" ");
// Field schemas for the "parse" response, declared individually and assembled below.
const parsedMarkdown = z.string().describe("Extracted text content from the invoice");
const parsedPageCount = z.number().int().describe("Total number of pages in the document");
const parseUsedModel = z.enum(["pro", "max"]).describe("Model tier used to parse the invoice");
const parseRequestId = z.number().int().describe("Unique request identifier for this API call");
const parsedHtml = z.string().optional().describe(parseHtmlDescription);
const parseDocumentId = z
    .string()
    .optional()
    .describe("Document ID if provided via x-pdfvector-document-id header");
/** Response body of the invoice "parse" endpoint. */
const parseOutputSchema = z.object({
    markdown: parsedMarkdown,
    pageCount: parsedPageCount,
    model: parseUsedModel,
    requestId: parseRequestId,
    html: parsedHtml,
    documentId: parseDocumentId,
});
51
/**
 * Documented request payloads for the parse endpoint, as [title, payload]
 * pairs, one per way of supplying the document.
 */
const parseExamplePayloads = [
    ["Parse from URL", { url: "https://example.com/invoice.pdf" }],
    [
        "Parse from base64",
        {
            base64: "JVBERi0xLjAKMSAwIG9iajw8L1R5cGUvQ2F0YWxvZy9QYWdlcyAyIDAgUj4+ZW5kb2JqIDIgMCBvYmo8PC9UeXBlL1BhZ2VzL0tpZHNbMyAwIFJdL0NvdW50IDE+PmVuZG9iaiAzIDAgb2JqPDwvVHlwZS9QYWdlL01lZGlhQm94WzAgMCAzIDNdL1BhcmVudCAyIDAgUj4+ZW5kb2JqCnhyZWYKMCA0CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDAwOSAwMDAwMCBuIAowMDAwMDAwMDU4IDAwMDAwIG4gCjAwMDAwMDAxMTUgMDAwMDAgbiAKdHJhaWxlcjw8L1NpemUgNC9Sb290IDEgMCBSPj4Kc3RhcnR4cmVmCjE5MAolJUVPRg==",
        },
    ],
    ["Parse from file upload", { file: "(binary)" }],
];
// Each example is keyed by its title and reuses that title as its summary.
const requestExamples = Object.fromEntries(parseExamplePayloads.map(([title, value]) => [title, { summary: title, value }]));
71
/**
 * Contract for the invoice "parse" endpoint.
 *
 * The OpenAPI operation is passed through `getDefaultSpec` along with this
 * module's request examples.
 */
const parseRouteOptions = {
    summary: "Parse an invoice",
    description: "Extract text and structured data from an invoice. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
    tags: ["Invoice"],
    spec: (operation) => getDefaultSpec(operation, requestExamples),
};
export const parse = oc
    .route(parseRouteOptions)
    .input(parseInputSchema)
    .output(parseOutputSchema);
package/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # @pdfvector/instance-contract
2
2
 
3
+ ## 0.0.19
4
+ ### Patch Changes
5
+
6
+
7
+
8
+ - [#116](https://github.com/phuctm97/pdfvector/pull/116) [`c9bf7f0`](https://github.com/phuctm97/pdfvector/commit/c9bf7f05be2be778818306010cf1b484e4233702) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Add HTML field to parse response and rename Identity Document tag to Identity
9
+
10
+ ## 0.0.18
11
+ ### Patch Changes
12
+
13
+
14
+
15
+ - [#115](https://github.com/phuctm97/pdfvector/pull/115) [`f726901`](https://github.com/phuctm97/pdfvector/commit/f7269017780da691cf7fc3c9463a28050ad8453f) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Add identity, invoice, and bank statement parse/ask/extract APIs
16
+
3
17
  ## 0.0.17
4
18
  ### Patch Changes
5
19
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pdfvector/instance-contract",
3
- "version": "0.0.17",
3
+ "version": "0.0.19",
4
4
  "type": "module",
5
5
  "main": ".tsc/lib/index.js",
6
6
  "dependencies": {