@pdfvector/instance-contract 0.0.27 → 0.0.29

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import { oc } from "@orpc/contract";
2
2
  import { z } from "zod";
3
3
  import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
+ import { documentAskModelDescription, outputAskModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
4
5
  import { getDefaultSpec } from "./get-default-spec";
5
6
  const askInputSchema = z.object({
6
7
  url: z
@@ -9,17 +10,9 @@ const askInputSchema = z.object({
9
10
  .describe("URL of the document file to fetch and parse"),
10
11
  file: z
11
12
  .file()
12
- .mime([
13
- "application/pdf",
14
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
15
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
16
- "text/csv",
17
- "application/csv",
18
- "image/png",
19
- "image/jpeg",
20
- ])
13
+ .mime([...supportedFileMimes])
21
14
  .optional()
22
- .describe("Document file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
15
+ .describe(`Document file upload via multipart form-data (${supportedFileFormatsDescription})`),
23
16
  base64: z
24
17
  .string()
25
18
  .optional()
@@ -32,12 +25,7 @@ const askInputSchema = z.object({
32
25
  .enum(["auto", ...pdfvectorModelSchema.options])
33
26
  .optional()
34
27
  .default("auto")
35
- .describe("Model tier for answering the question. " +
36
- "'auto' (default): Automatically selects the best tier based on document complexity. " +
37
- "'nano': 2 credits/page. Fastest and cheapest. Best for simple questions about straightforward documents. Supports PDF, Word, Excel, CSV. " +
38
- "'mini': 4 credits/page. Balanced speed and accuracy. Handles moderately complex questions. Supports PDF, Word, Excel, CSV. " +
39
- "'pro': 8 credits/page. High accuracy for nuanced questions about complex documents. Supports PDF, Word, Excel, CSV, Image. " +
40
- "'max': 16 credits/page. Maximum accuracy with the most capable models. Best for difficult questions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
28
+ .describe(documentAskModelDescription),
41
29
  callback: z
42
30
  .object({
43
31
  url: z
@@ -61,11 +49,7 @@ const askOutputSchema = z
61
49
  .number()
62
50
  .int()
63
51
  .describe("Total number of pages in the document"),
64
- model: pdfvectorModelSchema.describe("Model tier used to answer the question. " +
65
- "'nano': Fastest, best for simple questions. Supports PDF, Word, Excel, CSV. " +
66
- "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
67
- "'pro': High accuracy for complex questions. Supports PDF, Word, Excel, CSV, Image. " +
68
- "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
52
+ model: pdfvectorModelSchema.describe(outputAskModelDescription),
69
53
  credits: z
70
54
  .number()
71
55
  .int()
@@ -132,7 +116,7 @@ const requestExamples = {
132
116
  export const ask = oc
133
117
  .route({
134
118
  summary: "Ask a question about a document",
135
- description: "Parse a document and answer a question about its content using AI. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Files up to 1000 pages and up to 500MB in size. Provide the document via file upload, a public URL, or a base64-encoded string.",
119
+ description: `Parse a document and answer a question about its content using AI. Supports ${supportedFileTypesLong}. Files up to 1000 pages and up to 500MB in size. Provide the document via file upload, a public URL, or a base64-encoded string.`,
136
120
  tags: ["Document"],
137
121
  spec: (op) => getDefaultSpec(op, requestExamples),
138
122
  })
@@ -1,6 +1,7 @@
1
1
  import { oc } from "@orpc/contract";
2
2
  import { z } from "zod";
3
3
  import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
+ import { documentExtractModelDescription, outputExtractModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
4
5
  import { getDefaultSpec } from "./get-default-spec";
5
6
  const extractInputSchema = z.object({
6
7
  url: z
@@ -9,17 +10,9 @@ const extractInputSchema = z.object({
9
10
  .describe("URL of the document file to fetch and parse"),
10
11
  file: z
11
12
  .file()
12
- .mime([
13
- "application/pdf",
14
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
15
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
16
- "text/csv",
17
- "application/csv",
18
- "image/png",
19
- "image/jpeg",
20
- ])
13
+ .mime([...supportedFileMimes])
21
14
  .optional()
22
- .describe("Document file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
15
+ .describe(`Document file upload via multipart form-data (${supportedFileFormatsDescription})`),
23
16
  base64: z
24
17
  .string()
25
18
  .optional()
@@ -59,12 +52,7 @@ const extractInputSchema = z.object({
59
52
  .enum(["auto", ...pdfvectorModelSchema.options])
60
53
  .optional()
61
54
  .default("auto")
62
- .describe("Model tier for extracting structured data. " +
63
- "'auto' (default): Automatically selects the best tier based on document complexity. " +
64
- "'nano': 2 credits/page. Fastest and cheapest. Best for simple documents with straightforward schemas. Supports PDF, Word, Excel, CSV. " +
65
- "'mini': 4 credits/page. Balanced speed and accuracy. Handles moderately complex documents and schemas. Supports PDF, Word, Excel, CSV. " +
66
- "'pro': 8 credits/page. High accuracy for complex documents with large or nested schemas. Supports PDF, Word, Excel, CSV, Image. " +
67
- "'max': 16 credits/page. Maximum accuracy with the most capable models. Best for difficult extractions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
55
+ .describe(documentExtractModelDescription),
68
56
  callback: z
69
57
  .object({
70
58
  url: z
@@ -92,11 +80,7 @@ const extractOutputSchema = z
92
80
  .number()
93
81
  .int()
94
82
  .describe("Total number of pages in the document"),
95
- model: pdfvectorModelSchema.describe("Model tier used to extract the data. " +
96
- "'nano': Fastest, best for simple documents. Supports PDF, Word, Excel, CSV. " +
97
- "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
98
- "'pro': High accuracy for complex documents. Supports PDF, Word, Excel, CSV, Image. " +
99
- "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
83
+ model: pdfvectorModelSchema.describe(outputExtractModelDescription),
100
84
  credits: z
101
85
  .number()
102
86
  .int()
@@ -175,7 +159,7 @@ const requestExamples = {
175
159
  export const extract = oc
176
160
  .route({
177
161
  summary: "Extract structured data from a document",
178
- description: "Parse a document and extract structured data matching a provided JSON Schema using AI. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Files up to 1000 pages and up to 500MB in size. Provide the document via file upload, a public URL, or a base64-encoded string.",
162
+ description: `Parse a document and extract structured data matching a provided JSON Schema using AI. Supports ${supportedFileTypesLong}. Files up to 1000 pages and up to 500MB in size. Provide the document via file upload, a public URL, or a base64-encoded string.`,
179
163
  tags: ["Document"],
180
164
  spec: (op) => getDefaultSpec(op, requestExamples),
181
165
  })
@@ -1,6 +1,7 @@
1
1
  import { oc } from "@orpc/contract";
2
2
  import { z } from "zod";
3
3
  import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
+ import { documentParseModelDescription, outputModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
4
5
  import { getDefaultSpec } from "./get-default-spec";
5
6
  const parseInputSchema = z.object({
6
7
  url: z
@@ -9,17 +10,9 @@ const parseInputSchema = z.object({
9
10
  .describe("URL of the document file to fetch and parse"),
10
11
  file: z
11
12
  .file()
12
- .mime([
13
- "application/pdf",
14
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
15
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
16
- "text/csv",
17
- "application/csv",
18
- "image/png",
19
- "image/jpeg",
20
- ])
13
+ .mime([...supportedFileMimes])
21
14
  .optional()
22
- .describe("Document file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
15
+ .describe(`Document file upload via multipart form-data (${supportedFileFormatsDescription})`),
23
16
  base64: z
24
17
  .string()
25
18
  .optional()
@@ -28,12 +21,7 @@ const parseInputSchema = z.object({
28
21
  .enum(["auto", ...pdfvectorModelSchema.options])
29
22
  .optional()
30
23
  .default("auto")
31
- .describe("Model tier for parsing. " +
32
- "'auto' (default): Automatically selects the best parsing strategy with intelligent fallback. File up to 1000 pages, up to 500MB in size. Supports PDF, Word, Excel, CSV, Image. " +
33
- "'nano': 1 credit/page. For simple documents with plain text content. File up to 30 pages, up to 10MB in size. Supports PDF, Word, Excel, CSV. " +
34
- "'mini': 2 credits/page. For documents with tables and structured content. File up to 30 pages, up to 10MB in size. Supports PDF, Word, Excel, CSV. " +
35
- "'pro': 4 credits/page. For documents up to 30 pages with tables, handwritten text, figures, math, and Arabic. File up to 30 pages, up to 40MB in size. Supports PDF, Word, Excel, CSV, Image. " +
36
- "'max': 8 credits/page. For large documents with full Pro capabilities plus enhanced multilingual support. File up to 1000 pages, up to 500MB in size. Supports PDF, Word, Excel, CSV, Image."),
24
+ .describe(documentParseModelDescription),
37
25
  callback: z
38
26
  .object({
39
27
  url: z
@@ -53,11 +41,7 @@ const parseInputSchema = z.object({
53
41
  const parseOutputSchema = z.object({
54
42
  markdown: z.string().describe("Extracted text content from the document"),
55
43
  pageCount: z.number().int().describe("Total number of pages in the document"),
56
- model: pdfvectorModelSchema.describe("Model tier used to parse the document. " +
57
- "'nano': Fastest, best for simple documents. Supports PDF, Word, Excel, CSV. " +
58
- "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
59
- "'pro': High accuracy for complex documents. Supports PDF, Word, Excel, CSV, Image. " +
60
- "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
44
+ model: pdfvectorModelSchema.describe(outputModelDescription),
61
45
  credits: z
62
46
  .number()
63
47
  .int()
@@ -120,7 +104,7 @@ const requestExamples = {
120
104
  export const parse = oc
121
105
  .route({
122
106
  summary: "Parse a document",
123
- description: "Extract text and page count from a document. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
107
+ description: `Extract text and page count from a document. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
124
108
  tags: ["Document"],
125
109
  spec: (op) => getDefaultSpec(op, requestExamples),
126
110
  })
@@ -1,5 +1,6 @@
1
1
  import { oc } from "@orpc/contract";
2
2
  import { z } from "zod";
3
+ import { supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
3
4
  const inputSchema = z.object({
4
5
  url: z
5
6
  .url()
@@ -7,17 +8,9 @@ const inputSchema = z.object({
7
8
  .describe("URL of the bank statement file to fetch and parse"),
8
9
  file: z
9
10
  .file()
10
- .mime([
11
- "application/pdf",
12
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
13
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
14
- "text/csv",
15
- "application/csv",
16
- "image/png",
17
- "image/jpeg",
18
- ])
11
+ .mime([...supportedFileMimes])
19
12
  .optional()
20
- .describe("Bank statement file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
13
+ .describe(`Bank statement file upload via multipart form-data (${supportedFileFormatsDescription})`),
21
14
  base64: z
22
15
  .string()
23
16
  .optional()
@@ -70,7 +63,7 @@ const responseExample = {
70
63
  export const bankStatementParse = oc
71
64
  .route({
72
65
  summary: "Free: Parse a bank statement",
73
- description: "Extract text and structured data from a bank statement. Supports PDF, Word, Excel, CSV, and Image files. Free tier: max 5 pages, max 5 MB. Rate limited to 5 requests per IP per day.",
66
+ description: `Extract text and structured data from a bank statement. Supports ${supportedFileTypesLong}. Free tier: max 5 pages, max 5 MB. Rate limited to 5 requests per IP per day.`,
74
67
  tags: ["Free"],
75
68
  spec: (op) => {
76
69
  op.security = [];
@@ -1,6 +1,7 @@
1
1
  import { oc } from "@orpc/contract";
2
2
  import { z } from "zod";
3
3
  import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
+ import { outputAskModelDescription, specializedAskModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
4
5
  import { getDefaultSpec } from "./get-default-spec";
5
6
  const specializedModelSchema = z
6
7
  .enum(["auto", ...pdfvectorModelSchema.options])
@@ -12,17 +13,9 @@ const askInputSchema = z.object({
12
13
  .describe("URL of the identity document file to fetch and parse"),
13
14
  file: z
14
15
  .file()
15
- .mime([
16
- "application/pdf",
17
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
18
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
19
- "text/csv",
20
- "application/csv",
21
- "image/png",
22
- "image/jpeg",
23
- ])
16
+ .mime([...supportedFileMimes])
24
17
  .optional()
25
- .describe("Identity document file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
18
+ .describe(`Identity document file upload via multipart form-data (${supportedFileFormatsDescription})`),
26
19
  base64: z
27
20
  .string()
28
21
  .optional()
@@ -31,12 +24,7 @@ const askInputSchema = z.object({
31
24
  .string()
32
25
  .min(4, "question must be at least 4 characters")
33
26
  .describe("The question to answer about the identity document"),
34
- model: specializedModelSchema.describe("Model tier for answering the question. " +
35
- "'auto' (default): Automatically selects the best tier based on document complexity. " +
36
- "'nano': 6 credits/page. Fastest and cheapest. Best for simple questions about straightforward documents. Supports PDF, Word, Excel, CSV. " +
37
- "'mini': 10 credits/page. Balanced speed and accuracy. Handles moderately complex questions. Supports PDF, Word, Excel, CSV. " +
38
- "'pro': 14 credits/page. High accuracy for nuanced questions about complex documents. Supports PDF, Word, Excel, CSV, Image. " +
39
- "'max': 18 credits/page. Maximum accuracy with the most capable models. Best for difficult questions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
27
+ model: specializedModelSchema.describe(specializedAskModelDescription),
40
28
  callback: z
41
29
  .object({
42
30
  url: z
@@ -56,11 +44,7 @@ const askInputSchema = z.object({
56
44
  const askOutputSchema = z.object({
57
45
  markdown: z.string().describe("The answer to the question"),
58
46
  pageCount: z.number().int().describe("Total number of pages in the document"),
59
- model: pdfvectorModelSchema.describe("Model tier used to answer the question. " +
60
- "'nano': Fastest, best for simple questions. Supports PDF, Word, Excel, CSV. " +
61
- "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
62
- "'pro': High accuracy for complex questions. Supports PDF, Word, Excel, CSV, Image. " +
63
- "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
47
+ model: pdfvectorModelSchema.describe(outputAskModelDescription),
64
48
  credits: z
65
49
  .number()
66
50
  .int()
@@ -93,7 +77,7 @@ const requestExamples = {
93
77
  export const ask = oc
94
78
  .route({
95
79
  summary: "Ask a question about an identity document",
96
- description: "Parse an identity document and answer a question about its content using AI. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
80
+ description: `Parse an identity document and answer a question about its content using AI. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
97
81
  tags: ["Identity"],
98
82
  spec: (op) => getDefaultSpec(op, requestExamples),
99
83
  })
@@ -1,6 +1,7 @@
1
1
  import { oc } from "@orpc/contract";
2
2
  import { z } from "zod";
3
3
  import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
+ import { outputExtractModelDescription, specializedExtractModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
4
5
  import { getDefaultSpec } from "./get-default-spec";
5
6
  const specializedModelSchema = z
6
7
  .enum(["auto", ...pdfvectorModelSchema.options])
@@ -12,17 +13,9 @@ const extractInputSchema = z.object({
12
13
  .describe("URL of the identity document file to fetch and parse"),
13
14
  file: z
14
15
  .file()
15
- .mime([
16
- "application/pdf",
17
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
18
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
19
- "text/csv",
20
- "application/csv",
21
- "image/png",
22
- "image/jpeg",
23
- ])
16
+ .mime([...supportedFileMimes])
24
17
  .optional()
25
- .describe("Identity document file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
18
+ .describe(`Identity document file upload via multipart form-data (${supportedFileFormatsDescription})`),
26
19
  base64: z
27
20
  .string()
28
21
  .optional()
@@ -57,12 +50,7 @@ const extractInputSchema = z.object({
57
50
  }),
58
51
  ])
59
52
  .describe("JSON Schema describing the structure of the data to extract from the identity document. Can be a JSON object or a JSON string."),
60
- model: specializedModelSchema.describe("Model tier for extracting structured data. " +
61
- "'auto' (default): Automatically selects the best tier based on document complexity. " +
62
- "'nano': 6 credits/page. Fastest and cheapest. Best for simple documents with straightforward schemas. Supports PDF, Word, Excel, CSV. " +
63
- "'mini': 10 credits/page. Balanced speed and accuracy. Handles moderately complex documents and schemas. Supports PDF, Word, Excel, CSV. " +
64
- "'pro': 14 credits/page. High accuracy for complex documents with large or nested schemas. Supports PDF, Word, Excel, CSV, Image. " +
65
- "'max': 18 credits/page. Maximum accuracy with the most capable models. Best for difficult extractions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
53
+ model: specializedModelSchema.describe(specializedExtractModelDescription),
66
54
  callback: z
67
55
  .object({
68
56
  url: z
@@ -86,11 +74,7 @@ const extractOutputSchema = z.object({
86
74
  (typeof val !== "object" || Object.keys(val).length > 0), { message: "Extracted data must not be empty" })
87
75
  .describe("Extracted structured data matching the provided JSON Schema"),
88
76
  pageCount: z.number().int().describe("Total number of pages in the document"),
89
- model: pdfvectorModelSchema.describe("Model tier used to extract the data. " +
90
- "'nano': Fastest, best for simple documents. Supports PDF, Word, Excel, CSV. " +
91
- "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
92
- "'pro': High accuracy for complex documents. Supports PDF, Word, Excel, CSV, Image. " +
93
- "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
77
+ model: pdfvectorModelSchema.describe(outputExtractModelDescription),
94
78
  credits: z
95
79
  .number()
96
80
  .int()
@@ -125,7 +109,7 @@ const requestExamples = {
125
109
  export const extract = oc
126
110
  .route({
127
111
  summary: "Extract structured data from an identity document",
128
- description: "Parse an identity document and extract structured data matching a provided JSON Schema using AI. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
112
+ description: `Parse an identity document and extract structured data matching a provided JSON Schema using AI. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
129
113
  tags: ["Identity"],
130
114
  spec: (op) => getDefaultSpec(op, requestExamples),
131
115
  })
@@ -1,5 +1,6 @@
1
1
  import { oc } from "@orpc/contract";
2
2
  import { z } from "zod";
3
+ import { specializedParseModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
3
4
  import { getDefaultSpec } from "./get-default-spec";
4
5
  const specializedParseModelSchema = z
5
6
  .enum(["pro", "max", "auto"], {
@@ -13,25 +14,14 @@ const parseInputSchema = z.object({
13
14
  .describe("URL of the identity document file to fetch and parse"),
14
15
  file: z
15
16
  .file()
16
- .mime([
17
- "application/pdf",
18
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
19
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
20
- "text/csv",
21
- "application/csv",
22
- "image/png",
23
- "image/jpeg",
24
- ])
17
+ .mime([...supportedFileMimes])
25
18
  .optional()
26
- .describe("Identity document file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
19
+ .describe(`Identity document file upload via multipart form-data (${supportedFileFormatsDescription})`),
27
20
  base64: z
28
21
  .string()
29
22
  .optional()
30
23
  .describe("Base64-encoded identity document file content"),
31
- model: specializedParseModelSchema.describe("Model tier for parsing. " +
32
- "'auto' (default): Automatically selects the best parsing strategy with intelligent fallback. " +
33
- "'pro': 6 credits/page. Extracts structured identity document fields with standard accuracy. " +
34
- "'max': 10 credits/page. Extracts structured identity document fields with highest accuracy."),
24
+ model: specializedParseModelSchema.describe(specializedParseModelDescription("identity document")),
35
25
  callback: z
36
26
  .object({
37
27
  url: z
@@ -101,7 +91,7 @@ const requestExamples = {
101
91
  export const parse = oc
102
92
  .route({
103
93
  summary: "Parse an identity document",
104
- description: "Extract text and structured data from an identity document. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
94
+ description: `Extract text and structured data from an identity document. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
105
95
  tags: ["Identity"],
106
96
  spec: (op) => getDefaultSpec(op, requestExamples),
107
97
  })
@@ -1,6 +1,7 @@
1
1
  import { oc } from "@orpc/contract";
2
2
  import { z } from "zod";
3
3
  import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
+ import { outputAskModelDescription, specializedAskModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
4
5
  import { getDefaultSpec } from "./get-default-spec";
5
6
  const specializedModelSchema = z
6
7
  .enum(["auto", ...pdfvectorModelSchema.options])
@@ -12,28 +13,15 @@ const askInputSchema = z.object({
12
13
  .describe("URL of the invoice file to fetch and parse"),
13
14
  file: z
14
15
  .file()
15
- .mime([
16
- "application/pdf",
17
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
18
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
19
- "text/csv",
20
- "application/csv",
21
- "image/png",
22
- "image/jpeg",
23
- ])
16
+ .mime([...supportedFileMimes])
24
17
  .optional()
25
- .describe("Invoice file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
18
+ .describe(`Invoice file upload via multipart form-data (${supportedFileFormatsDescription})`),
26
19
  base64: z.string().optional().describe("Base64-encoded invoice file content"),
27
20
  question: z
28
21
  .string()
29
22
  .min(4, "question must be at least 4 characters")
30
23
  .describe("The question to answer about the invoice"),
31
- model: specializedModelSchema.describe("Model tier for answering the question. " +
32
- "'auto' (default): Automatically selects the best tier based on document complexity. " +
33
- "'nano': 6 credits/page. Fastest and cheapest. Best for simple questions about straightforward documents. Supports PDF, Word, Excel, CSV. " +
34
- "'mini': 10 credits/page. Balanced speed and accuracy. Handles moderately complex questions. Supports PDF, Word, Excel, CSV. " +
35
- "'pro': 14 credits/page. High accuracy for nuanced questions about complex documents. Supports PDF, Word, Excel, CSV, Image. " +
36
- "'max': 18 credits/page. Maximum accuracy with the most capable models. Best for difficult questions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
24
+ model: specializedModelSchema.describe(specializedAskModelDescription),
37
25
  callback: z
38
26
  .object({
39
27
  url: z
@@ -53,11 +41,7 @@ const askInputSchema = z.object({
53
41
  const askOutputSchema = z.object({
54
42
  markdown: z.string().describe("The answer to the question"),
55
43
  pageCount: z.number().int().describe("Total number of pages in the document"),
56
- model: pdfvectorModelSchema.describe("Model tier used to answer the question. " +
57
- "'nano': Fastest, best for simple questions. Supports PDF, Word, Excel, CSV. " +
58
- "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
59
- "'pro': High accuracy for complex questions. Supports PDF, Word, Excel, CSV, Image. " +
60
- "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
44
+ model: pdfvectorModelSchema.describe(outputAskModelDescription),
61
45
  credits: z
62
46
  .number()
63
47
  .int()
@@ -83,7 +67,7 @@ const requestExamples = {
83
67
  export const ask = oc
84
68
  .route({
85
69
  summary: "Ask a question about an invoice",
86
- description: "Parse an invoice and answer a question about its content using AI. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
70
+ description: `Parse an invoice and answer a question about its content using AI. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
87
71
  tags: ["Invoice"],
88
72
  spec: (op) => getDefaultSpec(op, requestExamples),
89
73
  })
@@ -1,6 +1,7 @@
1
1
  import { oc } from "@orpc/contract";
2
2
  import { z } from "zod";
3
3
  import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
+ import { outputExtractModelDescription, specializedExtractModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
4
5
  import { getDefaultSpec } from "./get-default-spec";
5
6
  const specializedModelSchema = z
6
7
  .enum(["auto", ...pdfvectorModelSchema.options])
@@ -12,17 +13,9 @@ const extractInputSchema = z.object({
12
13
  .describe("URL of the invoice file to fetch and parse"),
13
14
  file: z
14
15
  .file()
15
- .mime([
16
- "application/pdf",
17
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
18
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
19
- "text/csv",
20
- "application/csv",
21
- "image/png",
22
- "image/jpeg",
23
- ])
16
+ .mime([...supportedFileMimes])
24
17
  .optional()
25
- .describe("Invoice file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
18
+ .describe(`Invoice file upload via multipart form-data (${supportedFileFormatsDescription})`),
26
19
  base64: z.string().optional().describe("Base64-encoded invoice file content"),
27
20
  prompt: z
28
21
  .string()
@@ -54,12 +47,7 @@ const extractInputSchema = z.object({
54
47
  }),
55
48
  ])
56
49
  .describe("JSON Schema describing the structure of the data to extract from the invoice. Can be a JSON object or a JSON string."),
57
- model: specializedModelSchema.describe("Model tier for extracting structured data. " +
58
- "'auto' (default): Automatically selects the best tier based on document complexity. " +
59
- "'nano': 6 credits/page. Fastest and cheapest. Best for simple documents with straightforward schemas. Supports PDF, Word, Excel, CSV. " +
60
- "'mini': 10 credits/page. Balanced speed and accuracy. Handles moderately complex documents and schemas. Supports PDF, Word, Excel, CSV. " +
61
- "'pro': 14 credits/page. High accuracy for complex documents with large or nested schemas. Supports PDF, Word, Excel, CSV, Image. " +
62
- "'max': 18 credits/page. Maximum accuracy with the most capable models. Best for difficult extractions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
50
+ model: specializedModelSchema.describe(specializedExtractModelDescription),
63
51
  callback: z
64
52
  .object({
65
53
  url: z
@@ -83,11 +71,7 @@ const extractOutputSchema = z.object({
83
71
  (typeof val !== "object" || Object.keys(val).length > 0), { message: "Extracted data must not be empty" })
84
72
  .describe("Extracted structured data matching the provided JSON Schema"),
85
73
  pageCount: z.number().int().describe("Total number of pages in the document"),
86
- model: pdfvectorModelSchema.describe("Model tier used to extract the data. " +
87
- "'nano': Fastest, best for simple documents. Supports PDF, Word, Excel, CSV. " +
88
- "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
89
- "'pro': High accuracy for complex documents. Supports PDF, Word, Excel, CSV, Image. " +
90
- "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
74
+ model: pdfvectorModelSchema.describe(outputExtractModelDescription),
91
75
  credits: z
92
76
  .number()
93
77
  .int()
@@ -131,7 +115,7 @@ const requestExamples = {
131
115
  export const extract = oc
132
116
  .route({
133
117
  summary: "Extract structured data from an invoice",
134
- description: "Parse an invoice and extract structured data matching a provided JSON Schema using AI. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
118
+ description: `Parse an invoice and extract structured data matching a provided JSON Schema using AI. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
135
119
  tags: ["Invoice"],
136
120
  spec: (op) => getDefaultSpec(op, requestExamples),
137
121
  })
@@ -1,5 +1,6 @@
1
1
  import { oc } from "@orpc/contract";
2
2
  import { z } from "zod";
3
+ import { specializedParseModelDescription, supportedFileFormatsDescription, supportedFileMimes, supportedFileTypesLong, } from "../../supported-mimes";
3
4
  import { getDefaultSpec } from "./get-default-spec";
4
5
  const specializedParseModelSchema = z
5
6
  .enum(["pro", "max", "auto"], {
@@ -13,22 +14,11 @@ const parseInputSchema = z.object({
13
14
  .describe("URL of the invoice file to fetch and parse"),
14
15
  file: z
15
16
  .file()
16
- .mime([
17
- "application/pdf",
18
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
19
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
20
- "text/csv",
21
- "application/csv",
22
- "image/png",
23
- "image/jpeg",
24
- ])
17
+ .mime([...supportedFileMimes])
25
18
  .optional()
26
- .describe("Invoice file upload via multipart form-data (PDF, DOCX, XLSX, CSV, PNG, JPG)"),
19
+ .describe(`Invoice file upload via multipart form-data (${supportedFileFormatsDescription})`),
27
20
  base64: z.string().optional().describe("Base64-encoded invoice file content"),
28
- model: specializedParseModelSchema.describe("Model tier for parsing. " +
29
- "'auto' (default): Automatically selects the best parsing strategy with intelligent fallback. " +
30
- "'pro': 6 credits/page. Extracts structured invoice fields with standard accuracy. " +
31
- "'max': 10 credits/page. Extracts structured invoice fields with highest accuracy and fallback."),
21
+ model: specializedParseModelSchema.describe(specializedParseModelDescription("invoice")),
32
22
  callback: z
33
23
  .object({
34
24
  url: z
@@ -92,7 +82,7 @@ const requestExamples = {
92
82
  export const parse = oc
93
83
  .route({
94
84
  summary: "Parse an invoice",
95
- description: "Extract text and structured data from an invoice. Supports PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg) files. Provide the document via file upload, a public URL, or a base64-encoded string.",
85
+ description: `Extract text and structured data from an invoice. Supports ${supportedFileTypesLong}. Provide the document via file upload, a public URL, or a base64-encoded string.`,
96
86
  tags: ["Invoice"],
97
87
  spec: (op) => getDefaultSpec(op, requestExamples),
98
88
  })
@@ -0,0 +1,29 @@
1
+ /**
2
+ * All MIME types accepted for file uploads across all API endpoints.
3
+ * Single source of truth — imported by all contract schemas.
4
+ */
5
+ export declare const supportedFileMimes: readonly ["application/pdf", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.openxmlformats-officedocument.presentationml.presentation", "text/csv", "application/csv", "image/png", "image/jpeg", "image/tiff", "image/bmp", "image/heif", "image/heic", "text/plain", "text/markdown", "text/tab-separated-values", "text/xml", "application/xml", "application/rtf", "text/rtf", "text/html", "application/epub+zip", "application/vnd.oasis.opendocument.text", "application/vnd.oasis.opendocument.spreadsheet", "application/vnd.oasis.opendocument.presentation", "application/x-bibtex"];
6
+ export declare const supportedFileFormatsDescription = "PDF, DOCX, XLSX, PPTX, CSV, PNG, JPG, TIFF, BMP, HEIF, TXT, MD, TSV, XML, RTF, HTML, ODT, ODS, ODP, EPUB, BIB, RIS, NBIB, ENW";
7
+ /**
8
+ * Human-readable description of supported file types with extensions.
9
+ * Used in route-level API descriptions.
10
+ */
11
+ export declare const supportedFileTypesLong: string;
12
+ /** Model tier descriptions for document parse endpoints. */
13
+ export declare const documentParseModelDescription: string;
14
+ /** Model tier descriptions for document extract endpoints. */
15
+ export declare const documentExtractModelDescription: string;
16
+ /** Model tier descriptions for document ask endpoints. */
17
+ export declare const documentAskModelDescription: string;
18
+ /** Model tier descriptions for invoice/identity/bankStatement parse endpoints (pro/max/auto only). */
19
+ export declare const specializedParseModelDescription: (type: string) => string;
20
+ /** Model tier descriptions for invoice/identity/bankStatement extract endpoints. */
21
+ export declare const specializedExtractModelDescription: string;
22
+ /** Model tier descriptions for invoice/identity/bankStatement ask endpoints. */
23
+ export declare const specializedAskModelDescription: string;
24
+ /** Output model description for parse results. */
25
+ export declare const outputModelDescription: string;
26
+ /** Output model description for extract results. */
27
+ export declare const outputExtractModelDescription: string;
28
+ /** Output model description for ask results. */
29
+ export declare const outputAskModelDescription: string;