@pdfvector/instance-contract 0.0.25 → 0.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,6 +17,7 @@ export declare const getUsageRecords: import("@orpc/contract").ContractProcedure
17
17
  operation: z.ZodString;
18
18
  pageCount: z.ZodNullable<z.ZodNumber>;
19
19
  requestedModel: z.ZodString;
20
+ actualModel: z.ZodNullable<z.ZodString>;
20
21
  apiKey: z.ZodNullable<z.ZodString>;
21
22
  workspaceId: z.ZodNullable<z.ZodNumber>;
22
23
  createdAt: z.ZodNumber;
@@ -14,6 +14,7 @@ const usageRecordSchema = z.object({
14
14
  operation: z.string(),
15
15
  pageCount: z.number().nullable(),
16
16
  requestedModel: z.string(),
17
+ actualModel: z.string().nullable(),
17
18
  apiKey: z.string().nullable(),
18
19
  workspaceId: z.number().nullable(),
19
20
  createdAt: z.number(),
@@ -32,11 +32,11 @@ const askInputSchema = z.object({
32
32
  .min(4, "question must be at least 4 characters")
33
33
  .describe("The question to answer about the bank statement"),
34
34
  model: specializedModelSchema.describe("Model tier for answering the question. " +
35
- "'auto' (default): Automatically selects the best tier. " +
36
- "'nano': Uses lightweight models. " +
37
- "'mini': Uses mid-range models. " +
38
- "'pro': Uses capable models. " +
39
- "'max': Uses the most powerful models."),
35
+ "'auto' (default): Automatically selects the best tier based on document complexity. " +
36
+ "'nano': 6 credits/page. Fastest and cheapest. Best for simple questions about straightforward documents. Supports PDF, Word, Excel, CSV. " +
37
+ "'mini': 10 credits/page. Balanced speed and accuracy. Handles moderately complex questions. Supports PDF, Word, Excel, CSV. " +
38
+ "'pro': 14 credits/page. High accuracy for nuanced questions about complex documents. Supports PDF, Word, Excel, CSV, Image. " +
39
+ "'max': 18 credits/page. Maximum accuracy with the most capable models. Best for difficult questions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
40
40
  callback: z
41
41
  .object({
42
42
  url: z
@@ -56,11 +56,15 @@ const askInputSchema = z.object({
56
56
  const askOutputSchema = z.object({
57
57
  markdown: z.string().describe("The answer to the question"),
58
58
  pageCount: z.number().int().describe("Total number of pages in the document"),
59
- model: pdfvectorModelSchema.describe("Model tier used to answer the question"),
59
+ model: pdfvectorModelSchema.describe("Model tier used to answer the question. " +
60
+ "'nano': Fastest, best for simple questions. Supports PDF, Word, Excel, CSV. " +
61
+ "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
62
+ "'pro': High accuracy for complex questions. Supports PDF, Word, Excel, CSV, Image. " +
63
+ "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
60
64
  credits: z
61
65
  .number()
62
66
  .int()
63
- .describe("Number of credits consumed by this API call"),
67
+ .describe("Number of credits consumed by this API call. Cost per page: nano=6, mini=10, pro=14, max=18."),
64
68
  requestId: z
65
69
  .number()
66
70
  .int()
@@ -40,7 +40,7 @@ const extractInputSchema = z.object({
40
40
  const parsed = JSON.parse(trimmed);
41
41
  if (typeof parsed !== "object" || parsed === null) {
42
42
  ctx.addIssue({
43
- code: z.ZodIssueCode.custom,
43
+ code: "custom",
44
44
  message: "Schema must be a JSON object",
45
45
  });
46
46
  return z.NEVER;
@@ -49,7 +49,7 @@ const extractInputSchema = z.object({
49
49
  }
50
50
  catch {
51
51
  ctx.addIssue({
52
- code: z.ZodIssueCode.custom,
52
+ code: "custom",
53
53
  message: "Invalid JSON string for schema",
54
54
  });
55
55
  return z.NEVER;
@@ -58,11 +58,11 @@ const extractInputSchema = z.object({
58
58
  ])
59
59
  .describe("JSON Schema describing the structure of the data to extract from the bank statement. Can be a JSON object or a JSON string."),
60
60
  model: specializedModelSchema.describe("Model tier for extracting structured data. " +
61
- "'auto' (default): Automatically selects the best tier. " +
62
- "'nano': Uses lightweight models. " +
63
- "'mini': Uses mid-range models. " +
64
- "'pro': Uses capable models. " +
65
- "'max': Uses the most powerful models."),
61
+ "'auto' (default): Automatically selects the best tier based on document complexity. " +
62
+ "'nano': 6 credits/page. Fastest and cheapest. Best for simple documents with straightforward schemas. Supports PDF, Word, Excel, CSV. " +
63
+ "'mini': 10 credits/page. Balanced speed and accuracy. Handles moderately complex documents and schemas. Supports PDF, Word, Excel, CSV. " +
64
+ "'pro': 14 credits/page. High accuracy for complex documents with large or nested schemas. Supports PDF, Word, Excel, CSV, Image. " +
65
+ "'max': 18 credits/page. Maximum accuracy with the most capable models. Best for difficult extractions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
66
66
  callback: z
67
67
  .object({
68
68
  url: z
@@ -82,13 +82,19 @@ const extractInputSchema = z.object({
82
82
  const extractOutputSchema = z.object({
83
83
  data: z
84
84
  .unknown()
85
+ .refine((val) => val != null &&
86
+ (typeof val !== "object" || Object.keys(val).length > 0), { message: "Extracted data must not be empty" })
85
87
  .describe("Extracted structured data matching the provided JSON Schema"),
86
88
  pageCount: z.number().int().describe("Total number of pages in the document"),
87
- model: pdfvectorModelSchema.describe("Model tier used to extract the data"),
89
+ model: pdfvectorModelSchema.describe("Model tier used to extract the data. " +
90
+ "'nano': Fastest, best for simple documents. Supports PDF, Word, Excel, CSV. " +
91
+ "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
92
+ "'pro': High accuracy for complex documents. Supports PDF, Word, Excel, CSV, Image. " +
93
+ "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
88
94
  credits: z
89
95
  .number()
90
96
  .int()
91
- .describe("Number of credits consumed by this API call"),
97
+ .describe("Number of credits consumed by this API call. Cost per page: nano=6, mini=10, pro=14, max=18."),
92
98
  requestId: z
93
99
  .number()
94
100
  .int()
@@ -30,8 +30,8 @@ const parseInputSchema = z.object({
30
30
  .describe("Base64-encoded bank statement file content"),
31
31
  model: specializedParseModelSchema.describe("Model tier for parsing. " +
32
32
  "'auto' (default): Automatically selects the best parsing strategy with intelligent fallback. " +
33
- "'pro': Extracts structured bank statement fields with standard accuracy. " +
34
- "'max': Extracts structured bank statement fields with highest accuracy and fallback."),
33
+ "'pro': 6 credits/page. Extracts structured bank statement fields with standard accuracy. " +
34
+ "'max': 10 credits/page. Extracts structured bank statement fields with highest accuracy and fallback."),
35
35
  callback: z
36
36
  .object({
37
37
  url: z
@@ -59,7 +59,7 @@ const parseOutputSchema = z.object({
59
59
  credits: z
60
60
  .number()
61
61
  .int()
62
- .describe("Number of credits consumed by this API call"),
62
+ .describe("Number of credits consumed by this API call. Cost per page: pro=6, max=10."),
63
63
  requestId: z
64
64
  .number()
65
65
  .int()
@@ -33,11 +33,11 @@ const askInputSchema = z.object({
33
33
  .optional()
34
34
  .default("auto")
35
35
  .describe("Model tier for answering the question. " +
36
- "'auto' (default): Automatically selects the best tier based on document page count and document complexity. " +
37
- "'nano': Uses lightweight models (likely better than GPT-5-nano). Supports PDF, Word, Excel, CSV. " +
38
- "'mini': Uses mid-range models (likely better than GPT-5-mini). Supports PDF, Word, Excel, CSV. " +
39
- "'pro': Uses capable models (likely better than GPT-5.2). Supports PDF, Word, Excel, CSV, Image. " +
40
- "'max': Uses the most powerful models (likely better than Claude Opus 4.6). Supports PDF, Word, Excel, CSV, Image."),
36
+ "'auto' (default): Automatically selects the best tier based on document complexity. " +
37
+ "'nano': 2 credits/page. Fastest and cheapest. Best for simple questions about straightforward documents. Supports PDF, Word, Excel, CSV. " +
38
+ "'mini': 4 credits/page. Balanced speed and accuracy. Handles moderately complex questions. Supports PDF, Word, Excel, CSV. " +
39
+ "'pro': 8 credits/page. High accuracy for nuanced questions about complex documents. Supports PDF, Word, Excel, CSV, Image. " +
40
+ "'max': 16 credits/page. Maximum accuracy with the most capable models. Best for difficult questions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
41
41
  callback: z
42
42
  .object({
43
43
  url: z
@@ -62,14 +62,14 @@ const askOutputSchema = z
62
62
  .int()
63
63
  .describe("Total number of pages in the document"),
64
64
  model: pdfvectorModelSchema.describe("Model tier used to answer the question. " +
65
- "'nano': Supports PDF, Word, Excel, CSV. " +
66
- "'mini': Supports PDF, Word, Excel, CSV. " +
67
- "'pro': Supports PDF, Word, Excel, CSV, Image. " +
68
- "'max': Supports PDF, Word, Excel, CSV, Image."),
65
+ "'nano': Fastest, best for simple questions. Supports PDF, Word, Excel, CSV. " +
66
+ "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
67
+ "'pro': High accuracy for complex questions. Supports PDF, Word, Excel, CSV, Image. " +
68
+ "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
69
69
  credits: z
70
70
  .number()
71
71
  .int()
72
- .describe("Number of credits consumed by this API call"),
72
+ .describe("Number of credits consumed by this API call. Cost per page: nano=2, mini=4, pro=8, max=16."),
73
73
  requestId: z
74
74
  .number()
75
75
  .int()
@@ -38,7 +38,7 @@ const extractInputSchema = z.object({
38
38
  const parsed = JSON.parse(trimmed);
39
39
  if (typeof parsed !== "object" || parsed === null) {
40
40
  ctx.addIssue({
41
- code: z.ZodIssueCode.custom,
41
+ code: "custom",
42
42
  message: "Schema must be a JSON object",
43
43
  });
44
44
  return z.NEVER;
@@ -47,7 +47,7 @@ const extractInputSchema = z.object({
47
47
  }
48
48
  catch {
49
49
  ctx.addIssue({
50
- code: z.ZodIssueCode.custom,
50
+ code: "custom",
51
51
  message: "Invalid JSON string for schema",
52
52
  });
53
53
  return z.NEVER;
@@ -60,11 +60,11 @@ const extractInputSchema = z.object({
60
60
  .optional()
61
61
  .default("auto")
62
62
  .describe("Model tier for extracting structured data. " +
63
- "'auto' (default): Automatically selects the best tier based on document page count and document complexity. " +
64
- "'nano': Uses lightweight models (likely better than GPT-5-nano). Supports PDF, Word, Excel, CSV. " +
65
- "'mini': Uses mid-range models (likely better than GPT-5-mini). Supports PDF, Word, Excel, CSV. " +
66
- "'pro': Uses capable models (likely better than GPT-5.2). Supports PDF, Word, Excel, CSV, Image. " +
67
- "'max': Uses the most powerful models (likely better than Claude Opus 4.6). Supports PDF, Word, Excel, CSV, Image."),
63
+ "'auto' (default): Automatically selects the best tier based on document complexity. " +
64
+ "'nano': 2 credits/page. Fastest and cheapest. Best for simple documents with straightforward schemas. Supports PDF, Word, Excel, CSV. " +
65
+ "'mini': 4 credits/page. Balanced speed and accuracy. Handles moderately complex documents and schemas. Supports PDF, Word, Excel, CSV. " +
66
+ "'pro': 8 credits/page. High accuracy for complex documents with large or nested schemas. Supports PDF, Word, Excel, CSV, Image. " +
67
+ "'max': 16 credits/page. Maximum accuracy with the most capable models. Best for difficult extractions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
68
68
  callback: z
69
69
  .object({
70
70
  url: z
@@ -85,20 +85,22 @@ const extractOutputSchema = z
85
85
  .object({
86
86
  data: z
87
87
  .unknown()
88
+ .refine((val) => val != null &&
89
+ (typeof val !== "object" || Object.keys(val).length > 0), { message: "Extracted data must not be empty" })
88
90
  .describe("Extracted structured data matching the provided JSON Schema"),
89
91
  pageCount: z
90
92
  .number()
91
93
  .int()
92
94
  .describe("Total number of pages in the document"),
93
95
  model: pdfvectorModelSchema.describe("Model tier used to extract the data. " +
94
- "'nano': Supports PDF, Word, Excel, CSV. " +
95
- "'mini': Supports PDF, Word, Excel, CSV. " +
96
- "'pro': Supports PDF, Word, Excel, CSV, Image. " +
97
- "'max': Supports PDF, Word, Excel, CSV, Image."),
96
+ "'nano': Fastest, best for simple documents. Supports PDF, Word, Excel, CSV. " +
97
+ "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
98
+ "'pro': High accuracy for complex documents. Supports PDF, Word, Excel, CSV, Image. " +
99
+ "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
98
100
  credits: z
99
101
  .number()
100
102
  .int()
101
- .describe("Number of credits consumed by this API call"),
103
+ .describe("Number of credits consumed by this API call. Cost per page: nano=2, mini=4, pro=8, max=16."),
102
104
  requestId: z
103
105
  .number()
104
106
  .int()
@@ -30,10 +30,10 @@ const parseInputSchema = z.object({
30
30
  .default("auto")
31
31
  .describe("Model tier for parsing. " +
32
32
  "'auto' (default): Automatically selects the best parsing strategy with intelligent fallback. File up to 1000 pages, up to 500MB in size. Supports PDF, Word, Excel, CSV, Image. " +
33
- "'nano': For simple documents with plain text content. File up to 30 pages, up to 10MB in size. Supports PDF, Word, Excel, CSV. " +
34
- "'mini': For documents with tables and structured content. File up to 30 pages, up to 10MB in size. Supports PDF, Word, Excel, CSV. " +
35
- "'pro': For documents up to 30 pages with tables, handwritten text, figures, math, and Arabic. File up to 30 pages, up to 40MB in size. Supports PDF, Word, Excel, CSV, Image. " +
36
- "'max': For large documents with full Pro capabilities plus enhanced multilingual support. File up to 1000 pages, up to 500MB in size. Supports PDF, Word, Excel, CSV, Image."),
33
+ "'nano': 1 credit/page. For simple documents with plain text content. File up to 30 pages, up to 10MB in size. Supports PDF, Word, Excel, CSV. " +
34
+ "'mini': 2 credits/page. For documents with tables and structured content. File up to 30 pages, up to 10MB in size. Supports PDF, Word, Excel, CSV. " +
35
+ "'pro': 4 credits/page. For documents up to 30 pages with tables, handwritten text, figures, math, and Arabic. File up to 30 pages, up to 40MB in size. Supports PDF, Word, Excel, CSV, Image. " +
36
+ "'max': 8 credits/page. For large documents with full Pro capabilities plus enhanced multilingual support. File up to 1000 pages, up to 500MB in size. Supports PDF, Word, Excel, CSV, Image."),
37
37
  callback: z
38
38
  .object({
39
39
  url: z
@@ -54,14 +54,14 @@ const parseOutputSchema = z.object({
54
54
  markdown: z.string().describe("Extracted text content from the document"),
55
55
  pageCount: z.number().int().describe("Total number of pages in the document"),
56
56
  model: pdfvectorModelSchema.describe("Model tier used to parse the document. " +
57
- "'nano': Supports PDF, Word, Excel, CSV. " +
58
- "'mini': Supports PDF, Word, Excel, CSV. " +
59
- "'pro': Supports PDF, Word, Excel, CSV, Image. " +
60
- "'max': Supports PDF, Word, Excel, CSV, Image."),
57
+ "'nano': Fastest, best for simple documents. Supports PDF, Word, Excel, CSV. " +
58
+ "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
59
+ "'pro': High accuracy for complex documents. Supports PDF, Word, Excel, CSV, Image. " +
60
+ "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
61
61
  credits: z
62
62
  .number()
63
63
  .int()
64
- .describe("Number of credits consumed by this API call"),
64
+ .describe("Number of credits consumed by this API call. Cost per page: nano=1, mini=2, pro=4, max=8."),
65
65
  requestId: z
66
66
  .number()
67
67
  .int()
@@ -32,11 +32,11 @@ const askInputSchema = z.object({
32
32
  .min(4, "question must be at least 4 characters")
33
33
  .describe("The question to answer about the identity document"),
34
34
  model: specializedModelSchema.describe("Model tier for answering the question. " +
35
- "'auto' (default): Automatically selects the best tier. " +
36
- "'nano': Uses lightweight models. " +
37
- "'mini': Uses mid-range models. " +
38
- "'pro': Uses capable models. " +
39
- "'max': Uses the most powerful models."),
35
+ "'auto' (default): Automatically selects the best tier based on document complexity. " +
36
+ "'nano': 6 credits/page. Fastest and cheapest. Best for simple questions about straightforward documents. Supports PDF, Word, Excel, CSV. " +
37
+ "'mini': 10 credits/page. Balanced speed and accuracy. Handles moderately complex questions. Supports PDF, Word, Excel, CSV. " +
38
+ "'pro': 14 credits/page. High accuracy for nuanced questions about complex documents. Supports PDF, Word, Excel, CSV, Image. " +
39
+ "'max': 18 credits/page. Maximum accuracy with the most capable models. Best for difficult questions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
40
40
  callback: z
41
41
  .object({
42
42
  url: z
@@ -56,11 +56,15 @@ const askInputSchema = z.object({
56
56
  const askOutputSchema = z.object({
57
57
  markdown: z.string().describe("The answer to the question"),
58
58
  pageCount: z.number().int().describe("Total number of pages in the document"),
59
- model: pdfvectorModelSchema.describe("Model tier used to answer the question"),
59
+ model: pdfvectorModelSchema.describe("Model tier used to answer the question. " +
60
+ "'nano': Fastest, best for simple questions. Supports PDF, Word, Excel, CSV. " +
61
+ "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
62
+ "'pro': High accuracy for complex questions. Supports PDF, Word, Excel, CSV, Image. " +
63
+ "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
60
64
  credits: z
61
65
  .number()
62
66
  .int()
63
- .describe("Number of credits consumed by this API call"),
67
+ .describe("Number of credits consumed by this API call. Cost per page: nano=6, mini=10, pro=14, max=18."),
64
68
  requestId: z
65
69
  .number()
66
70
  .int()
@@ -40,7 +40,7 @@ const extractInputSchema = z.object({
40
40
  const parsed = JSON.parse(trimmed);
41
41
  if (typeof parsed !== "object" || parsed === null) {
42
42
  ctx.addIssue({
43
- code: z.ZodIssueCode.custom,
43
+ code: "custom",
44
44
  message: "Schema must be a JSON object",
45
45
  });
46
46
  return z.NEVER;
@@ -49,7 +49,7 @@ const extractInputSchema = z.object({
49
49
  }
50
50
  catch {
51
51
  ctx.addIssue({
52
- code: z.ZodIssueCode.custom,
52
+ code: "custom",
53
53
  message: "Invalid JSON string for schema",
54
54
  });
55
55
  return z.NEVER;
@@ -58,11 +58,11 @@ const extractInputSchema = z.object({
58
58
  ])
59
59
  .describe("JSON Schema describing the structure of the data to extract from the identity document. Can be a JSON object or a JSON string."),
60
60
  model: specializedModelSchema.describe("Model tier for extracting structured data. " +
61
- "'auto' (default): Automatically selects the best tier. " +
62
- "'nano': Uses lightweight models. " +
63
- "'mini': Uses mid-range models. " +
64
- "'pro': Uses capable models. " +
65
- "'max': Uses the most powerful models."),
61
+ "'auto' (default): Automatically selects the best tier based on document complexity. " +
62
+ "'nano': 6 credits/page. Fastest and cheapest. Best for simple documents with straightforward schemas. Supports PDF, Word, Excel, CSV. " +
63
+ "'mini': 10 credits/page. Balanced speed and accuracy. Handles moderately complex documents and schemas. Supports PDF, Word, Excel, CSV. " +
64
+ "'pro': 14 credits/page. High accuracy for complex documents with large or nested schemas. Supports PDF, Word, Excel, CSV, Image. " +
65
+ "'max': 18 credits/page. Maximum accuracy with the most capable models. Best for difficult extractions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
66
66
  callback: z
67
67
  .object({
68
68
  url: z
@@ -82,13 +82,19 @@ const extractInputSchema = z.object({
82
82
  const extractOutputSchema = z.object({
83
83
  data: z
84
84
  .unknown()
85
+ .refine((val) => val != null &&
86
+ (typeof val !== "object" || Object.keys(val).length > 0), { message: "Extracted data must not be empty" })
85
87
  .describe("Extracted structured data matching the provided JSON Schema"),
86
88
  pageCount: z.number().int().describe("Total number of pages in the document"),
87
- model: pdfvectorModelSchema.describe("Model tier used to extract the data"),
89
+ model: pdfvectorModelSchema.describe("Model tier used to extract the data. " +
90
+ "'nano': Fastest, best for simple documents. Supports PDF, Word, Excel, CSV. " +
91
+ "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
92
+ "'pro': High accuracy for complex documents. Supports PDF, Word, Excel, CSV, Image. " +
93
+ "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
88
94
  credits: z
89
95
  .number()
90
96
  .int()
91
- .describe("Number of credits consumed by this API call"),
97
+ .describe("Number of credits consumed by this API call. Cost per page: nano=6, mini=10, pro=14, max=18."),
92
98
  requestId: z
93
99
  .number()
94
100
  .int()
@@ -30,8 +30,8 @@ const parseInputSchema = z.object({
30
30
  .describe("Base64-encoded identity document file content"),
31
31
  model: specializedParseModelSchema.describe("Model tier for parsing. " +
32
32
  "'auto' (default): Automatically selects the best parsing strategy with intelligent fallback. " +
33
- "'pro': Extracts structured identity document fields with standard accuracy. " +
34
- "'max': Extracts structured identity document fields with highest accuracy."),
33
+ "'pro': 6 credits/page. Extracts structured identity document fields with standard accuracy. " +
34
+ "'max': 10 credits/page. Extracts structured identity document fields with highest accuracy."),
35
35
  callback: z
36
36
  .object({
37
37
  url: z
@@ -68,7 +68,7 @@ const parseOutputSchema = z.object({
68
68
  credits: z
69
69
  .number()
70
70
  .int()
71
- .describe("Number of credits consumed by this API call"),
71
+ .describe("Number of credits consumed by this API call. Cost per page: pro=6, max=10."),
72
72
  requestId: z
73
73
  .number()
74
74
  .int()
@@ -29,11 +29,11 @@ const askInputSchema = z.object({
29
29
  .min(4, "question must be at least 4 characters")
30
30
  .describe("The question to answer about the invoice"),
31
31
  model: specializedModelSchema.describe("Model tier for answering the question. " +
32
- "'auto' (default): Automatically selects the best tier. " +
33
- "'nano': Uses lightweight models. " +
34
- "'mini': Uses mid-range models. " +
35
- "'pro': Uses capable models. " +
36
- "'max': Uses the most powerful models."),
32
+ "'auto' (default): Automatically selects the best tier based on document complexity. " +
33
+ "'nano': 6 credits/page. Fastest and cheapest. Best for simple questions about straightforward documents. Supports PDF, Word, Excel, CSV. " +
34
+ "'mini': 10 credits/page. Balanced speed and accuracy. Handles moderately complex questions. Supports PDF, Word, Excel, CSV. " +
35
+ "'pro': 14 credits/page. High accuracy for nuanced questions about complex documents. Supports PDF, Word, Excel, CSV, Image. " +
36
+ "'max': 18 credits/page. Maximum accuracy with the most capable models. Best for difficult questions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
37
37
  callback: z
38
38
  .object({
39
39
  url: z
@@ -53,11 +53,15 @@ const askInputSchema = z.object({
53
53
  const askOutputSchema = z.object({
54
54
  markdown: z.string().describe("The answer to the question"),
55
55
  pageCount: z.number().int().describe("Total number of pages in the document"),
56
- model: pdfvectorModelSchema.describe("Model tier used to answer the question"),
56
+ model: pdfvectorModelSchema.describe("Model tier used to answer the question. " +
57
+ "'nano': Fastest, best for simple questions. Supports PDF, Word, Excel, CSV. " +
58
+ "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
59
+ "'pro': High accuracy for complex questions. Supports PDF, Word, Excel, CSV, Image. " +
60
+ "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
57
61
  credits: z
58
62
  .number()
59
63
  .int()
60
- .describe("Number of credits consumed by this API call"),
64
+ .describe("Number of credits consumed by this API call. Cost per page: nano=6, mini=10, pro=14, max=18."),
61
65
  requestId: z
62
66
  .number()
63
67
  .int()
@@ -37,7 +37,7 @@ const extractInputSchema = z.object({
37
37
  const parsed = JSON.parse(trimmed);
38
38
  if (typeof parsed !== "object" || parsed === null) {
39
39
  ctx.addIssue({
40
- code: z.ZodIssueCode.custom,
40
+ code: "custom",
41
41
  message: "Schema must be a JSON object",
42
42
  });
43
43
  return z.NEVER;
@@ -46,7 +46,7 @@ const extractInputSchema = z.object({
46
46
  }
47
47
  catch {
48
48
  ctx.addIssue({
49
- code: z.ZodIssueCode.custom,
49
+ code: "custom",
50
50
  message: "Invalid JSON string for schema",
51
51
  });
52
52
  return z.NEVER;
@@ -55,11 +55,11 @@ const extractInputSchema = z.object({
55
55
  ])
56
56
  .describe("JSON Schema describing the structure of the data to extract from the invoice. Can be a JSON object or a JSON string."),
57
57
  model: specializedModelSchema.describe("Model tier for extracting structured data. " +
58
- "'auto' (default): Automatically selects the best tier. " +
59
- "'nano': Uses lightweight models. " +
60
- "'mini': Uses mid-range models. " +
61
- "'pro': Uses capable models. " +
62
- "'max': Uses the most powerful models."),
58
+ "'auto' (default): Automatically selects the best tier based on document complexity. " +
59
+ "'nano': 6 credits/page. Fastest and cheapest. Best for simple documents with straightforward schemas. Supports PDF, Word, Excel, CSV. " +
60
+ "'mini': 10 credits/page. Balanced speed and accuracy. Handles moderately complex documents and schemas. Supports PDF, Word, Excel, CSV. " +
61
+ "'pro': 14 credits/page. High accuracy for complex documents with large or nested schemas. Supports PDF, Word, Excel, CSV, Image. " +
62
+ "'max': 18 credits/page. Maximum accuracy with the most capable models. Best for difficult extractions requiring deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
63
63
  callback: z
64
64
  .object({
65
65
  url: z
@@ -79,13 +79,19 @@ const extractInputSchema = z.object({
79
79
  const extractOutputSchema = z.object({
80
80
  data: z
81
81
  .unknown()
82
+ .refine((val) => val != null &&
83
+ (typeof val !== "object" || Object.keys(val).length > 0), { message: "Extracted data must not be empty" })
82
84
  .describe("Extracted structured data matching the provided JSON Schema"),
83
85
  pageCount: z.number().int().describe("Total number of pages in the document"),
84
- model: pdfvectorModelSchema.describe("Model tier used to extract the data"),
86
+ model: pdfvectorModelSchema.describe("Model tier used to extract the data. " +
87
+ "'nano': Fastest, best for simple documents. Supports PDF, Word, Excel, CSV. " +
88
+ "'mini': Balanced speed and accuracy. Supports PDF, Word, Excel, CSV. " +
89
+ "'pro': High accuracy for complex documents. Supports PDF, Word, Excel, CSV, Image. " +
90
+ "'max': Maximum accuracy with deep reasoning. Supports PDF, Word, Excel, CSV, Image."),
85
91
  credits: z
86
92
  .number()
87
93
  .int()
88
- .describe("Number of credits consumed by this API call"),
94
+ .describe("Number of credits consumed by this API call. Cost per page: nano=6, mini=10, pro=14, max=18."),
89
95
  requestId: z
90
96
  .number()
91
97
  .int()
@@ -27,8 +27,8 @@ const parseInputSchema = z.object({
27
27
  base64: z.string().optional().describe("Base64-encoded invoice file content"),
28
28
  model: specializedParseModelSchema.describe("Model tier for parsing. " +
29
29
  "'auto' (default): Automatically selects the best parsing strategy with intelligent fallback. " +
30
- "'pro': Extracts structured invoice fields with standard accuracy. " +
31
- "'max': Extracts structured invoice fields with highest accuracy and fallback."),
30
+ "'pro': 6 credits/page. Extracts structured invoice fields with standard accuracy. " +
31
+ "'max': 10 credits/page. Extracts structured invoice fields with highest accuracy and fallback."),
32
32
  callback: z
33
33
  .object({
34
34
  url: z
@@ -54,7 +54,7 @@ const parseOutputSchema = z.object({
54
54
  credits: z
55
55
  .number()
56
56
  .int()
57
- .describe("Number of credits consumed by this API call"),
57
+ .describe("Number of credits consumed by this API call. Cost per page: pro=6, max=10."),
58
58
  requestId: z
59
59
  .number()
60
60
  .int()
package/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # @pdfvector/instance-contract
2
2
 
3
+ ## 0.0.27
4
+ ### Patch Changes
5
+
6
+
7
+
8
+ - [#146](https://github.com/phuctm97/pdfvector/pull/146) [`3f46b91`](https://github.com/phuctm97/pdfvector/commit/3f46b91bff72ba75616a5b529aa3e511ea4fb8a2) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Fix extract empty responses, track actual model, and reorder tier models
9
+
10
+ ## 0.0.26
11
+ ### Patch Changes
12
+
13
+
14
+
15
+ - [#144](https://github.com/phuctm97/pdfvector/pull/144) [`4ba69fc`](https://github.com/phuctm97/pdfvector/commit/4ba69fc05b9fe462bfb9475d923cad66211c3222) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Add schema mismatch retry and empty output validation for extract API
16
+
3
17
  ## 0.0.25
4
18
  ### Patch Changes
5
19
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pdfvector/instance-contract",
3
- "version": "0.0.25",
3
+ "version": "0.0.27",
4
4
  "type": "module",
5
5
  "description": "API contract definitions for PDFVector instance server",
6
6
  "license": "MIT",