@pdfvector/instance-contract 0.0.11 → 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.tsc/lib/router/document/ask.d.ts +2 -0
- package/.tsc/lib/router/document/ask.js +9 -0
- package/.tsc/lib/router/document/extract.d.ts +5 -3
- package/.tsc/lib/router/document/extract.js +43 -38
- package/.tsc/lib/router/document/get-default-spec.js +9 -0
- package/.tsc/lib/router/document/parse.d.ts +2 -0
- package/.tsc/lib/router/document/parse.js +10 -15
- package/CHANGELOG.md +14 -0
- package/package.json +1 -1
|
@@ -47,12 +47,21 @@ const askOutputSchema = z
|
|
|
47
47
|
"'mini': Supports PDF, Word, Excel, CSV. " +
|
|
48
48
|
"'pro': Supports PDF, Word, Excel, CSV, Image. " +
|
|
49
49
|
"'max': Supports PDF, Word, Excel, CSV, Image."),
|
|
50
|
+
requestId: z
|
|
51
|
+
.number()
|
|
52
|
+
.int()
|
|
53
|
+
.describe("Unique request identifier for this API call"),
|
|
54
|
+
documentId: z
|
|
55
|
+
.string()
|
|
56
|
+
.optional()
|
|
57
|
+
.describe("Document ID if provided via x-pdfvector-document-id header"),
|
|
50
58
|
})
|
|
51
59
|
.meta({
|
|
52
60
|
examples: [
|
|
53
61
|
{
|
|
54
62
|
markdown: "The study found that viral shedding peaked during the first week of symptoms, with the highest viral loads detected in throat swabs.",
|
|
55
63
|
model: "mini",
|
|
64
|
+
requestId: 1,
|
|
56
65
|
},
|
|
57
66
|
],
|
|
58
67
|
});
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
-
export declare const extract: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.
|
|
2
|
+
export declare const extract: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.ZodObject<{
|
|
3
3
|
url: z.ZodOptional<z.ZodURL>;
|
|
4
4
|
file: z.ZodOptional<z.ZodFile>;
|
|
5
5
|
base64: z.ZodOptional<z.ZodString>;
|
|
6
6
|
prompt: z.ZodString;
|
|
7
|
-
schema: z.ZodRecord<z.ZodString, z.ZodUnknown>;
|
|
7
|
+
schema: z.ZodUnion<readonly [z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodPipe<z.ZodString, z.ZodTransform<Record<string, unknown>, string>>]>;
|
|
8
8
|
model: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
9
9
|
nano: "nano";
|
|
10
10
|
mini: "mini";
|
|
@@ -12,7 +12,7 @@ export declare const extract: import("@orpc/contract").ContractProcedureBuilderW
|
|
|
12
12
|
max: "max";
|
|
13
13
|
auto: "auto";
|
|
14
14
|
}>>>;
|
|
15
|
-
}, z.core.$strip
|
|
15
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
16
16
|
data: z.ZodUnknown;
|
|
17
17
|
model: z.ZodEnum<{
|
|
18
18
|
nano: "nano";
|
|
@@ -20,4 +20,6 @@ export declare const extract: import("@orpc/contract").ContractProcedureBuilderW
|
|
|
20
20
|
pro: "pro";
|
|
21
21
|
max: "max";
|
|
22
22
|
}>;
|
|
23
|
+
requestId: z.ZodNumber;
|
|
24
|
+
documentId: z.ZodOptional<z.ZodString>;
|
|
23
25
|
}, z.core.$strip>, Record<never, never>, Record<never, never>>;
|
|
@@ -2,35 +2,7 @@ import { oc } from "@orpc/contract";
|
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
|
|
4
4
|
import { getDefaultSpec } from "./get-default-spec";
|
|
5
|
-
|
|
6
|
-
function expandDotNotation(input) {
|
|
7
|
-
if (typeof input !== "object" || input === null)
|
|
8
|
-
return input;
|
|
9
|
-
const obj = input;
|
|
10
|
-
const dotKeys = Object.keys(obj).filter((k) => k.includes("."));
|
|
11
|
-
if (dotKeys.length === 0)
|
|
12
|
-
return input;
|
|
13
|
-
const result = { ...obj };
|
|
14
|
-
for (const key of dotKeys) {
|
|
15
|
-
const parts = key.split(".");
|
|
16
|
-
let current = result;
|
|
17
|
-
for (let i = 0; i < parts.length - 1; i++) {
|
|
18
|
-
const part = parts[i];
|
|
19
|
-
if (part === undefined)
|
|
20
|
-
continue;
|
|
21
|
-
if (!(part in current) || typeof current[part] !== "object") {
|
|
22
|
-
current[part] = {};
|
|
23
|
-
}
|
|
24
|
-
current = current[part];
|
|
25
|
-
}
|
|
26
|
-
const lastPart = parts[parts.length - 1];
|
|
27
|
-
if (lastPart !== undefined)
|
|
28
|
-
current[lastPart] = obj[key];
|
|
29
|
-
delete result[key];
|
|
30
|
-
}
|
|
31
|
-
return result;
|
|
32
|
-
}
|
|
33
|
-
const extractInputSchema = z.preprocess(expandDotNotation, z.object({
|
|
5
|
+
const extractInputSchema = z.object({
|
|
34
6
|
url: z
|
|
35
7
|
.url()
|
|
36
8
|
.optional()
|
|
@@ -57,8 +29,32 @@ const extractInputSchema = z.preprocess(expandDotNotation, z.object({
|
|
|
57
29
|
.min(4)
|
|
58
30
|
.describe("The prompt instructing the AI how to extract data from the document"),
|
|
59
31
|
schema: z
|
|
60
|
-
.
|
|
61
|
-
.
|
|
32
|
+
.union([
|
|
33
|
+
z.record(z.string(), z.unknown()),
|
|
34
|
+
z.string().transform((str, ctx) => {
|
|
35
|
+
try {
|
|
36
|
+
// Strip surrounding quotes added by form-data clients
|
|
37
|
+
const trimmed = str.startsWith('"') && str.endsWith('"') ? str.slice(1, -1) : str;
|
|
38
|
+
const parsed = JSON.parse(trimmed);
|
|
39
|
+
if (typeof parsed !== "object" || parsed === null) {
|
|
40
|
+
ctx.addIssue({
|
|
41
|
+
code: z.ZodIssueCode.custom,
|
|
42
|
+
message: "Schema must be a JSON object",
|
|
43
|
+
});
|
|
44
|
+
return z.NEVER;
|
|
45
|
+
}
|
|
46
|
+
return parsed;
|
|
47
|
+
}
|
|
48
|
+
catch {
|
|
49
|
+
ctx.addIssue({
|
|
50
|
+
code: z.ZodIssueCode.custom,
|
|
51
|
+
message: "Invalid JSON string for schema",
|
|
52
|
+
});
|
|
53
|
+
return z.NEVER;
|
|
54
|
+
}
|
|
55
|
+
}),
|
|
56
|
+
])
|
|
57
|
+
.describe("JSON Schema describing the structure of the data to extract from the document. Can be a JSON object or a JSON string."),
|
|
62
58
|
model: z
|
|
63
59
|
.enum(["auto", ...pdfvectorModelSchema.options])
|
|
64
60
|
.optional()
|
|
@@ -69,7 +65,7 @@ const extractInputSchema = z.preprocess(expandDotNotation, z.object({
|
|
|
69
65
|
"'mini': Uses mid-range models (likely better than GPT-5-mini). Supports PDF, Word, Excel, CSV. " +
|
|
70
66
|
"'pro': Uses capable models (likely better than GPT-5.2). Supports PDF, Word, Excel, CSV, Image. " +
|
|
71
67
|
"'max': Uses the most powerful models (likely better than Claude Opus 4.6). Supports PDF, Word, Excel, CSV, Image."),
|
|
72
|
-
})
|
|
68
|
+
});
|
|
73
69
|
const extractOutputSchema = z
|
|
74
70
|
.object({
|
|
75
71
|
data: z
|
|
@@ -80,6 +76,14 @@ const extractOutputSchema = z
|
|
|
80
76
|
"'mini': Supports PDF, Word, Excel, CSV. " +
|
|
81
77
|
"'pro': Supports PDF, Word, Excel, CSV, Image. " +
|
|
82
78
|
"'max': Supports PDF, Word, Excel, CSV, Image."),
|
|
79
|
+
requestId: z
|
|
80
|
+
.number()
|
|
81
|
+
.int()
|
|
82
|
+
.describe("Unique request identifier for this API call"),
|
|
83
|
+
documentId: z
|
|
84
|
+
.string()
|
|
85
|
+
.optional()
|
|
86
|
+
.describe("Document ID if provided via x-pdfvector-document-id header"),
|
|
83
87
|
})
|
|
84
88
|
.meta({
|
|
85
89
|
examples: [
|
|
@@ -90,6 +94,7 @@ const extractOutputSchema = z
|
|
|
90
94
|
year: 2020,
|
|
91
95
|
},
|
|
92
96
|
model: "mini",
|
|
97
|
+
requestId: 1,
|
|
93
98
|
},
|
|
94
99
|
],
|
|
95
100
|
});
|
|
@@ -99,7 +104,7 @@ const requestExamples = {
|
|
|
99
104
|
value: {
|
|
100
105
|
url: "https://drive.google.com/file/d/13T04Yk20OwBNIDyvJJ3XlUg9WfOsmbjm/view?usp=share_link",
|
|
101
106
|
prompt: "Extract the title, authors, and publication year from this research paper",
|
|
102
|
-
schema: {
|
|
107
|
+
schema: JSON.stringify({
|
|
103
108
|
type: "object",
|
|
104
109
|
properties: {
|
|
105
110
|
title: { type: "string" },
|
|
@@ -107,7 +112,7 @@ const requestExamples = {
|
|
|
107
112
|
year: { type: "number" },
|
|
108
113
|
},
|
|
109
114
|
required: ["title", "authors", "year"],
|
|
110
|
-
},
|
|
115
|
+
}),
|
|
111
116
|
},
|
|
112
117
|
},
|
|
113
118
|
"Extract from base64": {
|
|
@@ -115,13 +120,13 @@ const requestExamples = {
|
|
|
115
120
|
value: {
|
|
116
121
|
base64: "JVBERi0xLjAKMSAwIG9iajw8L1R5cGUvQ2F0YWxvZy9QYWdlcyAyIDAgUj4+ZW5kb2JqIDIgMCBvYmo8PC9UeXBlL1BhZ2VzL0tpZHNbMyAwIFJdL0NvdW50IDE+PmVuZG9iaiAzIDAgb2JqPDwvVHlwZS9QYWdlL01lZGlhQm94WzAgMCAzIDNdL1BhcmVudCAyIDAgUj4+ZW5kb2JqCnhyZWYKMCA0CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDAwOSAwMDAwMCBuIAowMDAwMDAwMDU4IDAwMDAwIG4gCjAwMDAwMDAxMTUgMDAwMDAgbiAKdHJhaWxlcjw8L1NpemUgNC9Sb290IDEgMCBSPj4Kc3RhcnR4cmVmCjE5MAolJUVPRg==",
|
|
117
122
|
prompt: "Extract the main content from this document",
|
|
118
|
-
schema: {
|
|
123
|
+
schema: JSON.stringify({
|
|
119
124
|
type: "object",
|
|
120
125
|
properties: {
|
|
121
126
|
content: { type: "string" },
|
|
122
127
|
},
|
|
123
128
|
required: ["content"],
|
|
124
|
-
},
|
|
129
|
+
}),
|
|
125
130
|
},
|
|
126
131
|
},
|
|
127
132
|
"Extract from file upload": {
|
|
@@ -129,14 +134,14 @@ const requestExamples = {
|
|
|
129
134
|
value: {
|
|
130
135
|
file: "(binary)",
|
|
131
136
|
prompt: "Extract the title and summary from this document",
|
|
132
|
-
schema: {
|
|
137
|
+
schema: JSON.stringify({
|
|
133
138
|
type: "object",
|
|
134
139
|
properties: {
|
|
135
140
|
title: { type: "string" },
|
|
136
141
|
summary: { type: "string" },
|
|
137
142
|
},
|
|
138
143
|
required: ["title", "summary"],
|
|
139
|
-
},
|
|
144
|
+
}),
|
|
140
145
|
},
|
|
141
146
|
},
|
|
142
147
|
};
|
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
export function getDefaultSpec(op, requestExamples) {
|
|
2
2
|
op.security = [{ bearerAuth: [] }];
|
|
3
|
+
const params = (op.parameters ?? []);
|
|
4
|
+
params.push({
|
|
5
|
+
name: "x-pdfvector-document-id",
|
|
6
|
+
in: "header",
|
|
7
|
+
required: false,
|
|
8
|
+
schema: { type: "string", default: "my-doc-123" },
|
|
9
|
+
description: "Optional document ID to associate with this request. Returned in the response and saved for usage tracking.",
|
|
10
|
+
});
|
|
11
|
+
op.parameters = params;
|
|
3
12
|
const reqBody = op.requestBody;
|
|
4
13
|
if (reqBody?.content) {
|
|
5
14
|
for (const mediaType of Object.values(reqBody.content)) {
|
|
@@ -35,27 +35,22 @@ const parseInputSchema = z.object({
|
|
|
35
35
|
"'pro': For documents up to 30 pages with tables, handwritten text, figures, math, and Arabic. File up to 30 pages, up to 40MB in size. Supports PDF, Word, Excel, CSV, Image. " +
|
|
36
36
|
"'max': For large documents with full Pro capabilities plus enhanced multilingual support. File up to 1000 pages, up to 500MB in size. Supports PDF, Word, Excel, CSV, Image."),
|
|
37
37
|
});
|
|
38
|
-
const parseOutputSchema = z
|
|
39
|
-
.object({
|
|
38
|
+
const parseOutputSchema = z.object({
|
|
40
39
|
markdown: z.string().describe("Extracted text content from the document"),
|
|
41
|
-
pageCount: z
|
|
42
|
-
.number()
|
|
43
|
-
.int()
|
|
44
|
-
.describe("Total number of pages in the document"),
|
|
40
|
+
pageCount: z.number().int().describe("Total number of pages in the document"),
|
|
45
41
|
model: pdfvectorModelSchema.describe("Model tier used to parse the document. " +
|
|
46
42
|
"'nano': Supports PDF, Word, Excel, CSV. " +
|
|
47
43
|
"'mini': Supports PDF, Word, Excel, CSV. " +
|
|
48
44
|
"'pro': Supports PDF, Word, Excel, CSV, Image. " +
|
|
49
45
|
"'max': Supports PDF, Word, Excel, CSV, Image."),
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
],
|
|
46
|
+
requestId: z
|
|
47
|
+
.number()
|
|
48
|
+
.int()
|
|
49
|
+
.describe("Unique request identifier for this API call"),
|
|
50
|
+
documentId: z
|
|
51
|
+
.string()
|
|
52
|
+
.optional()
|
|
53
|
+
.describe("Document ID if provided via x-pdfvector-document-id header"),
|
|
59
54
|
});
|
|
60
55
|
const requestExamples = {
|
|
61
56
|
"Parse from URL": {
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# @pdfvector/instance-contract
|
|
2
2
|
|
|
3
|
+
## 0.0.13
|
|
4
|
+
### Patch Changes
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
- [#87](https://github.com/phuctm97/pdfvector/pull/87) [`a147b7c`](https://github.com/phuctm97/pdfvector/commit/a147b7c7646aab95422e5b874acbc89695a1c985) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Don't store file in instance and update org for spa
|
|
9
|
+
|
|
10
|
+
## 0.0.12
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
- [#73](https://github.com/phuctm97/pdfvector/pull/73) [`ac9abc9`](https://github.com/phuctm97/pdfvector/commit/ac9abc91033f12b2af2ff9e49393873f1759dcda) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Add usage tracker
|
|
16
|
+
|
|
3
17
|
## 0.0.11
|
|
4
18
|
### Patch Changes
|
|
5
19
|
|