@pdfvector/instance-contract 0.0.11 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,4 +19,6 @@ export declare const ask: import("@orpc/contract").ContractProcedureBuilderWithI
19
19
  pro: "pro";
20
20
  max: "max";
21
21
  }>;
22
+ requestId: z.ZodNumber;
23
+ documentId: z.ZodOptional<z.ZodString>;
22
24
  }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -47,12 +47,21 @@ const askOutputSchema = z
47
47
  "'mini': Supports PDF, Word, Excel, CSV. " +
48
48
  "'pro': Supports PDF, Word, Excel, CSV, Image. " +
49
49
  "'max': Supports PDF, Word, Excel, CSV, Image."),
50
+ requestId: z
51
+ .number()
52
+ .int()
53
+ .describe("Unique request identifier for this API call"),
54
+ documentId: z
55
+ .string()
56
+ .optional()
57
+ .describe("Document ID if provided via x-pdfvector-document-id header"),
50
58
  })
51
59
  .meta({
52
60
  examples: [
53
61
  {
54
62
  markdown: "The study found that viral shedding peaked during the first week of symptoms, with the highest viral loads detected in throat swabs.",
55
63
  model: "mini",
64
+ requestId: 1,
56
65
  },
57
66
  ],
58
67
  });
@@ -1,10 +1,10 @@
1
1
  import { z } from "zod";
2
- export declare const extract: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodObject<{
2
+ export declare const extract: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.ZodObject<{
3
3
  url: z.ZodOptional<z.ZodURL>;
4
4
  file: z.ZodOptional<z.ZodFile>;
5
5
  base64: z.ZodOptional<z.ZodString>;
6
6
  prompt: z.ZodString;
7
- schema: z.ZodRecord<z.ZodString, z.ZodUnknown>;
7
+ schema: z.ZodUnion<readonly [z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodPipe<z.ZodString, z.ZodTransform<Record<string, unknown>, string>>]>;
8
8
  model: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
9
9
  nano: "nano";
10
10
  mini: "mini";
@@ -12,7 +12,7 @@ export declare const extract: import("@orpc/contract").ContractProcedureBuilderW
12
12
  max: "max";
13
13
  auto: "auto";
14
14
  }>>>;
15
- }, z.core.$strip>>, z.ZodObject<{
15
+ }, z.core.$strip>, z.ZodObject<{
16
16
  data: z.ZodUnknown;
17
17
  model: z.ZodEnum<{
18
18
  nano: "nano";
@@ -20,4 +20,6 @@ export declare const extract: import("@orpc/contract").ContractProcedureBuilderW
20
20
  pro: "pro";
21
21
  max: "max";
22
22
  }>;
23
+ requestId: z.ZodNumber;
24
+ documentId: z.ZodOptional<z.ZodString>;
23
25
  }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -2,35 +2,7 @@ import { oc } from "@orpc/contract";
2
2
  import { z } from "zod";
3
3
  import { pdfvectorModelSchema } from "../../pdfvector-model-schema";
4
4
  import { getDefaultSpec } from "./get-default-spec";
5
- // Support for dot-notation in multipart form-data
6
- function expandDotNotation(input) {
7
- if (typeof input !== "object" || input === null)
8
- return input;
9
- const obj = input;
10
- const dotKeys = Object.keys(obj).filter((k) => k.includes("."));
11
- if (dotKeys.length === 0)
12
- return input;
13
- const result = { ...obj };
14
- for (const key of dotKeys) {
15
- const parts = key.split(".");
16
- let current = result;
17
- for (let i = 0; i < parts.length - 1; i++) {
18
- const part = parts[i];
19
- if (part === undefined)
20
- continue;
21
- if (!(part in current) || typeof current[part] !== "object") {
22
- current[part] = {};
23
- }
24
- current = current[part];
25
- }
26
- const lastPart = parts[parts.length - 1];
27
- if (lastPart !== undefined)
28
- current[lastPart] = obj[key];
29
- delete result[key];
30
- }
31
- return result;
32
- }
33
- const extractInputSchema = z.preprocess(expandDotNotation, z.object({
5
+ const extractInputSchema = z.object({
34
6
  url: z
35
7
  .url()
36
8
  .optional()
@@ -57,8 +29,32 @@ const extractInputSchema = z.preprocess(expandDotNotation, z.object({
57
29
  .min(4)
58
30
  .describe("The prompt instructing the AI how to extract data from the document"),
59
31
  schema: z
60
- .record(z.string(), z.unknown())
61
- .describe("JSON Schema describing the structure of the data to extract from the document"),
32
+ .union([
33
+ z.record(z.string(), z.unknown()),
34
+ z.string().transform((str, ctx) => {
35
+ try {
36
+ // Strip surrounding quotes added by form-data clients
37
+ const trimmed = str.startsWith('"') && str.endsWith('"') ? str.slice(1, -1) : str;
38
+ const parsed = JSON.parse(trimmed);
39
+ if (typeof parsed !== "object" || parsed === null) {
40
+ ctx.addIssue({
41
+ code: z.ZodIssueCode.custom,
42
+ message: "Schema must be a JSON object",
43
+ });
44
+ return z.NEVER;
45
+ }
46
+ return parsed;
47
+ }
48
+ catch {
49
+ ctx.addIssue({
50
+ code: z.ZodIssueCode.custom,
51
+ message: "Invalid JSON string for schema",
52
+ });
53
+ return z.NEVER;
54
+ }
55
+ }),
56
+ ])
57
+ .describe("JSON Schema describing the structure of the data to extract from the document. Can be a JSON object or a JSON string."),
62
58
  model: z
63
59
  .enum(["auto", ...pdfvectorModelSchema.options])
64
60
  .optional()
@@ -69,7 +65,7 @@ const extractInputSchema = z.preprocess(expandDotNotation, z.object({
69
65
  "'mini': Uses mid-range models (likely better than GPT-5-mini). Supports PDF, Word, Excel, CSV. " +
70
66
  "'pro': Uses capable models (likely better than GPT-5.2). Supports PDF, Word, Excel, CSV, Image. " +
71
67
  "'max': Uses the most powerful models (likely better than Claude Opus 4.6). Supports PDF, Word, Excel, CSV, Image."),
72
- }));
68
+ });
73
69
  const extractOutputSchema = z
74
70
  .object({
75
71
  data: z
@@ -80,6 +76,14 @@ const extractOutputSchema = z
80
76
  "'mini': Supports PDF, Word, Excel, CSV. " +
81
77
  "'pro': Supports PDF, Word, Excel, CSV, Image. " +
82
78
  "'max': Supports PDF, Word, Excel, CSV, Image."),
79
+ requestId: z
80
+ .number()
81
+ .int()
82
+ .describe("Unique request identifier for this API call"),
83
+ documentId: z
84
+ .string()
85
+ .optional()
86
+ .describe("Document ID if provided via x-pdfvector-document-id header"),
83
87
  })
84
88
  .meta({
85
89
  examples: [
@@ -90,6 +94,7 @@ const extractOutputSchema = z
90
94
  year: 2020,
91
95
  },
92
96
  model: "mini",
97
+ requestId: 1,
93
98
  },
94
99
  ],
95
100
  });
@@ -99,7 +104,7 @@ const requestExamples = {
99
104
  value: {
100
105
  url: "https://drive.google.com/file/d/13T04Yk20OwBNIDyvJJ3XlUg9WfOsmbjm/view?usp=share_link",
101
106
  prompt: "Extract the title, authors, and publication year from this research paper",
102
- schema: {
107
+ schema: JSON.stringify({
103
108
  type: "object",
104
109
  properties: {
105
110
  title: { type: "string" },
@@ -107,7 +112,7 @@ const requestExamples = {
107
112
  year: { type: "number" },
108
113
  },
109
114
  required: ["title", "authors", "year"],
110
- },
115
+ }),
111
116
  },
112
117
  },
113
118
  "Extract from base64": {
@@ -115,13 +120,13 @@ const requestExamples = {
115
120
  value: {
116
121
  base64: "JVBERi0xLjAKMSAwIG9iajw8L1R5cGUvQ2F0YWxvZy9QYWdlcyAyIDAgUj4+ZW5kb2JqIDIgMCBvYmo8PC9UeXBlL1BhZ2VzL0tpZHNbMyAwIFJdL0NvdW50IDE+PmVuZG9iaiAzIDAgb2JqPDwvVHlwZS9QYWdlL01lZGlhQm94WzAgMCAzIDNdL1BhcmVudCAyIDAgUj4+ZW5kb2JqCnhyZWYKMCA0CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDAwOSAwMDAwMCBuIAowMDAwMDAwMDU4IDAwMDAwIG4gCjAwMDAwMDAxMTUgMDAwMDAgbiAKdHJhaWxlcjw8L1NpemUgNC9Sb290IDEgMCBSPj4Kc3RhcnR4cmVmCjE5MAolJUVPRg==",
117
122
  prompt: "Extract the main content from this document",
118
- schema: {
123
+ schema: JSON.stringify({
119
124
  type: "object",
120
125
  properties: {
121
126
  content: { type: "string" },
122
127
  },
123
128
  required: ["content"],
124
- },
129
+ }),
125
130
  },
126
131
  },
127
132
  "Extract from file upload": {
@@ -129,14 +134,14 @@ const requestExamples = {
129
134
  value: {
130
135
  file: "(binary)",
131
136
  prompt: "Extract the title and summary from this document",
132
- schema: {
137
+ schema: JSON.stringify({
133
138
  type: "object",
134
139
  properties: {
135
140
  title: { type: "string" },
136
141
  summary: { type: "string" },
137
142
  },
138
143
  required: ["title", "summary"],
139
- },
144
+ }),
140
145
  },
141
146
  },
142
147
  };
@@ -1,5 +1,14 @@
1
1
  export function getDefaultSpec(op, requestExamples) {
2
2
  op.security = [{ bearerAuth: [] }];
3
+ const params = (op.parameters ?? []);
4
+ params.push({
5
+ name: "x-pdfvector-document-id",
6
+ in: "header",
7
+ required: false,
8
+ schema: { type: "string", default: "my-doc-123" },
9
+ description: "Optional document ID to associate with this request. Returned in the response and saved for usage tracking.",
10
+ });
11
+ op.parameters = params;
3
12
  const reqBody = op.requestBody;
4
13
  if (reqBody?.content) {
5
14
  for (const mediaType of Object.values(reqBody.content)) {
@@ -19,4 +19,6 @@ export declare const parse: import("@orpc/contract").ContractProcedureBuilderWit
19
19
  pro: "pro";
20
20
  max: "max";
21
21
  }>;
22
+ requestId: z.ZodNumber;
23
+ documentId: z.ZodOptional<z.ZodString>;
22
24
  }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -35,27 +35,22 @@ const parseInputSchema = z.object({
35
35
  "'pro': For documents up to 30 pages with tables, handwritten text, figures, math, and Arabic. File up to 30 pages, up to 40MB in size. Supports PDF, Word, Excel, CSV, Image. " +
36
36
  "'max': For large documents with full Pro capabilities plus enhanced multilingual support. File up to 1000 pages, up to 500MB in size. Supports PDF, Word, Excel, CSV, Image."),
37
37
  });
38
- const parseOutputSchema = z
39
- .object({
38
+ const parseOutputSchema = z.object({
40
39
  markdown: z.string().describe("Extracted text content from the document"),
41
- pageCount: z
42
- .number()
43
- .int()
44
- .describe("Total number of pages in the document"),
40
+ pageCount: z.number().int().describe("Total number of pages in the document"),
45
41
  model: pdfvectorModelSchema.describe("Model tier used to parse the document. " +
46
42
  "'nano': Supports PDF, Word, Excel, CSV. " +
47
43
  "'mini': Supports PDF, Word, Excel, CSV. " +
48
44
  "'pro': Supports PDF, Word, Excel, CSV, Image. " +
49
45
  "'max': Supports PDF, Word, Excel, CSV, Image."),
50
- })
51
- .meta({
52
- examples: [
53
- {
54
- markdown: "Nature | Vol 581 | 28 May 2020 | 465\nArticle\nVirological assessment of hospitalized patients with COVID-2019...",
55
- pageCount: 12,
56
- model: "nano",
57
- },
58
- ],
46
+ requestId: z
47
+ .number()
48
+ .int()
49
+ .describe("Unique request identifier for this API call"),
50
+ documentId: z
51
+ .string()
52
+ .optional()
53
+ .describe("Document ID if provided via x-pdfvector-document-id header"),
59
54
  });
60
55
  const requestExamples = {
61
56
  "Parse from URL": {
package/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # @pdfvector/instance-contract
2
2
 
3
+ ## 0.0.13
4
+ ### Patch Changes
5
+
6
+
7
+
8
+ - [#87](https://github.com/phuctm97/pdfvector/pull/87) [`a147b7c`](https://github.com/phuctm97/pdfvector/commit/a147b7c7646aab95422e5b874acbc89695a1c985) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Don't store file in instance and update org for spa
9
+
10
+ ## 0.0.12
11
+ ### Patch Changes
12
+
13
+
14
+
15
+ - [#73](https://github.com/phuctm97/pdfvector/pull/73) [`ac9abc9`](https://github.com/phuctm97/pdfvector/commit/ac9abc91033f12b2af2ff9e49393873f1759dcda) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Add usage tracker
16
+
3
17
  ## 0.0.11
4
18
  ### Patch Changes
5
19
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pdfvector/instance-contract",
3
- "version": "0.0.11",
3
+ "version": "0.0.13",
4
4
  "type": "module",
5
5
  "main": ".tsc/lib/index.js",
6
6
  "dependencies": {