@pdfvector/instance-client 0.0.49 → 0.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,6 @@
1
1
  export type PDFVectorErrorCode = "BAD_REQUEST" | "UNAUTHORIZED" | "NOT_FOUND" | "TOO_MANY_REQUESTS" | "CONFLICT" | "NOT_IMPLEMENTED" | "UNPROCESSABLE_CONTENT" | "INTERNAL_SERVER_ERROR";
2
+ export type PDFVectorErrorCategory = "authentication" | "validation" | "document_input" | "document_processing" | "rate_limit" | "not_found" | "conflict" | "unsupported" | "server";
3
+ export type PDFVectorErrorOrigin = "user" | "system";
2
4
  export interface PDFVectorErrorOptions {
3
5
  code: PDFVectorErrorCode;
4
6
  status: number;
@@ -7,6 +9,25 @@ export interface PDFVectorErrorOptions {
7
9
  cause?: unknown;
8
10
  }
9
11
  type SubclassOptions = Omit<PDFVectorErrorOptions, "code" | "status">;
12
+ export interface PDFVectorAgentError {
13
+ name: string;
14
+ code: PDFVectorErrorCode;
15
+ status: number;
16
+ category: PDFVectorErrorCategory;
17
+ origin: PDFVectorErrorOrigin;
18
+ userError: boolean;
19
+ title: string;
20
+ message: string;
21
+ suggestion: string;
22
+ retryable: boolean;
23
+ retryableWithHigherModel: boolean;
24
+ requestId?: number;
25
+ documentId?: string;
26
+ reasonCode?: string;
27
+ requestedModel?: string;
28
+ supportMessage: string;
29
+ data: Record<string, unknown>;
30
+ }
10
31
  export declare class PDFVectorError extends Error {
11
32
  readonly code: PDFVectorErrorCode;
12
33
  readonly status: number;
@@ -14,7 +35,18 @@ export declare class PDFVectorError extends Error {
14
35
  readonly requestId?: number;
15
36
  readonly documentId?: string;
16
37
  readonly userError: boolean;
38
+ readonly category: PDFVectorErrorCategory;
39
+ readonly origin: PDFVectorErrorOrigin;
40
+ readonly title: string;
41
+ readonly suggestion: string;
42
+ readonly retryable: boolean;
43
+ readonly retryableWithHigherModel: boolean;
44
+ readonly requestedModel?: string;
45
+ readonly reasonCode?: string;
46
+ readonly supportMessage: string;
17
47
  constructor(options: PDFVectorErrorOptions);
48
+ toAgentError(): PDFVectorAgentError;
49
+ toJSON(): PDFVectorAgentError;
18
50
  }
19
51
  export declare class BadRequestError extends PDFVectorError {
20
52
  constructor(options: SubclassOptions);
@@ -28,6 +60,7 @@ export declare class NotFoundError extends PDFVectorError {
28
60
  export declare class TooManyRequestsError extends PDFVectorError {
29
61
  readonly limit?: number;
30
62
  readonly resetAt?: string;
63
+ readonly retryAfterSeconds?: number;
31
64
  constructor(options: SubclassOptions);
32
65
  }
33
66
  export declare class ConflictError extends PDFVectorError {
@@ -67,6 +100,10 @@ export declare class URLFetchError extends BadRequestError {
67
100
  readonly statusText?: string;
68
101
  constructor(options: SubclassOptions);
69
102
  }
103
+ export declare class InvalidDocumentURLError extends BadRequestError {
104
+ }
105
+ export declare class InvalidBase64Error extends BadRequestError {
106
+ }
70
107
  export declare class TierNotSupportedError extends BadRequestError {
71
108
  readonly documentType?: string;
72
109
  readonly model?: string;
@@ -88,6 +125,19 @@ export declare class ExtractionFailedError extends UnprocessableContentError {
88
125
  readonly rawText?: string;
89
126
  constructor(options: SubclassOptions);
90
127
  }
128
+ export declare class AcademicPaperNotFoundError extends NotFoundError {
129
+ readonly input?: string;
130
+ readonly paperErrorCode?: string;
131
+ constructor(options: SubclassOptions);
132
+ }
133
+ export declare class NoPublicPDFError extends NotFoundError {
134
+ readonly input?: string;
135
+ readonly paperTitle?: string;
136
+ readonly doi?: string;
137
+ readonly providerURL?: string;
138
+ constructor(options: SubclassOptions);
139
+ }
91
140
  export declare function fromORPCError(error: unknown): PDFVectorError | undefined;
92
141
  export declare function isPDFVectorError(error: unknown): error is PDFVectorError;
142
/**
 * Type guard for caller-fixable failures.
 *
 * Returns `true` only when `error` is a `PDFVectorError` whose `userError`
 * flag is set. The predicate is narrowed to that exact shape so the negative
 * branch does not wrongly exclude `PDFVectorError` values whose `userError`
 * is `false` (a bare `error is PDFVectorError` predicate would narrow an
 * already-typed `PDFVectorError` to `never` there).
 *
 * @param error - Any caught value.
 * @returns Whether `error` is a `PDFVectorError` with `userError === true`.
 */
export declare function isPDFVectorUserError(error: unknown): error is PDFVectorError & {
    readonly userError: true;
};
93
143
  export {};
@@ -14,6 +14,191 @@ function readStringArray(data, key) {
14
14
  const strings = value.filter((v) => typeof v === "string");
15
15
  return strings.length === value.length ? strings : undefined;
16
16
  }
17
/**
 * Looks up `key` on `data` and returns the value only if it is a real boolean.
 *
 * @param data - Payload object; may be null or undefined.
 * @param key - Property name to read.
 * @returns The boolean stored under `key`, otherwise `undefined`.
 */
function readBoolean(data, key) {
    const candidate = data?.[key];
    if (typeof candidate !== "boolean") {
        return undefined;
    }
    return candidate;
}
21
/**
 * Extracts the first capture group of `pattern` from `message` and parses it
 * as a floating-point number.
 *
 * @param message - Text to search.
 * @param pattern - Regular expression whose first capture group holds the number.
 * @returns The parsed finite number, or `undefined` when there is no match,
 *   the group is empty, or the captured text is not a finite number.
 */
function readNumberFromMatch(message, pattern) {
    const captured = message.match(pattern)?.[1];
    if (!captured) {
        return undefined;
    }
    const parsed = Number.parseFloat(captured);
    if (Number.isFinite(parsed)) {
        return parsed;
    }
    return undefined;
}
28
/**
 * Extracts the first capture group of `pattern` from `message`, trimmed.
 *
 * @param message - Text to search.
 * @param pattern - Regular expression whose first capture group holds the value.
 * @returns The trimmed capture, or `undefined` when there is no match or the
 *   capture is empty after trimming.
 */
function readStringFromMatch(message, pattern) {
    const captured = message.match(pattern)?.[1];
    const trimmed = captured?.trim();
    if (!trimmed) {
        return undefined;
    }
    return trimmed;
}
33
/**
 * Extracts the first capture group of `pattern` from `message` and splits it
 * on commas into a list of trimmed, non-empty items.
 *
 * @param message - Text to search.
 * @param pattern - Regular expression whose first capture group holds the list.
 * @returns The non-empty items in order, or `undefined` when there is no
 *   match or every item is empty.
 */
function readListFromMatch(message, pattern) {
    const captured = message.match(pattern)?.[1]?.trim();
    if (!captured) {
        return undefined;
    }
    const entries = [];
    for (const piece of captured.split(",")) {
        const entry = piece.trim();
        if (entry) {
            entries.push(entry);
        }
    }
    return entries.length === 0 ? undefined : entries;
}
43
/**
 * Decides whether an error indicates that retrying with a higher model could
 * succeed.
 *
 * The hint is present when the payload carries
 * `retryableWithHigherModel: true`, or when the message (case-insensitive)
 * contains one of the known "higher model" phrases, starts with
 * "file too large", or starts with "document has" and also mentions both
 * "page" and "limit".
 *
 * @param message - Error message text.
 * @param data - Error payload; may be null or undefined.
 * @returns `true` when a higher-model hint is detected.
 */
function hasHigherModelHint(message, data) {
    const text = message.toLowerCase();
    // Only a literal boolean `true` counts as an explicit flag.
    if (data?.["retryableWithHigherModel"] === true) {
        return true;
    }
    const phrases = [
        "try with a higher model",
        "use a higher model",
        "use higher model",
    ];
    if (phrases.some((phrase) => text.includes(phrase))) {
        return true;
    }
    if (text.startsWith("file too large")) {
        return true;
    }
    return (text.startsWith("document has") &&
        text.includes("page") &&
        text.includes("limit"));
}
54
/**
 * Maps an error code (plus message and payload) to an error category.
 *
 * Most codes map directly to one category. `UNPROCESSABLE_CONTENT` is
 * "document_input" when the payload marks it as a user error and
 * "document_processing" otherwise. Any remaining code is classified from the
 * message: validation-style wording yields "validation", everything else
 * "document_input".
 *
 * @param code - Error code.
 * @param message - Error message text.
 * @param data - Error payload; read only for `UNPROCESSABLE_CONTENT`.
 * @returns The inferred category string.
 */
function inferCategory(code, message, data) {
    const text = message.toLowerCase();
    switch (code) {
        case "UNAUTHORIZED":
            return "authentication";
        case "TOO_MANY_REQUESTS":
            return "rate_limit";
        case "NOT_FOUND":
            return "not_found";
        case "CONFLICT":
            return "conflict";
        case "NOT_IMPLEMENTED":
            return "unsupported";
        case "INTERNAL_SERVER_ERROR":
            return "server";
        case "UNPROCESSABLE_CONTENT":
            if (data.userError === true) {
                return "document_input";
            }
            return "document_processing";
    }
    const validationMarkers = [
        "json schema",
        "invalid input",
        "invalid type",
        "expected",
    ];
    const looksLikeValidation = validationMarkers.some((marker) => text.includes(marker));
    return looksLikeValidation ? "validation" : "document_input";
}
79
/**
 * Decides whether an error originates from the caller ("user") or from the
 * service ("system").
 *
 * Precedence:
 *  1. A payload with `userError: true` is always "user".
 *  2. `INTERNAL_SERVER_ERROR` is "system".
 *  3. The "document_processing" category is "system".
 *  4. Everything else is "user".
 *
 * @param code - Error code.
 * @param category - Category previously inferred for the error.
 * @param data - Error payload; may be null or undefined.
 * @returns "user" or "system".
 */
function inferOrigin(code, category, data) {
    // Optional chaining keeps a missing payload from throwing, matching how
    // readBoolean reads the payload.
    if (data?.userError === true) {
        return "user";
    }
    // Past this point userError is known not to be true, so the category
    // check needs no further userError test.
    if (code === "INTERNAL_SERVER_ERROR" || category === "document_processing") {
        return "system";
    }
    return "user";
}
88
/** Titles keyed by the specialized error class name. */
const TITLE_BY_CLASS_NAME = new Map([
    ["FileTooLargeError", "File too large"],
    ["PageLimitExceededError", "Page limit exceeded"],
    ["PasswordProtectedError", "Password-protected document"],
    ["UnsupportedFormatError", "Unsupported document format"],
    ["URLFetchError", "Could not fetch document URL"],
    ["InvalidDocumentURLError", "Invalid document URL"],
    ["InvalidBase64Error", "Invalid base64 document"],
    ["TierNotSupportedError", "Model does not support this file type"],
    ["InvalidSchemaError", "Invalid JSON Schema"],
    ["NoInputProvidedError", "No document input provided"],
    ["EmptyDocumentError", "Empty document"],
    ["NoTextDetectedError", "No readable text detected"],
    ["ExtractionFailedError", "Extraction failed"],
    ["AcademicPaperNotFoundError", "Paper not found"],
    ["NoPublicPDFError", "No public PDF found"],
]);
/** Generic titles keyed by error code, used when nothing more specific applies. */
const TITLE_BY_CODE = new Map([
    ["BAD_REQUEST", "Invalid request"],
    ["UNAUTHORIZED", "Authentication failed"],
    ["NOT_FOUND", "Resource not found"],
    ["TOO_MANY_REQUESTS", "Rate limit exceeded"],
    ["CONFLICT", "Request conflict"],
    ["NOT_IMPLEMENTED", "Not implemented"],
    ["UNPROCESSABLE_CONTENT", "Document could not be processed"],
    ["INTERNAL_SERVER_ERROR", "PDF Vector server error"],
]);
/**
 * Picks a short human-readable title for an error.
 *
 * Resolution order: specialized class name, then known phrases in the
 * message, then the generic title for the error code. Unlike a bare switch
 * with no default, an unrecognised code yields a generic fallback title, so
 * the result is always a string.
 *
 * @param code - Error code.
 * @param className - Name of the error class being constructed.
 * @param message - Error message text.
 * @returns A non-empty title string.
 */
function inferTitle(code, className, message) {
    // Maps (not plain objects) so names such as "constructor" cannot hit
    // inherited prototype properties.
    const classTitle = TITLE_BY_CLASS_NAME.get(className);
    if (classTitle !== undefined) {
        return classTitle;
    }
    // Lower-case the message only when the class name did not decide.
    const lower = typeof message === "string" ? message.toLowerCase() : "";
    if (lower.includes("no public pdf url")) {
        return "No public PDF found";
    }
    if (lower.includes("paper not found")) {
        return "Paper not found";
    }
    return TITLE_BY_CODE.get(code) ?? "PDF Vector error";
}
143
/**
 * Builds a one-sentence remediation hint for an error.
 *
 * Checks run in priority order: authentication, rate limiting, higher-model
 * hints, then message-based heuristics, then code-based fallbacks, ending
 * with a generic "fix and retry".
 *
 * @param code - Error code.
 * @param message - Error message text.
 * @param data - Error payload.
 * @returns The suggestion text.
 */
function inferSuggestion(code, message, data) {
    const lower = message.toLowerCase();
    const requestId = readNumber(data, "requestId");
    if (code === "UNAUTHORIZED") {
        return "Check that the API key is present, valid, and belongs to the workspace you intend to use.";
    }
    if (code === "TOO_MANY_REQUESTS") {
        const resetAt = readString(data, "resetAt");
        if (resetAt) {
            return `Wait until ${resetAt}, then retry the request.`;
        }
        // Fall back to the relative delay when no absolute reset time is given.
        // Assumes readNumber yields undefined for a missing or non-numeric value,
        // as its `??` fallbacks elsewhere in the module suggest.
        const retryAfterSeconds = readNumber(data, "retryAfterSeconds");
        if (retryAfterSeconds !== undefined && retryAfterSeconds > 0) {
            const unit = retryAfterSeconds === 1 ? "second" : "seconds";
            return `Wait ${retryAfterSeconds} ${unit}, then retry the request.`;
        }
        return "Wait for the rate limit window to reset, then retry the request.";
    }
    if (hasHigherModelHint(message, data)) {
        const requestedModel = readString(data, "requestedModel");
        return requestedModel
            ? `Retry with a higher model than '${requestedModel}', or reduce the file size/pages before sending it again.`
            : "Retry with a higher model, or reduce the file size/pages before sending it again.";
    }
    if (lower.includes("password-protected")) {
        return "Remove the document password or upload an unlocked copy.";
    }
    if (lower.includes("json schema")) {
        return "Fix the JSON Schema shape, then retry. Object schemas need properties, array schemas need items, and enum values must match their declared types.";
    }
    // Must precede the generic "url" check below, which would also match.
    if (lower.includes("no public pdf url")) {
        return "Provide a direct PDF URL or upload the paper file directly.";
    }
    if (lower.includes("url") ||
        lower.includes("google drive") ||
        lower.includes("docs returned an html page")) {
        return "Use a direct, publicly fetchable document URL, or upload the file/base64 content instead.";
    }
    if (lower.includes("base64")) {
        return "Send only the raw base64 document content, without a data URL prefix or invalid characters.";
    }
    if (lower.includes("unsupported format")) {
        return "Convert the file to a supported format such as PDF, DOCX, XLSX, PPTX, CSV, TXT, HTML, or a supported image format.";
    }
    if (lower.includes("no input provided") || lower.includes("empty document")) {
        return "Send exactly one document source: url, file, or base64, and make sure it is not empty.";
    }
    if (lower.includes("no readable text") ||
        lower.includes("no text content") ||
        lower.includes("document appears to be empty")) {
        return "Upload a clearer scan or a source document with selectable/readable text.";
    }
    if (code === "NOT_FOUND") {
        return "Check the identifier or URL and retry with a value that PDF Vector can resolve.";
    }
    if (code === "INTERNAL_SERVER_ERROR") {
        // Compare against undefined so a request id of 0 is still reported,
        // matching how toAgentError decides whether to include requestId.
        return requestId !== undefined
            ? `Retry the request. If it keeps failing, contact support with requestId ${requestId}.`
            : "Retry the request. If it keeps failing, contact support with the request details.";
    }
    if (code === "UNPROCESSABLE_CONTENT") {
        return "Review the document quality and requested operation, then retry with clearer content or a simpler extraction prompt/schema.";
    }
    return "Fix the request and retry.";
}
17
202
  export class PDFVectorError extends Error {
18
203
  code;
19
204
  status;
@@ -21,6 +206,15 @@ export class PDFVectorError extends Error {
21
206
  requestId;
22
207
  documentId;
23
208
  userError;
209
+ category;
210
+ origin;
211
+ title;
212
+ suggestion;
213
+ retryable;
214
+ retryableWithHigherModel;
215
+ requestedModel;
216
+ reasonCode;
217
+ supportMessage;
24
218
  constructor(options) {
25
219
  super(options.message, options.cause ? { cause: options.cause } : undefined);
26
220
  this.name = new.target.name;
@@ -29,7 +223,47 @@ export class PDFVectorError extends Error {
29
223
  this.data = options.data ?? {};
30
224
  this.requestId = readNumber(this.data, "requestId");
31
225
  this.documentId = readString(this.data, "documentId");
32
- this.userError = this.data.userError === true;
226
+ this.category = inferCategory(this.code, this.message, this.data);
227
+ this.origin = inferOrigin(this.code, this.category, this.data);
228
+ this.userError = this.origin === "user";
229
+ this.title = inferTitle(this.code, this.name, this.message);
230
+ this.suggestion = inferSuggestion(this.code, this.message, this.data);
231
+ this.retryableWithHigherModel = hasHigherModelHint(this.message, this.data);
232
+ this.retryable =
233
+ this.retryableWithHigherModel ||
234
+ this.code === "TOO_MANY_REQUESTS" ||
235
+ this.code === "INTERNAL_SERVER_ERROR";
236
+ this.requestedModel = readString(this.data, "requestedModel");
237
+ this.reasonCode = readString(this.data, "code");
238
+ this.supportMessage = this.requestId
239
+ ? `PDF Vector request ${this.requestId} failed with ${this.code}: ${this.message}`
240
+ : `PDF Vector request failed with ${this.code}: ${this.message}`;
241
+ }
242
+ toAgentError() {
243
+ return {
244
+ name: this.name,
245
+ code: this.code,
246
+ status: this.status,
247
+ category: this.category,
248
+ origin: this.origin,
249
+ userError: this.userError,
250
+ title: this.title,
251
+ message: this.message,
252
+ suggestion: this.suggestion,
253
+ retryable: this.retryable,
254
+ retryableWithHigherModel: this.retryableWithHigherModel,
255
+ ...(this.requestId !== undefined ? { requestId: this.requestId } : {}),
256
+ ...(this.documentId !== undefined ? { documentId: this.documentId } : {}),
257
+ ...(this.reasonCode !== undefined ? { reasonCode: this.reasonCode } : {}),
258
+ ...(this.requestedModel !== undefined
259
+ ? { requestedModel: this.requestedModel }
260
+ : {}),
261
+ supportMessage: this.supportMessage,
262
+ data: this.data,
263
+ };
264
+ }
265
+ toJSON() {
266
+ return this.toAgentError();
33
267
  }
34
268
  }
35
269
  export class BadRequestError extends PDFVectorError {
@@ -50,10 +284,12 @@ export class NotFoundError extends PDFVectorError {
50
284
  export class TooManyRequestsError extends PDFVectorError {
51
285
  limit;
52
286
  resetAt;
287
+ retryAfterSeconds;
53
288
  constructor(options) {
54
289
  super({ ...options, code: "TOO_MANY_REQUESTS", status: 429 });
55
290
  this.limit = readNumber(options.data, "limit");
56
291
  this.resetAt = readString(options.data, "resetAt");
292
+ this.retryAfterSeconds = readNumber(options.data, "retryAfterSeconds");
57
293
  }
58
294
  }
59
295
  export class ConflictError extends PDFVectorError {
@@ -82,9 +318,15 @@ export class FileTooLargeError extends BadRequestError {
82
318
  model;
83
319
  constructor(options) {
84
320
  super(options);
85
- this.fileSizeMB = readNumber(options.data, "fileSizeMB");
86
- this.limitMB = readNumber(options.data, "limitMB");
87
- this.model = readString(options.data, "model");
321
+ this.fileSizeMB =
322
+ readNumber(options.data, "fileSizeMB") ??
323
+ readNumberFromMatch(options.message, /file too large:\s*([\d.]+)\s*mb/i);
324
+ this.limitMB =
325
+ readNumber(options.data, "limitMB") ??
326
+ readNumberFromMatch(options.message, /exceeds the\s*([\d.]+)\s*mb limit/i);
327
+ this.model =
328
+ readString(options.data, "model") ??
329
+ readStringFromMatch(options.message, /for the '([^']+)' model/i);
88
330
  }
89
331
  }
90
332
  export class PageLimitExceededError extends BadRequestError {
@@ -93,9 +335,15 @@ export class PageLimitExceededError extends BadRequestError {
93
335
  model;
94
336
  constructor(options) {
95
337
  super(options);
96
- this.pageCount = readNumber(options.data, "pageCount");
97
- this.pageLimit = readNumber(options.data, "pageLimit");
98
- this.model = readString(options.data, "model");
338
+ this.pageCount =
339
+ readNumber(options.data, "pageCount") ??
340
+ readNumberFromMatch(options.message, /document has\s*([\d.]+)\s*pages/i);
341
+ this.pageLimit =
342
+ readNumber(options.data, "pageLimit") ??
343
+ readNumberFromMatch(options.message, /exceeds the\s*([\d.]+)-page limit/i);
344
+ this.model =
345
+ readString(options.data, "model") ??
346
+ readStringFromMatch(options.message, /for the '([^']+)' model/i);
99
347
  }
100
348
  }
101
349
  export class PasswordProtectedError extends BadRequestError {
@@ -105,8 +353,12 @@ export class UnsupportedFormatError extends BadRequestError {
105
353
  supportedFormats;
106
354
  constructor(options) {
107
355
  super(options);
108
- this.format = readString(options.data, "format");
109
- this.supportedFormats = readString(options.data, "supportedFormats");
356
+ this.format =
357
+ readString(options.data, "format") ??
358
+ readStringFromMatch(options.message, /unsupported format \(([^)]+)\)/i);
359
+ this.supportedFormats =
360
+ readString(options.data, "supportedFormats") ??
361
+ readStringFromMatch(options.message, /supported formats:\s*(.+)$/i);
110
362
  }
111
363
  }
112
364
  export class URLFetchError extends BadRequestError {
@@ -116,26 +368,42 @@ export class URLFetchError extends BadRequestError {
116
368
  constructor(options) {
117
369
  super(options);
118
370
  this.url = readString(options.data, "url");
119
- this.statusCode = readNumber(options.data, "statusCode");
120
- this.statusText = readString(options.data, "statusText");
371
+ this.statusCode =
372
+ readNumber(options.data, "statusCode") ??
373
+ readNumberFromMatch(options.message, /document from url:\s*(\d{3})/i);
374
+ this.statusText =
375
+ readString(options.data, "statusText") ??
376
+ readStringFromMatch(options.message, /document from url:\s*\d{3}\s+(.+)$/i);
121
377
  }
122
378
  }
379
+ export class InvalidDocumentURLError extends BadRequestError {
380
+ }
381
+ export class InvalidBase64Error extends BadRequestError {
382
+ }
123
383
  export class TierNotSupportedError extends BadRequestError {
124
384
  documentType;
125
385
  model;
126
386
  allowedTypes;
127
387
  constructor(options) {
128
388
  super(options);
129
- this.documentType = readString(options.data, "documentType");
130
- this.model = readString(options.data, "model");
131
- this.allowedTypes = readStringArray(options.data, "allowedTypes");
389
+ this.documentType =
390
+ readString(options.data, "documentType") ??
391
+ readStringFromMatch(options.message, /file type '([^']+)'/i);
392
+ this.model =
393
+ readString(options.data, "model") ??
394
+ readStringFromMatch(options.message, /for the '([^']+)' tier/i);
395
+ this.allowedTypes =
396
+ readStringArray(options.data, "allowedTypes") ??
397
+ readListFromMatch(options.message, /supported types:\s*(.+)$/i);
132
398
  }
133
399
  }
134
400
  export class InvalidSchemaError extends BadRequestError {
135
401
  reason;
136
402
  constructor(options) {
137
403
  super(options);
138
- this.reason = readString(options.data, "reason");
404
+ this.reason =
405
+ readString(options.data, "reason") ??
406
+ readStringFromMatch(options.message, /invalid json schema:\s*(.+)$/i);
139
407
  }
140
408
  }
141
409
  export class NoInputProvidedError extends BadRequestError {
@@ -153,6 +421,28 @@ export class ExtractionFailedError extends UnprocessableContentError {
153
421
  this.rawText = readString(options.data, "rawText");
154
422
  }
155
423
  }
424
+ export class AcademicPaperNotFoundError extends NotFoundError {
425
+ input;
426
+ paperErrorCode;
427
+ constructor(options) {
428
+ super(options);
429
+ this.input = readString(options.data, "input");
430
+ this.paperErrorCode = readString(options.data, "code");
431
+ }
432
+ }
433
+ export class NoPublicPDFError extends NotFoundError {
434
+ input;
435
+ paperTitle;
436
+ doi;
437
+ providerURL;
438
+ constructor(options) {
439
+ super(options);
440
+ this.input = readString(options.data, "input");
441
+ this.paperTitle = readString(options.data, "title");
442
+ this.doi = readString(options.data, "doi");
443
+ this.providerURL = readString(options.data, "providerURL");
444
+ }
445
+ }
156
446
  const ORPC_CODE_TO_CLASS = {
157
447
  BAD_REQUEST: BadRequestError,
158
448
  UNAUTHORIZED: UnauthorizedError,
@@ -189,6 +479,11 @@ function selectSpecializedClass(code, message, data) {
189
479
  lower.startsWith("url did not return a supported document") ||
190
480
  lower.includes("returned an html page instead of a document"))
191
481
  return URLFetchError;
482
+ if (lower.startsWith("invalid document url") ||
483
+ lower.startsWith("document url must use"))
484
+ return InvalidDocumentURLError;
485
+ if (lower.startsWith("invalid base64-encoded string"))
486
+ return InvalidBase64Error;
192
487
  if (lower.startsWith("no input provided") ||
193
488
  lower.startsWith("empty document content"))
194
489
  return NoInputProvidedError;
@@ -205,6 +500,12 @@ function selectSpecializedClass(code, message, data) {
205
500
  if (data.hint !== undefined || data.rawText !== undefined)
206
501
  return ExtractionFailedError;
207
502
  }
503
+ if (code === "NOT_FOUND") {
504
+ if (data.code === "NO_PUBLIC_PDF" || lower.includes("no public pdf url"))
505
+ return NoPublicPDFError;
506
+ if (lower.startsWith("paper not found"))
507
+ return AcademicPaperNotFoundError;
508
+ }
208
509
  return ORPC_CODE_TO_CLASS[code];
209
510
  }
210
511
  export function fromORPCError(error) {
@@ -227,3 +528,6 @@ export function fromORPCError(error) {
227
528
  export function isPDFVectorError(error) {
228
529
  return error instanceof PDFVectorError;
229
530
  }
531
+ export function isPDFVectorUserError(error) {
532
+ return error instanceof PDFVectorError && error.userError;
533
+ }
package/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # @pdfvector/instance-client
2
2
 
3
+ ## 0.0.51
4
+ ### Patch Changes
5
+
6
+
7
+
8
+ - [#244](https://github.com/phuctm97/pdfvector/pull/244) [`d751cdd`](https://github.com/phuctm97/pdfvector/commit/d751cdde1c208c3298d1a0c2c34406e724e53264) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Improve PDF Vector SDK error handling.
9
+
10
+ ## 0.0.50
11
+ ### Patch Changes
12
+
13
+ - Updated dependencies [[`2c8691c`](https://github.com/phuctm97/pdfvector/commit/2c8691c9bbd251ff7b7a153fd4254d9360c11c08)]:
14
+ - @pdfvector/instance-contract@0.0.47
15
+
3
16
  ## 0.0.49
4
17
  ### Patch Changes
5
18
 
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # PDF Vector TypeScript/JavaScript SDK
2
2
 
3
- The official TypeScript/JavaScript SDK for the [PDF Vector](https://www.pdfvector.com) API: Parse PDF, Word, Image, and Excel documents to clean, structured markdown format, ask questions about documents using AI, extract structured data from documents with JSON Schema, search across multiple academic databases with a unified API, fetch specific publications by DOI, PubMed ID, ArXiv ID, and more, find relevant academic citations for paragraphs of text, explore paper citation graphs, find similar papers, and search for research grants across US, EU, and UK funding databases.
3
+ The official TypeScript/JavaScript SDK for the [PDF Vector](https://www.pdfvector.com) API: Parse PDF, Word, Image, and Excel documents to clean, structured markdown format, ask questions about documents using AI, extract structured data from documents with JSON Schema, search across multiple academic databases with a unified API, fetch specific publications by DOI, PubMed ID, ArXiv ID, and more, convert academic paper IDs or provider URLs to markdown, find relevant academic citations for paragraphs of text, explore paper citation graphs, find similar papers, and search for research grants across US, EU, and UK funding databases.
4
4
 
5
5
  ## Installation
6
6
 
@@ -380,6 +380,36 @@ result.errors?.forEach((error) => {
380
380
 
381
381
  **Supported ID types:** DOI, PubMed ID, ArXiv ID, Semantic Scholar ID, ERIC ID, Europe PMC ID, OpenAlex ID.
382
382
 
383
+ ### Parse Academic Paper to Markdown
384
+
385
+ Resolve a paper ID or provider URL to its public PDF and parse it into markdown. Uses the same per-page model pricing as Document Parse.
386
+
387
+ ```typescript
388
+ const result = await client.academic.parse({
389
+ id: "1706.03762", // DOI, PubMed ID, ArXiv ID, Semantic Scholar ID, or provider URL
390
+ model: "auto", // "auto" | "nano" | "mini" | "pro" | "max"
391
+ });
392
+
393
+ console.log(`Title: ${result.title}`);
394
+ console.log(`Provider: ${result.detectedProvider}`);
395
+ console.log(`PDF: ${result.pdfURL}`);
396
+ console.log(result.markdown);
397
+ console.log(`Pages: ${result.pageCount}, Credits: ${result.credits}`);
398
+ ```
399
+
400
+ You can pass a provider URL instead of an ID:
401
+
402
+ ```typescript
403
+ const result = await client.academic.parse({
404
+ url: "https://arxiv.org/abs/1706.03762",
405
+ model: "nano",
406
+ });
407
+
408
+ console.log(result.markdown);
409
+ ```
410
+
411
+ Provide exactly one of `id` or `url`. If the paper cannot be found, has no public PDF, or the resolved PDF cannot be fetched, the API returns a typed `PDFVectorError` with a clear message and no parse credits are charged.
412
+
383
413
  ### Find Citations for a Paragraph
384
414
 
385
415
  Find relevant academic citations for each sentence in a paragraph using semantic similarity. Costs 2 credits per sentence analyzed.
@@ -573,6 +603,7 @@ console.log(resultB.documentId); // "doc-b"
573
603
  | Bank Statement Extract | 6 | 10 | 14 | 18 | /page |
574
604
  | Academic Search | 2 | 2 | 2 | 2 | /request |
575
605
  | Academic Fetch | 2 | 2 | 2 | 2 | /request |
606
+ | Academic Parse | 1 | 2 | 4 | 8 | /page |
576
607
  | Academic Find Citations | 2 | 2 | 2 | 2 | /sentence |
577
608
  | Academic Paper Graph | 2+ | 2+ | 2+ | 2+ | /request |
578
609
  | Academic Similar Papers | 3 | 3 | 3 | 3 | /request |
@@ -580,10 +611,14 @@ console.log(resultB.documentId); // "doc-b"
580
611
 
581
612
  ## Error Handling
582
613
 
583
- All API errors are thrown as `PDFVectorError` instances. The SDK transparently maps every server error into the most specific subclass it can, so you can branch on the type using `instanceof` and read typed metadata fields directly.
614
+ All API errors are thrown as `PDFVectorError` instances. The SDK maps server errors into specific subclasses and adds user/agent-friendly fields such as `title`, `suggestion`, `userError`, retry flags, and `requestId`.
584
615
 
585
616
  ```typescript
586
- import { createClient, PDFVectorError } from "@pdfvector/instance-client";
617
+ import {
618
+ PDFVectorError,
619
+ createClient,
620
+ isPDFVectorUserError,
621
+ } from "@pdfvector/instance-client";
587
622
 
588
623
  const client = createClient({ apiKey: "your-api-key" });
589
624
 
@@ -593,35 +628,59 @@ try {
593
628
  });
594
629
  console.log(result.markdown);
595
630
  } catch (error) {
631
+ if (isPDFVectorUserError(error)) {
632
+ console.error(error.title);
633
+ console.error(error.suggestion);
634
+ return;
635
+ }
636
+
596
637
  if (error instanceof PDFVectorError) {
597
- console.error(`API Error [${error.code}]: ${error.message}`);
598
- console.error(`HTTP Status: ${error.status}`);
599
- console.error(`Request ID: ${error.requestId}`); // server-assigned, useful for support
600
- console.error(`Document ID: ${error.documentId}`); // echoed back if you set one
601
- console.error(`User error: ${error.userError}`); // true if caused by your input
602
- } else {
603
- // Network errors (DNS, connection refused, timeout) bubble up as TypeError.
604
- console.error("Unexpected Error:", error);
638
+ console.error(error.supportMessage);
639
+ console.error(error.toAgentError());
640
+ return;
641
+ }
642
+
643
+ // Network errors (DNS, connection refused, timeout) bubble up as TypeError.
644
+ console.error("Unexpected Error:", error);
645
+ }
646
+ ```
647
+
648
+ ### User errors
649
+
650
+ Use `isPDFVectorUserError(error)` or `error.userError` for caller-fixable failures that should usually be shown to the user instead of reported as system failures. For example, URL input failures such as `URL did not return a supported document` are `URLFetchError` instances with `userError: true`.
651
+
652
+ ```typescript
653
+ import { isPDFVectorUserError, isPDFVectorError } from "@pdfvector/instance-client";
654
+
655
+ try {
656
+ await client.document.parse({ url: "https://example.com/page.html" });
657
+ } catch (error) {
658
+ if (isPDFVectorUserError(error)) {
659
+ console.error(error.suggestion);
660
+ }
661
+
662
+ if (isPDFVectorError(error) && error.retryableWithHigherModel) {
663
+ console.error("Retry with a stronger model or a smaller document.");
605
664
  }
606
665
  }
607
666
  ```
608
667
 
609
668
  ### Branching on specific error types
610
669
 
611
- Every error class extends `PDFVectorError`, so you can use `instanceof` to handle specific cases. Specialized subclasses expose typed fields pulled from the error's `data` payload:
670
+ Every error class extends `PDFVectorError`, so you can use `instanceof` to handle specific cases. Specialized subclasses expose typed fields pulled from the error payload:
612
671
 
613
672
  ```typescript
614
673
  import {
615
- createClient,
674
+ EmptyDocumentError,
675
+ ExtractionFailedError,
616
676
  FileTooLargeError,
677
+ InvalidSchemaError,
678
+ NoPublicPDFError,
617
679
  PageLimitExceededError,
618
680
  PasswordProtectedError,
619
- URLFetchError,
620
- UnauthorizedError,
621
681
  TooManyRequestsError,
622
- EmptyDocumentError,
623
- ExtractionFailedError,
624
- PDFVectorError,
682
+ UnauthorizedError,
683
+ URLFetchError,
625
684
  } from "@pdfvector/instance-client";
626
685
 
627
686
  try {
@@ -633,14 +692,18 @@ try {
633
692
  );
634
693
  } else if (error instanceof PageLimitExceededError) {
635
694
  console.error(
636
- `Document has ${error.pageCount} pages ${error.model} only supports up to ${error.pageLimit}`,
695
+ `Document has ${error.pageCount} pages; ${error.model} supports up to ${error.pageLimit}`,
637
696
  );
638
697
  } else if (error instanceof PasswordProtectedError) {
639
698
  console.error("Remove the password from the file and try again");
640
699
  } else if (error instanceof URLFetchError) {
641
- console.error(`Could not fetch ${error.url}: ${error.statusCode} ${error.statusText}`);
700
+ console.error(error.suggestion);
701
+ } else if (error instanceof InvalidSchemaError) {
702
+ console.error(error.reason);
703
+ } else if (error instanceof NoPublicPDFError) {
704
+ console.error("Provide a direct PDF URL or upload the paper file directly");
642
705
  } else if (error instanceof UnauthorizedError) {
643
- console.error("Invalid API key check your dashboard");
706
+ console.error("Invalid API key; check your dashboard");
644
707
  } else if (error instanceof TooManyRequestsError) {
645
708
  console.error(`Rate limit ${error.limit} exceeded; resets at ${error.resetAt}`);
646
709
  } else if (error instanceof EmptyDocumentError) {
@@ -648,34 +711,6 @@ try {
648
711
  } else if (error instanceof ExtractionFailedError) {
649
712
  console.error(`Extraction failed. Hint: ${error.hint}`);
650
713
  if (error.rawText) console.error(`Model output sample: ${error.rawText}`);
651
- } else if (error instanceof PDFVectorError) {
652
- // Catch-all for any error code not specifically handled
653
- console.error(`API Error [${error.code}]: ${error.message}`);
654
- }
655
- }
656
- ```
657
-
658
- You can also branch on the error code if you prefer:
659
-
660
- ```typescript
661
- try {
662
- await client.document.parse({ url: "..." });
663
- } catch (error) {
664
- if (error instanceof PDFVectorError) {
665
- switch (error.code) {
666
- case "UNAUTHORIZED":
667
- console.error("Invalid API key");
668
- break;
669
- case "BAD_REQUEST":
670
- console.error("Validation error:", error.message);
671
- break;
672
- case "UNPROCESSABLE_CONTENT":
673
- console.error("Could not process document:", error.message);
674
- break;
675
- case "INTERNAL_SERVER_ERROR":
676
- console.error(`Server error (requestId: ${error.requestId}):`, error.message);
677
- break;
678
- }
679
714
  }
680
715
  }
681
716
  ```
@@ -690,13 +725,17 @@ PDFVectorError
690
725
  │ ├── PasswordProtectedError
691
726
  │ ├── UnsupportedFormatError — format, supportedFormats
692
727
  │ ├── URLFetchError — url, statusCode, statusText
728
+ │ ├── InvalidDocumentURLError
729
+ │ ├── InvalidBase64Error
693
730
  │ ├── TierNotSupportedError — documentType, model, allowedTypes
694
731
  │ ├── InvalidSchemaError — reason
695
732
  │ └── NoInputProvidedError
696
733
  ├── UnauthorizedError (401)
697
734
  ├── NotFoundError (404)
735
+ │ ├── AcademicPaperNotFoundError — input, paperErrorCode
736
+ │ └── NoPublicPDFError — input, paperTitle, doi, providerURL
698
737
  ├── ConflictError (409)
699
- ├── TooManyRequestsError (429) — limit, resetAt
738
+ ├── TooManyRequestsError (429) — limit, resetAt, retryAfterSeconds
700
739
  ├── UnprocessableContentError (422)
701
740
  │ ├── EmptyDocumentError
702
741
  │ ├── NoTextDetectedError
@@ -709,42 +748,36 @@ PDFVectorError
709
748
 
710
749
  | Field | Type | Description |
711
750
  |-------|------|-------------|
712
- | `code` | `string` | The ORPC error code (`BAD_REQUEST`, `UNAUTHORIZED`, etc.) |
713
- | `status` | `number` | HTTP status code (400, 401, 404, 409, 422, 429, 500, 501) |
714
- | `message` | `string` | Human-readable error message |
715
- | `data` | `Record<string, unknown>` | Raw error payload from the server |
716
- | `requestId` | `number \| undefined` | Server-assigned request ID — include in support tickets |
751
+ | `code` | `string` | API error code (`BAD_REQUEST`, `UNAUTHORIZED`, etc.) |
752
+ | `status` | `number` | HTTP-style status code |
753
+ | `title` | `string` | Short readable summary |
754
+ | `message` | `string` | Server-provided error message |
755
+ | `suggestion` | `string` | Recommended next action |
756
+ | `category` | `string` | `authentication`, `validation`, `document_input`, `document_processing`, `rate_limit`, `not_found`, `conflict`, `unsupported`, or `server` |
757
+ | `origin` | `"user" \| "system"` | Whether the failure is caller-fixable or likely server/provider-side |
758
+ | `userError` | `boolean` | `true` for expected caller-fixable failures |
759
+ | `retryable` | `boolean` | `true` when retrying may help |
760
+ | `retryableWithHigherModel` | `boolean` | `true` when retrying with a stronger model or smaller document may help |
761
+ | `requestId` | `number \| undefined` | Server-assigned request ID; include in support tickets |
717
762
  | `documentId` | `string \| undefined` | Echoed back if you passed `context.documentId` |
718
- | `userError` | `boolean` | `true` if the failure was caused by your input (vs. a server-side issue) |
719
- | `cause` | `unknown` | Original error (the underlying `ORPCError` from the wire) |
720
-
721
- ### Type guard
722
-
723
- If you'd rather not import `PDFVectorError` just to do an `instanceof` check, use the `isPDFVectorError` guard:
724
-
725
- ```typescript
726
- import { isPDFVectorError } from "@pdfvector/instance-client";
763
+ | `reasonCode` | `string \| undefined` | More specific server reason when available, such as `NO_PUBLIC_PDF` |
764
+ | `supportMessage` | `string` | Compact support/logging message |
765
+ | `data` | `Record<string, unknown>` | Raw error payload from the server |
766
+ | `cause` | `unknown` | Original underlying error |
727
767
 
728
- try {
729
- await client.document.parse({ url: "..." });
730
- } catch (error) {
731
- if (isPDFVectorError(error)) {
732
- console.error(error.code, error.message, error.requestId);
733
- }
734
- }
735
- ```
768
+ Use `error.toAgentError()` or `JSON.stringify(error)` when you need a serializable error object for logs, workflows, retry planners, or agent tool responses.
736
769
 
737
770
  ### Error Codes
738
771
 
739
772
  | Code | Status | Description |
740
773
  |------|--------|-------------|
741
- | `BAD_REQUEST` | 400 | Input validation failed (e.g., missing fields, invalid URL, file too large, page limit exceeded, invalid JSON Schema) |
774
+ | `BAD_REQUEST` | 400 | Input validation failed, including invalid URLs, unsupported formats, file size limits, page limits, invalid base64, and invalid JSON Schema |
742
775
  | `UNAUTHORIZED` | 401 | Missing or invalid API key |
743
- | `NOT_FOUND` | 404 | Resource not found (e.g., academic paper ID, version) |
776
+ | `NOT_FOUND` | 404 | Resource not found, including academic paper IDs and papers without public PDFs |
744
777
  | `CONFLICT` | 409 | Operation conflicts with the current state |
745
- | `UNPROCESSABLE_CONTENT` | 422 | Document could not be processed (empty, no readable text, extraction failed) |
778
+ | `UNPROCESSABLE_CONTENT` | 422 | Document could not be processed, including empty documents, no readable text, and extraction failures |
746
779
  | `TOO_MANY_REQUESTS` | 429 | Rate limit exceeded |
747
- | `INTERNAL_SERVER_ERROR` | 500 | Server-side failure capture the `requestId` for support |
780
+ | `INTERNAL_SERVER_ERROR` | 500 | Server-side failure; capture the `requestId` for support |
748
781
  | `NOT_IMPLEMENTED` | 501 | Endpoint not available on this instance |
749
782
 
750
783
  ## TypeScript Support
@@ -755,6 +788,7 @@ The SDK is written in TypeScript and includes full type definitions:
755
788
  import {
756
789
  createClient,
757
790
  isPDFVectorError,
791
+ isPDFVectorUserError,
758
792
  // Base error class — all errors inherit from this
759
793
  PDFVectorError,
760
794
  // HTTP-aligned error categories
@@ -772,12 +806,16 @@ import {
772
806
  PasswordProtectedError,
773
807
  UnsupportedFormatError,
774
808
  URLFetchError,
809
+ InvalidDocumentURLError,
810
+ InvalidBase64Error,
775
811
  TierNotSupportedError,
776
812
  InvalidSchemaError,
777
813
  NoInputProvidedError,
778
814
  EmptyDocumentError,
779
815
  NoTextDetectedError,
780
816
  ExtractionFailedError,
817
+ AcademicPaperNotFoundError,
818
+ NoPublicPDFError,
781
819
  // Underlying ORPC error — re-exported for advanced use cases
782
820
  ORPCError,
783
821
  } from "@pdfvector/instance-client";
@@ -789,7 +827,10 @@ import type {
789
827
  ContractInputs,
790
828
  ContractOutputs,
791
829
  PDFVectorModel,
830
+ PDFVectorAgentError,
831
+ PDFVectorErrorCategory,
792
832
  PDFVectorErrorCode,
833
+ PDFVectorErrorOrigin,
793
834
  } from "@pdfvector/instance-client";
794
835
  ```
795
836
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pdfvector/instance-client",
3
- "version": "0.0.49",
3
+ "version": "0.0.51",
4
4
  "type": "module",
5
5
  "description": "Official TypeScript/JavaScript SDK for PDF Vector API - Parse PDF/Word/Image/Excel documents to clean, structured markdown format and search academic publications across multiple databases",
6
6
  "license": "MIT",
@@ -33,7 +33,7 @@
33
33
  },
34
34
  "main": ".tsc/lib/index.js",
35
35
  "dependencies": {
36
- "@pdfvector/instance-contract": "^0.0.46"
36
+ "@pdfvector/instance-contract": "^0.0.47"
37
37
  },
38
38
  "files": [
39
39
  ".tsc",