@pdfvector/instance-client 0.0.18 → 0.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.tsc/lib/index.d.ts +55 -4
- package/.tsc/lib/index.js +78 -6
- package/.tsc/lib/internal.d.ts +5 -0
- package/.tsc/lib/internal.js +4 -0
- package/CHANGELOG.md +17 -0
- package/README.md +562 -0
- package/package.json +31 -2
package/.tsc/lib/index.d.ts
CHANGED
|
@@ -1,8 +1,59 @@
|
|
|
1
1
|
import type { ContractRouterClient } from "@orpc/contract";
|
|
2
2
|
import type { contract } from "@pdfvector/instance-contract";
|
|
3
3
|
export interface CreateClientOptions {
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
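/** Domain of the PDFVector instance server (e.g., "your-instance.pdfvector.com"). Defaults to "global.pdfvector.com". Domains starting with "localhost" or "127.0.0.1" are reached over http; all other domains use https. */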
|
|
5
|
+
domain?: string;
|
|
6
|
+
/** API key sent as a Bearer token in the `authorization` header. Takes precedence over `secret` when both are set. */
|
|
7
|
+
apiKey?: string;
|
|
8
|
+
/** @internal Server secret */
|
|
9
|
+
secret?: string;
|
|
6
10
|
}
|
|
7
|
-
|
|
8
|
-
export
|
|
11
|
+
/** Per-request context passed as the second argument to any API call. */
|
|
12
|
+
export interface ClientContext {
|
|
13
|
+
/** Document ID for usage tracking. Returned in response for document, identity, invoice, and bank statement endpoints. */
|
|
14
|
+
documentId?: string;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Custom error class for PDFVector API errors.
|
|
18
|
+
*
|
|
19
|
+
* All API errors thrown by the client are instances of this class.
|
|
20
|
+
* Provides structured access to error code, HTTP status, message, and additional data.
|
|
21
|
+
*
|
|
22
|
+
* @example
|
|
23
|
+
* ```typescript
|
|
24
|
+
* try {
|
|
25
|
+
* await client.document.parse({ url: "..." });
|
|
26
|
+
* } catch (error) {
|
|
27
|
+
* if (error instanceof PDFVectorError) {
|
|
28
|
+
* console.error(error.code); // "UNAUTHORIZED" | "BAD_REQUEST" | ...
|
|
29
|
+
* console.error(error.status); // 400 | 401 | 422 | 429 | 500
|
|
30
|
+
* console.error(error.message); // "Invalid API key"
|
|
31
|
+
* console.error(error.data); // { requestId: 1 }
|
|
32
|
+
* }
|
|
33
|
+
* }
|
|
34
|
+
* ```
|
|
35
|
+
*/
|
|
36
|
+
export declare class PDFVectorError extends Error {
|
|
37
|
+
/** Error code identifying the type of error */
|
|
38
|
+
readonly code: string;
|
|
39
|
+
/** HTTP status code */
|
|
40
|
+
readonly status: number;
|
|
41
|
+
/** Additional error data from the server (e.g., requestId) */
|
|
42
|
+
readonly data: unknown;
|
|
43
|
+
constructor(options: {
|
|
44
|
+
code: string;
|
|
45
|
+
message: string;
|
|
46
|
+
status: number;
|
|
47
|
+
data?: unknown;
|
|
48
|
+
});
|
|
49
|
+
/**
|
|
50
|
+
* Type guard to check if an unknown error is a PDFVectorError.
|
|
51
|
+
*/
|
|
52
|
+
static is(error: unknown): error is PDFVectorError;
|
|
53
|
+
}
|
|
54
|
+
/** @internal */
|
|
55
|
+
export declare function _buildClient(options?: CreateClientOptions): Client;
|
|
56
|
+
export declare function createClient(options?: CreateClientOptions): PublicClient;
|
|
57
|
+
type Client = ContractRouterClient<typeof contract, ClientContext>;
|
|
58
|
+
export type PublicClient = Omit<ContractRouterClient<typeof contract, ClientContext>, "admin" | "free">;
|
|
59
|
+
export type { ContractInputs, ContractOutputs, PDFVectorModel, } from "@pdfvector/instance-contract";
|
package/.tsc/lib/index.js
CHANGED
|
@@ -1,11 +1,83 @@
|
|
|
1
|
-
import { createORPCClient } from "@orpc/client";
|
|
1
|
+
import { createORPCClient, ORPCError, onError } from "@orpc/client";
|
|
2
2
|
import { RPCLink } from "@orpc/client/fetch";
|
|
3
|
-
|
|
3
|
+
const DEFAULT_DOMAIN = "global.pdfvector.com";
|
|
4
|
+
/**
|
|
5
|
+
* Custom error class for PDFVector API errors.
|
|
6
|
+
*
|
|
7
|
+
* All API errors thrown by the client are instances of this class.
|
|
8
|
+
* Provides structured access to error code, HTTP status, message, and additional data.
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* try {
|
|
13
|
+
* await client.document.parse({ url: "..." });
|
|
14
|
+
* } catch (error) {
|
|
15
|
+
* if (error instanceof PDFVectorError) {
|
|
16
|
+
* console.error(error.code); // "UNAUTHORIZED" | "BAD_REQUEST" | ...
|
|
17
|
+
* console.error(error.status); // 400 | 401 | 422 | 429 | 500
|
|
18
|
+
* console.error(error.message); // "Invalid API key"
|
|
19
|
+
* console.error(error.data); // { requestId: 1 }
|
|
20
|
+
* }
|
|
21
|
+
* }
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
export class PDFVectorError extends Error {
|
|
25
|
+
/** Error code identifying the type of error */
|
|
26
|
+
code;
|
|
27
|
+
/** HTTP status code */
|
|
28
|
+
status;
|
|
29
|
+
/** Additional error data from the server (e.g., requestId) */
|
|
30
|
+
data;
|
|
31
|
+
constructor(options) {
|
|
32
|
+
super(options.message);
|
|
33
|
+
this.name = "PDFVectorError";
|
|
34
|
+
this.code = options.code;
|
|
35
|
+
this.status = options.status;
|
|
36
|
+
this.data = options.data;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Type guard to check if an unknown error is a PDFVectorError.
|
|
40
|
+
*/
|
|
41
|
+
static is(error) {
|
|
42
|
+
return error instanceof PDFVectorError;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
/** @internal */
|
|
46
|
+
export function _buildClient(options) {
|
|
47
|
+
const domain = options?.domain ?? DEFAULT_DOMAIN;
|
|
48
|
+
const isLocal = domain.startsWith("localhost") || domain.startsWith("127.0.0.1");
|
|
49
|
+
const baseUrl = `${isLocal ? "http" : "https"}://${domain}`;
|
|
50
|
+
const token = options?.apiKey ?? options?.secret;
|
|
4
51
|
const link = new RPCLink({
|
|
5
|
-
url: `${
|
|
6
|
-
headers: () =>
|
|
7
|
-
|
|
8
|
-
|
|
52
|
+
url: `${baseUrl}/rpc`,
|
|
53
|
+
headers: ({ context }) => {
|
|
54
|
+
const headers = {};
|
|
55
|
+
if (token) {
|
|
56
|
+
headers.authorization = `Bearer ${token}`;
|
|
57
|
+
}
|
|
58
|
+
if (context?.documentId) {
|
|
59
|
+
headers["x-pdfvector-document-id"] = context.documentId;
|
|
60
|
+
}
|
|
61
|
+
return headers;
|
|
62
|
+
},
|
|
63
|
+
fetch: token
|
|
64
|
+
? undefined
|
|
65
|
+
: (input, init) => fetch(input, { ...init, credentials: "include" }),
|
|
66
|
+
interceptors: [
|
|
67
|
+
onError((error) => {
|
|
68
|
+
if (error instanceof ORPCError) {
|
|
69
|
+
throw new PDFVectorError({
|
|
70
|
+
code: error.code,
|
|
71
|
+
message: error.message,
|
|
72
|
+
status: error.status,
|
|
73
|
+
data: error.data,
|
|
74
|
+
});
|
|
75
|
+
}
|
|
76
|
+
}),
|
|
77
|
+
],
|
|
9
78
|
});
|
|
10
79
|
return createORPCClient(link);
|
|
11
80
|
}
|
|
81
|
+
export function createClient(options) {
|
|
82
|
+
return _buildClient(options);
|
|
83
|
+
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { ContractRouterClient } from "@orpc/contract";
|
|
2
|
+
import type { contract } from "@pdfvector/instance-contract";
|
|
3
|
+
import { type ClientContext, type CreateClientOptions } from ".";
|
|
4
|
+
export type Client = ContractRouterClient<typeof contract, ClientContext>;
|
|
5
|
+
export declare function createInternalClient(options?: CreateClientOptions): Client;
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,22 @@
|
|
|
1
1
|
# @pdfvector/instance-client
|
|
2
2
|
|
|
3
|
+
## 0.0.20
|
|
4
|
+
### Patch Changes
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
- [#129](https://github.com/phuctm97/pdfvector/pull/129) [`111c98f`](https://github.com/phuctm97/pdfvector/commit/111c98f003f1ffb29c0515b08ad3567505396d86) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Add public/internal SDK client split, hide free API from docs, and update landing page code examples
|
|
9
|
+
|
|
10
|
+
## 0.0.19
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
- [#126](https://github.com/phuctm97/pdfvector/pull/126) [`9f3db7d`](https://github.com/phuctm97/pdfvector/commit/9f3db7d4114169d2dff0f83cad7bf87e054e9206) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Enhance instance client as publishable PDFVector SDK
|
|
16
|
+
|
|
17
|
+
- Updated dependencies [[`9f3db7d`](https://github.com/phuctm97/pdfvector/commit/9f3db7d4114169d2dff0f83cad7bf87e054e9206)]:
|
|
18
|
+
- @pdfvector/instance-contract@0.0.22
|
|
19
|
+
|
|
3
20
|
## 0.0.18
|
|
4
21
|
### Patch Changes
|
|
5
22
|
|
package/README.md
ADDED
|
@@ -0,0 +1,562 @@
|
|
|
1
|
+
# PDFVector TypeScript/JavaScript SDK
|
|
2
|
+
|
|
3
|
+
The official TypeScript/JavaScript SDK for the [PDFVector](https://www.pdfvector.com) API: Parse PDF, Word, Image, and Excel documents to clean, structured markdown format, ask questions about documents using AI, extract structured data from documents with JSON Schema, search across multiple academic databases with a unified API, fetch specific publications by DOI, PubMed ID, ArXiv ID, and more, and find relevant academic citations for paragraphs of text.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @pdfvector/instance-client
|
|
9
|
+
# or
|
|
10
|
+
yarn add @pdfvector/instance-client
|
|
11
|
+
# or
|
|
12
|
+
pnpm add @pdfvector/instance-client
|
|
13
|
+
# or
|
|
14
|
+
bun add @pdfvector/instance-client
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
|
|
19
|
+
```typescript
|
|
20
|
+
import { createClient } from "@pdfvector/instance-client";
|
|
21
|
+
|
|
22
|
+
const client = createClient({
|
|
23
|
+
apiKey: "your-api-key",
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
// Parse a document
|
|
27
|
+
const parseResult = await client.document.parse({
|
|
28
|
+
url: "https://example.com/document.pdf",
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
console.log(parseResult.markdown);
|
|
32
|
+
console.log(`Pages: ${parseResult.pageCount}, Model: ${parseResult.model}`);
|
|
33
|
+
|
|
34
|
+
// Ask questions about documents
|
|
35
|
+
const askResult = await client.document.ask({
|
|
36
|
+
url: "https://example.com/research-paper.pdf",
|
|
37
|
+
question: "What are the key findings and conclusions?",
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
console.log(askResult.markdown);
|
|
41
|
+
|
|
42
|
+
// Extract structured data using JSON Schema
|
|
43
|
+
const extractResult = await client.document.extract({
|
|
44
|
+
url: "https://example.com/research-paper.pdf",
|
|
45
|
+
prompt: "Extract the research information",
|
|
46
|
+
schema: {
|
|
47
|
+
type: "object",
|
|
48
|
+
properties: {
|
|
49
|
+
title: { type: "string" },
|
|
50
|
+
authors: { type: "array", items: { type: "string" } },
|
|
51
|
+
abstract: { type: "string" },
|
|
52
|
+
findings: { type: "array", items: { type: "string" } },
|
|
53
|
+
},
|
|
54
|
+
required: ["title", "abstract"],
|
|
55
|
+
},
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
console.log(extractResult.data);
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Authentication
|
|
62
|
+
|
|
63
|
+
Get your API key from the [PDFVector dashboard](https://app.pdfvector.com/instances/2?tab=settings).
|
|
64
|
+
|
|
65
|
+
```typescript
|
|
66
|
+
const client = createClient({
|
|
67
|
+
apiKey: "your-api-key",
|
|
68
|
+
});
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Verify your credentials:
|
|
72
|
+
|
|
73
|
+
```typescript
|
|
74
|
+
const status = await client.authenticate.validateCredential();
|
|
75
|
+
console.log(status.version); // Server version
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Custom Domain
|
|
79
|
+
|
|
80
|
+
By default, the SDK connects to `global.pdfvector.com`. For custom or self-hosted instances:
|
|
81
|
+
|
|
82
|
+
```typescript
|
|
83
|
+
const client = createClient({
|
|
84
|
+
domain: "your-instance.pdfvector.com",
|
|
85
|
+
apiKey: "your-api-key",
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
// For local development
|
|
89
|
+
const localClient = createClient({
|
|
90
|
+
domain: "localhost:34000",
|
|
91
|
+
apiKey: "your-api-key",
|
|
92
|
+
});
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Document Processing
|
|
96
|
+
|
|
97
|
+
All document endpoints accept three input methods: `url`, `file` (File/Blob), or `base64`.
|
|
98
|
+
|
|
99
|
+
**Supported file types:** PDF, Word (.docx), Excel (.xlsx), CSV, and Image (.png, .jpg).
|
|
100
|
+
|
|
101
|
+
### Parse
|
|
102
|
+
|
|
103
|
+
Extract text content from documents:
|
|
104
|
+
|
|
105
|
+
```typescript
|
|
106
|
+
const result = await client.document.parse(
|
|
107
|
+
{
|
|
108
|
+
url: "https://example.com/document.pdf",
|
|
109
|
+
model: "auto", // "auto" | "nano" | "mini" | "pro" | "max"
|
|
110
|
+
},
|
|
111
|
+
{ context: { documentId: "my-doc-123" } }, // optional, for usage tracking
|
|
112
|
+
);
|
|
113
|
+
|
|
114
|
+
console.log(result.markdown); // Extracted text
|
|
115
|
+
console.log(result.pageCount); // Number of pages
|
|
116
|
+
console.log(result.model); // Model tier used
|
|
117
|
+
console.log(result.html); // Full HTML (only with 'max' model)
|
|
118
|
+
console.log(result.documentId); // "my-doc-123"
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Parse from file data
|
|
122
|
+
|
|
123
|
+
```typescript
|
|
124
|
+
import { readFile } from "fs/promises";
|
|
125
|
+
|
|
126
|
+
const result = await client.document.parse(
|
|
127
|
+
{
|
|
128
|
+
file: new File([await readFile("document.pdf")], "document.pdf", {
|
|
129
|
+
type: "application/pdf",
|
|
130
|
+
}),
|
|
131
|
+
model: "auto",
|
|
132
|
+
},
|
|
133
|
+
{ context: { documentId: "uploaded-doc" } },
|
|
134
|
+
);
|
|
135
|
+
|
|
136
|
+
console.log(result.markdown);
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Ask
|
|
140
|
+
|
|
141
|
+
Answer questions about a document:
|
|
142
|
+
|
|
143
|
+
```typescript
|
|
144
|
+
const result = await client.document.ask(
|
|
145
|
+
{
|
|
146
|
+
url: "https://example.com/research-paper.pdf",
|
|
147
|
+
question: "What are the main findings of this study?",
|
|
148
|
+
model: "auto",
|
|
149
|
+
},
|
|
150
|
+
{ context: { documentId: "research-paper-1" } },
|
|
151
|
+
);
|
|
152
|
+
|
|
153
|
+
console.log(result.markdown);
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Extract
|
|
157
|
+
|
|
158
|
+
Extract structured data using a JSON Schema:
|
|
159
|
+
|
|
160
|
+
```typescript
|
|
161
|
+
const result = await client.document.extract(
|
|
162
|
+
{
|
|
163
|
+
url: "https://example.com/research-paper.pdf",
|
|
164
|
+
prompt: "Extract the title, authors, and publication year",
|
|
165
|
+
schema: {
|
|
166
|
+
type: "object",
|
|
167
|
+
properties: {
|
|
168
|
+
title: { type: "string" },
|
|
169
|
+
authors: { type: "array", items: { type: "string" } },
|
|
170
|
+
year: { type: "number" },
|
|
171
|
+
},
|
|
172
|
+
required: ["title", "authors", "year"],
|
|
173
|
+
},
|
|
174
|
+
},
|
|
175
|
+
{ context: { documentId: "research-paper-1" } },
|
|
176
|
+
);
|
|
177
|
+
|
|
178
|
+
console.log(result.data); // { title: "...", authors: [...], year: 2024 }
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Invoice Processing
|
|
182
|
+
|
|
183
|
+
Specialized methods for processing invoices. `parse` supports only the `pro`, `max`, and `auto` models; `ask` and `extract` support all model tiers.
|
|
184
|
+
|
|
185
|
+
### Parse Invoice
|
|
186
|
+
|
|
187
|
+
```typescript
|
|
188
|
+
const result = await client.invoice.parse(
|
|
189
|
+
{ url: "https://example.com/invoice.pdf" },
|
|
190
|
+
{ context: { documentId: "invoice-001" } },
|
|
191
|
+
);
|
|
192
|
+
|
|
193
|
+
console.log(result.markdown);
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Ask Questions About Invoices
|
|
197
|
+
|
|
198
|
+
```typescript
|
|
199
|
+
const result = await client.invoice.ask(
|
|
200
|
+
{
|
|
201
|
+
url: "https://example.com/invoice.pdf",
|
|
202
|
+
question: "What is the total amount and due date for this invoice?",
|
|
203
|
+
},
|
|
204
|
+
{ context: { documentId: "invoice-001" } },
|
|
205
|
+
);
|
|
206
|
+
|
|
207
|
+
console.log(result.markdown);
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Extract Structured Invoice Data
|
|
211
|
+
|
|
212
|
+
```typescript
|
|
213
|
+
const result = await client.invoice.extract(
|
|
214
|
+
{
|
|
215
|
+
url: "https://example.com/invoice.pdf",
|
|
216
|
+
prompt: "Extract all invoice details including vendor, items, and totals",
|
|
217
|
+
schema: {
|
|
218
|
+
type: "object",
|
|
219
|
+
properties: {
|
|
220
|
+
invoiceNumber: { type: "string" },
|
|
221
|
+
date: { type: "string" },
|
|
222
|
+
totalAmount: { type: "number" },
|
|
223
|
+
items: {
|
|
224
|
+
type: "array",
|
|
225
|
+
items: {
|
|
226
|
+
type: "object",
|
|
227
|
+
properties: {
|
|
228
|
+
description: { type: "string" },
|
|
229
|
+
quantity: { type: "number" },
|
|
230
|
+
price: { type: "number" },
|
|
231
|
+
},
|
|
232
|
+
},
|
|
233
|
+
},
|
|
234
|
+
},
|
|
235
|
+
required: ["invoiceNumber", "date", "totalAmount", "items"],
|
|
236
|
+
},
|
|
237
|
+
},
|
|
238
|
+
{ context: { documentId: "invoice-001" } },
|
|
239
|
+
);
|
|
240
|
+
|
|
241
|
+
console.log(result.data);
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
## Identity Document Processing
|
|
245
|
+
|
|
246
|
+
Specialized methods for processing ID documents (passports, driver's licenses, ID cards). `parse` supports only the `pro`, `max`, and `auto` models; `ask` and `extract` support all model tiers.
|
|
247
|
+
|
|
248
|
+
### Parse ID Document
|
|
249
|
+
|
|
250
|
+
```typescript
|
|
251
|
+
const result = await client.identity.parse(
|
|
252
|
+
{ url: "https://example.com/passport.pdf" },
|
|
253
|
+
{ context: { documentId: "passport-jane" } },
|
|
254
|
+
);
|
|
255
|
+
|
|
256
|
+
console.log(result.markdown);
|
|
257
|
+
console.log(result.documentType); // e.g., "passport"
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
### Ask Questions About ID Documents
|
|
261
|
+
|
|
262
|
+
```typescript
|
|
263
|
+
const result = await client.identity.ask(
|
|
264
|
+
{
|
|
265
|
+
url: "https://example.com/passport.pdf",
|
|
266
|
+
question: "What is the full name and date of birth on this document?",
|
|
267
|
+
},
|
|
268
|
+
{ context: { documentId: "passport-jane" } },
|
|
269
|
+
);
|
|
270
|
+
|
|
271
|
+
console.log(result.markdown);
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
### Extract Structured ID Document Data
|
|
275
|
+
|
|
276
|
+
```typescript
|
|
277
|
+
const result = await client.identity.extract(
|
|
278
|
+
{
|
|
279
|
+
url: "https://example.com/passport.pdf",
|
|
280
|
+
prompt: "Extract passport details from this document",
|
|
281
|
+
schema: {
|
|
282
|
+
type: "object",
|
|
283
|
+
properties: {
|
|
284
|
+
fullName: { type: "string" },
|
|
285
|
+
dateOfBirth: { type: "string" },
|
|
286
|
+
documentNumber: { type: "string" },
|
|
287
|
+
nationality: { type: "string" },
|
|
288
|
+
expirationDate: { type: "string" },
|
|
289
|
+
},
|
|
290
|
+
required: ["fullName", "documentNumber"],
|
|
291
|
+
},
|
|
292
|
+
},
|
|
293
|
+
{ context: { documentId: "passport-jane" } },
|
|
294
|
+
);
|
|
295
|
+
|
|
296
|
+
console.log(result.data);
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
## Bank Statement Processing
|
|
300
|
+
|
|
301
|
+
Specialized methods for processing bank statements. `parse` supports only the `pro`, `max`, and `auto` models; `ask` and `extract` support all model tiers.
|
|
302
|
+
|
|
303
|
+
```typescript
|
|
304
|
+
const result = await client.bankStatement.parse(
|
|
305
|
+
{ url: "https://example.com/statement.pdf" },
|
|
306
|
+
{ context: { documentId: "statement-2024-03" } },
|
|
307
|
+
);
|
|
308
|
+
|
|
309
|
+
console.log(result.markdown);
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
Also supports `bankStatement.ask()` and `bankStatement.extract()` with the same patterns as above.
|
|
313
|
+
|
|
314
|
+
## Academic Research
|
|
315
|
+
|
|
316
|
+
### Search Academic Publications
|
|
317
|
+
|
|
318
|
+
Search across multiple academic databases with a unified API. Costs 2 credits per request.
|
|
319
|
+
|
|
320
|
+
```typescript
|
|
321
|
+
const result = await client.academic.search({
|
|
322
|
+
query: "quantum computing",
|
|
323
|
+
providers: ["semantic-scholar", "arxiv", "pubmed"],
|
|
324
|
+
limit: 20,
|
|
325
|
+
yearFrom: 2021,
|
|
326
|
+
yearTo: 2024,
|
|
327
|
+
});
|
|
328
|
+
|
|
329
|
+
result.results.forEach((publication) => {
|
|
330
|
+
console.log(`Title: ${publication.title}`);
|
|
331
|
+
console.log(`Authors: ${publication.authors?.map((a) => a.name).join(", ")}`);
|
|
332
|
+
console.log(`Year: ${publication.year}`);
|
|
333
|
+
console.log("---");
|
|
334
|
+
});
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
**Supported Providers:**
|
|
338
|
+
|
|
339
|
+
- `"semantic-scholar"` (default) - [Semantic Scholar](https://www.semanticscholar.org/)
|
|
340
|
+
- `"arxiv"` - [ArXiv](https://arxiv.org/)
|
|
341
|
+
- `"pubmed"` - [PubMed](https://pubmed.ncbi.nlm.nih.gov/)
|
|
342
|
+
- `"google-scholar"` - [Google Scholar](https://scholar.google.com/)
|
|
343
|
+
- `"eric"` - [ERIC](https://eric.ed.gov/)
|
|
344
|
+
- `"europe-pmc"` - [Europe PMC](https://europepmc.org/)
|
|
345
|
+
- `"openalex"` - [OpenAlex](https://openalex.org/)
|
|
346
|
+
|
|
347
|
+
**Search Parameters:**
|
|
348
|
+
|
|
349
|
+
- `query` (required): 1-400 characters
|
|
350
|
+
- `providers`: Array of provider names (default: `["semantic-scholar"]`)
|
|
351
|
+
- `offset`: Pagination offset (default: 0)
|
|
352
|
+
- `limit`: Results per provider, 1-100 (default: 20)
|
|
353
|
+
- `yearFrom` / `yearTo`: Filter by publication year (1900-2100)
|
|
354
|
+
- `fields`: Specific fields to return (`"doi"`, `"title"`, `"url"`, `"providerURL"`, `"authors"`, `"date"`, `"year"`, `"totalCitations"`, `"totalReferences"`, `"abstract"`, `"pdfURL"`, `"provider"`, `"providerData"`)
|
|
355
|
+
|
|
356
|
+
### Fetch Academic Publications by ID
|
|
357
|
+
|
|
358
|
+
Fetch specific papers by their identifiers with automatic provider detection. Costs 2 credits per request.
|
|
359
|
+
|
|
360
|
+
```typescript
|
|
361
|
+
const result = await client.academic.fetch({
|
|
362
|
+
ids: [
|
|
363
|
+
"10.1038/nature12373", // DOI
|
|
364
|
+
"12345678", // PubMed ID
|
|
365
|
+
"2301.00001", // ArXiv ID
|
|
366
|
+
],
|
|
367
|
+
fields: ["title", "authors", "year", "abstract", "doi"],
|
|
368
|
+
});
|
|
369
|
+
|
|
370
|
+
result.results.forEach((pub) => {
|
|
371
|
+
console.log(`Title: ${pub.title}`);
|
|
372
|
+
console.log(`Provider: ${pub.detectedProvider}`);
|
|
373
|
+
});
|
|
374
|
+
|
|
375
|
+
result.errors?.forEach((error) => {
|
|
376
|
+
console.log(`Failed to fetch ${error.id}: ${error.error}`);
|
|
377
|
+
});
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
**Supported ID types:** DOI, PubMed ID, ArXiv ID, Semantic Scholar ID, ERIC ID, Europe PMC ID, OpenAlex ID.
|
|
381
|
+
|
|
382
|
+
### Find Citations for a Paragraph
|
|
383
|
+
|
|
384
|
+
Find relevant academic citations for each sentence in a paragraph using semantic similarity. Costs 2 credits per sentence analyzed.
|
|
385
|
+
|
|
386
|
+
```typescript
|
|
387
|
+
const result = await client.academic.findCitations({
|
|
388
|
+
paragraph:
|
|
389
|
+
"Transformers have revolutionized natural language processing. Attention mechanisms allow models to focus on relevant parts of the input.",
|
|
390
|
+
providers: ["semantic-scholar", "arxiv", "pubmed"],
|
|
391
|
+
});
|
|
392
|
+
|
|
393
|
+
console.log(
|
|
394
|
+
`Found ${result.totalCitations} citations across ${result.sentenceCount} sentences`,
|
|
395
|
+
);
|
|
396
|
+
|
|
397
|
+
for (const item of result.results) {
|
|
398
|
+
console.log(`\nSentence: ${item.sentence}`);
|
|
399
|
+
for (const citation of item.citations) {
|
|
400
|
+
console.log(` [Score: ${citation.score}/10] ${citation.title}`);
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
```
|
|
404
|
+
|
|
405
|
+
## Document ID Tracking
|
|
406
|
+
|
|
407
|
+
Pass a `documentId` in the per-request `context` to track API usage. The ID is sent as the `x-pdfvector-document-id` request header and is returned in responses for document, identity, invoice, and bank statement endpoints. Academic endpoints do not use `documentId`.
|
|
408
|
+
|
|
409
|
+
```typescript
|
|
410
|
+
const result = await client.document.parse(
|
|
411
|
+
{ url: "https://example.com/document.pdf" },
|
|
412
|
+
{ context: { documentId: "invoice-456" } },
|
|
413
|
+
);
|
|
414
|
+
|
|
415
|
+
console.log(result.documentId); // "invoice-456"
|
|
416
|
+
```
|
|
417
|
+
|
|
418
|
+
Each request can have its own `documentId`:
|
|
419
|
+
|
|
420
|
+
```typescript
|
|
421
|
+
const [resultA, resultB] = await Promise.all([
|
|
422
|
+
client.document.parse(
|
|
423
|
+
{ url: "https://example.com/doc-a.pdf" },
|
|
424
|
+
{ context: { documentId: "doc-a" } },
|
|
425
|
+
),
|
|
426
|
+
client.document.parse(
|
|
427
|
+
{ url: "https://example.com/doc-b.pdf" },
|
|
428
|
+
{ context: { documentId: "doc-b" } },
|
|
429
|
+
),
|
|
430
|
+
]);
|
|
431
|
+
|
|
432
|
+
console.log(resultA.documentId); // "doc-a"
|
|
433
|
+
console.log(resultB.documentId); // "doc-b"
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
## Model Tiers
|
|
437
|
+
|
|
438
|
+
| Tier | Best for | Max pages | Max size | Supported formats |
|
|
439
|
+
|------|----------|-----------|----------|-------------------|
|
|
440
|
+
| `nano` | Simple text documents | 30 | 10MB | PDF, Word, Excel, CSV |
|
|
441
|
+
| `mini` | Tables, structured content | 30 | 10MB | PDF, Word, Excel, CSV |
|
|
442
|
+
| `pro` | Complex docs, images, handwriting | 30 | 40MB | PDF, Word, Excel, CSV, Image |
|
|
443
|
+
| `max` | Large docs, full capabilities, HTML output | 1000 | 500MB | PDF, Word, Excel, CSV, Image |
|
|
444
|
+
| `auto` | Automatic selection with fallback (default) | 1000 | 500MB | PDF, Word, Excel, CSV, Image |
|
|
445
|
+
|
|
446
|
+
**Note:** Identity, invoice, and bank statement `parse` only support `pro`, `max`, and `auto` models. Their `ask` and `extract` support all model tiers.
|
|
447
|
+
|
|
448
|
+
## Credit Costs
|
|
449
|
+
|
|
450
|
+
| API | nano | mini | pro | max | Unit |
|
|
451
|
+
|-----|------|------|-----|-----|------|
|
|
452
|
+
| Document Parse | 1 | 2 | 4 | 8 | /page |
|
|
453
|
+
| Document Ask | 2 | 4 | 8 | 16 | /page |
|
|
454
|
+
| Document Extract | 2 | 4 | 8 | 16 | /page |
|
|
455
|
+
| Identity Parse | — | — | 6 | 10 | /page |
|
|
456
|
+
| Identity Ask | 6 | 10 | 14 | 18 | /page |
|
|
457
|
+
| Identity Extract | 6 | 10 | 14 | 18 | /page |
|
|
458
|
+
| Invoice Parse | — | — | 6 | 10 | /page |
|
|
459
|
+
| Invoice Ask | 6 | 10 | 14 | 18 | /page |
|
|
460
|
+
| Invoice Extract | 6 | 10 | 14 | 18 | /page |
|
|
461
|
+
| Bank Statement Parse | — | — | 6 | 10 | /page |
|
|
462
|
+
| Bank Statement Ask | 6 | 10 | 14 | 18 | /page |
|
|
463
|
+
| Bank Statement Extract | 6 | 10 | 14 | 18 | /page |
|
|
464
|
+
| Academic Search | 2 | 2 | 2 | 2 | /request |
|
|
465
|
+
| Academic Fetch | 2 | 2 | 2 | 2 | /request |
|
|
466
|
+
| Academic Find Citations | 2 | 2 | 2 | 2 | /sentence |
|
|
467
|
+
|
|
468
|
+
## Error Handling
|
|
469
|
+
|
|
470
|
+
All API errors are thrown as `PDFVectorError` instances with structured error information:
|
|
471
|
+
|
|
472
|
+
```typescript
|
|
473
|
+
import { createClient, PDFVectorError } from "@pdfvector/instance-client";
|
|
474
|
+
|
|
475
|
+
const client = createClient({
|
|
476
|
+
apiKey: "your-api-key",
|
|
477
|
+
});
|
|
478
|
+
|
|
479
|
+
try {
|
|
480
|
+
const result = await client.document.parse({
|
|
481
|
+
url: "https://example.com/document.pdf",
|
|
482
|
+
});
|
|
483
|
+
console.log(result.markdown);
|
|
484
|
+
} catch (error) {
|
|
485
|
+
if (error instanceof PDFVectorError) {
|
|
486
|
+
console.error(`API Error [${error.code}]: ${error.message}`);
|
|
487
|
+
console.error(`HTTP Status: ${error.status}`);
|
|
488
|
+
console.error(`Error Data:`, error.data);
|
|
489
|
+
} else {
|
|
490
|
+
console.error("Unexpected Error:", error);
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
```
|
|
494
|
+
|
|
495
|
+
You can also use the static type guard:
|
|
496
|
+
|
|
497
|
+
```typescript
|
|
498
|
+
try {
|
|
499
|
+
await client.document.parse({ url: "..." });
|
|
500
|
+
} catch (error) {
|
|
501
|
+
if (PDFVectorError.is(error)) {
|
|
502
|
+
switch (error.code) {
|
|
503
|
+
case "UNAUTHORIZED":
|
|
504
|
+
console.error("Invalid API key");
|
|
505
|
+
break;
|
|
506
|
+
case "BAD_REQUEST":
|
|
507
|
+
console.error("Validation error:", error.message);
|
|
508
|
+
break;
|
|
509
|
+
case "INTERNAL_SERVER_ERROR":
|
|
510
|
+
console.error("Server error:", error.message);
|
|
511
|
+
break;
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
```
|
|
516
|
+
|
|
517
|
+
### Error Codes
|
|
518
|
+
|
|
519
|
+
| Code | Status | Description |
|
|
520
|
+
|------|--------|-------------|
|
|
521
|
+
| `BAD_REQUEST` | 400 | Input validation failed (e.g., missing fields, invalid URL, question too short) |
|
|
522
|
+
| `UNAUTHORIZED` | 401 | Missing or invalid API key |
|
|
523
|
+
| `UNPROCESSABLE_CONTENT` | 422 | Document could not be processed by the requested model tier |
|
|
524
|
+
| `TOO_MANY_REQUESTS` | 429 | Rate limit exceeded |
|
|
525
|
+
| `INTERNAL_SERVER_ERROR` | 500 | Server-side failure |
|
|
526
|
+
|
|
527
|
+
## TypeScript Support
|
|
528
|
+
|
|
529
|
+
The SDK is written in TypeScript and includes full type definitions:
|
|
530
|
+
|
|
531
|
+
```typescript
|
|
532
|
+
import {
|
|
533
|
+
createClient,
|
|
534
|
+
PDFVectorError,
|
|
535
|
+
} from "@pdfvector/instance-client";
|
|
536
|
+
|
|
537
|
+
import type {
|
|
538
|
+
PublicClient,
|
|
539
|
+
ClientContext,
|
|
540
|
+
CreateClientOptions,
|
|
541
|
+
ContractInputs,
|
|
542
|
+
ContractOutputs,
|
|
543
|
+
PDFVectorModel,
|
|
544
|
+
} from "@pdfvector/instance-client";
|
|
545
|
+
```
|
|
546
|
+
|
|
547
|
+
## Runtime Support
|
|
548
|
+
|
|
549
|
+
- **Node.js**: 20+
|
|
550
|
+
- **Bun**: 1.0+
|
|
551
|
+
- **ESM only** (CommonJS is not supported)
|
|
552
|
+
- Uses standard `fetch` API
|
|
553
|
+
|
|
554
|
+
## Support
|
|
555
|
+
|
|
556
|
+
- **API Reference**: [global.pdfvector.com/api/reference](https://global.pdfvector.com/api/reference)
|
|
557
|
+
- **Dashboard**: [app.pdfvector.com](https://app.pdfvector.com)
|
|
558
|
+
- **Billing**: [app.pdfvector.com/workspace/billing](https://app.pdfvector.com/workspace/billing)
|
|
559
|
+
|
|
560
|
+
## License
|
|
561
|
+
|
|
562
|
+
MIT
|
package/package.json
CHANGED
|
@@ -1,12 +1,41 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pdfvector/instance-client",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.20",
|
|
4
4
|
"type": "module",
|
|
5
|
+
"description": "Official TypeScript/JavaScript SDK for PDFVector API - Parse PDF/Word/Image/Excel documents to clean, structured markdown format and search academic publications across multiple databases",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/phuctm97/pdfvector",
|
|
10
|
+
"directory": "packages/instance-client"
|
|
11
|
+
},
|
|
12
|
+
"homepage": "https://pdfvector.com",
|
|
13
|
+
"keywords": [
|
|
14
|
+
"pdfvector",
|
|
15
|
+
"pdf",
|
|
16
|
+
"word",
|
|
17
|
+
"markdown",
|
|
18
|
+
"api",
|
|
19
|
+
"sdk",
|
|
20
|
+
"academic",
|
|
21
|
+
"search",
|
|
22
|
+
"pubmed",
|
|
23
|
+
"semantic-scholar",
|
|
24
|
+
"google-scholar",
|
|
25
|
+
"arxiv",
|
|
26
|
+
"eric",
|
|
27
|
+
"research",
|
|
28
|
+
"papers",
|
|
29
|
+
"publications"
|
|
30
|
+
],
|
|
31
|
+
"publishConfig": {
|
|
32
|
+
"access": "public"
|
|
33
|
+
},
|
|
5
34
|
"main": ".tsc/lib/index.js",
|
|
6
35
|
"dependencies": {
|
|
7
36
|
"@orpc/client": "^1.13.13",
|
|
8
37
|
"@orpc/contract": "^1.13.13",
|
|
9
|
-
"@pdfvector/instance-contract": "^0.0.
|
|
38
|
+
"@pdfvector/instance-contract": "^0.0.22"
|
|
10
39
|
},
|
|
11
40
|
"files": [
|
|
12
41
|
".tsc",
|