pulse-ts-sdk 0.0.64 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/BaseClient.js +2 -2
- package/dist/cjs/Client.d.ts +0 -4
- package/dist/cjs/Client.js +30 -38
- package/dist/cjs/api/client/requests/ExtractAsyncRequest.d.ts +85 -29
- package/dist/cjs/api/client/requests/ExtractAsyncRequest.js +19 -0
- package/dist/cjs/api/client/requests/ExtractRequest.d.ts +85 -29
- package/dist/cjs/api/client/requests/ExtractRequest.js +19 -0
- package/dist/cjs/api/types/ExtractInput.d.ts +85 -29
- package/dist/cjs/api/types/ExtractInput.js +19 -0
- package/dist/cjs/api/types/ExtractOptions.d.ts +85 -29
- package/dist/cjs/api/types/ExtractOptions.js +19 -0
- package/dist/cjs/api/types/ExtractResponse.d.ts +89 -26
- package/dist/cjs/version.d.ts +1 -1
- package/dist/cjs/version.js +1 -1
- package/dist/esm/BaseClient.mjs +2 -2
- package/dist/esm/Client.d.mts +0 -4
- package/dist/esm/Client.mjs +30 -38
- package/dist/esm/api/client/requests/ExtractAsyncRequest.d.mts +85 -29
- package/dist/esm/api/client/requests/ExtractAsyncRequest.mjs +18 -1
- package/dist/esm/api/client/requests/ExtractRequest.d.mts +85 -29
- package/dist/esm/api/client/requests/ExtractRequest.mjs +18 -1
- package/dist/esm/api/types/ExtractInput.d.mts +85 -29
- package/dist/esm/api/types/ExtractInput.mjs +18 -1
- package/dist/esm/api/types/ExtractOptions.d.mts +85 -29
- package/dist/esm/api/types/ExtractOptions.mjs +18 -1
- package/dist/esm/api/types/ExtractResponse.d.mts +89 -26
- package/dist/esm/version.d.mts +1 -1
- package/dist/esm/version.mjs +1 -1
- package/package.json +1 -1
- package/reference.md +0 -4
package/dist/cjs/BaseClient.js
CHANGED
|
@@ -43,8 +43,8 @@ function normalizeClientOptions(options) {
|
|
|
43
43
|
const headers = (0, headers_js_1.mergeHeaders)({
|
|
44
44
|
"X-Fern-Language": "JavaScript",
|
|
45
45
|
"X-Fern-SDK-Name": "pulse-ts-sdk",
|
|
46
|
-
"X-Fern-SDK-Version": "0.0.64",
|
|
47
|
-
"User-Agent": "pulse-ts-sdk/0.0.64",
|
|
46
|
+
"X-Fern-SDK-Version": "1.0.1",
|
|
47
|
+
"User-Agent": "pulse-ts-sdk/1.0.1",
|
|
48
48
|
"X-Fern-Runtime": core.RUNTIME.type,
|
|
49
49
|
"X-Fern-Runtime-Version": core.RUNTIME.version,
|
|
50
50
|
}, options === null || options === void 0 ? void 0 : options.headers);
|
package/dist/cjs/Client.d.ts
CHANGED
|
@@ -24,10 +24,6 @@ export declare class PulseClient {
|
|
|
24
24
|
* Set `async: true` to return immediately with a job_id for polling via
|
|
25
25
|
* GET /job/{jobId}. Otherwise processes synchronously.
|
|
26
26
|
*
|
|
27
|
-
* **Note:** Both sync and async modes return HTTP 200. When `async` is true
|
|
28
|
-
* the response body contains `{ job_id, status }` instead of the full
|
|
29
|
-
* extraction result.
|
|
30
|
-
*
|
|
31
27
|
* @param {Pulse.ExtractRequest} request
|
|
32
28
|
* @param {PulseClient.RequestOptions} requestOptions - Request-specific configuration.
|
|
33
29
|
*
|
package/dist/cjs/Client.js
CHANGED
|
@@ -74,10 +74,6 @@ class PulseClient {
|
|
|
74
74
|
* Set `async: true` to return immediately with a job_id for polling via
|
|
75
75
|
* GET /job/{jobId}. Otherwise processes synchronously.
|
|
76
76
|
*
|
|
77
|
-
* **Note:** Both sync and async modes return HTTP 200. When `async` is true
|
|
78
|
-
* the response body contains `{ job_id, status }` instead of the full
|
|
79
|
-
* extraction result.
|
|
80
|
-
*
|
|
81
77
|
* @param {Pulse.ExtractRequest} request
|
|
82
78
|
* @param {PulseClient.RequestOptions} requestOptions - Request-specific configuration.
|
|
83
79
|
*
|
|
@@ -102,17 +98,27 @@ class PulseClient {
|
|
|
102
98
|
if (request.fileUrl != null) {
|
|
103
99
|
_request.append("fileUrl", request.fileUrl);
|
|
104
100
|
}
|
|
101
|
+
if (request.pages != null) {
|
|
102
|
+
_request.append("pages", request.pages);
|
|
103
|
+
}
|
|
104
|
+
if (request.figureProcessing != null) {
|
|
105
|
+
_request.append("figureProcessing", (0, json_js_1.toJson)(request.figureProcessing));
|
|
106
|
+
}
|
|
107
|
+
if (request.extensions != null) {
|
|
108
|
+
_request.append("extensions", (0, json_js_1.toJson)(request.extensions));
|
|
109
|
+
}
|
|
110
|
+
if (request.storage != null) {
|
|
111
|
+
_request.append("storage", (0, json_js_1.toJson)(request.storage));
|
|
112
|
+
}
|
|
113
|
+
if (request.async != null) {
|
|
114
|
+
_request.append("async", request.async.toString());
|
|
115
|
+
}
|
|
105
116
|
if (request.structuredOutput != null) {
|
|
106
117
|
_request.append("structuredOutput", (0, json_js_1.toJson)(request.structuredOutput));
|
|
107
118
|
}
|
|
108
119
|
if (request.schema != null) {
|
|
109
120
|
_request.append("schema", typeof request.schema === "string" ? request.schema : (0, json_js_1.toJson)(request.schema));
|
|
110
121
|
}
|
|
111
|
-
if (request.experimentalSchema != null) {
|
|
112
|
-
_request.append("experimentalSchema", typeof request.experimentalSchema === "string"
|
|
113
|
-
? request.experimentalSchema
|
|
114
|
-
: (0, json_js_1.toJson)(request.experimentalSchema));
|
|
115
|
-
}
|
|
116
122
|
if (request.schemaPrompt != null) {
|
|
117
123
|
_request.append("schemaPrompt", request.schemaPrompt);
|
|
118
124
|
}
|
|
@@ -125,9 +131,6 @@ class PulseClient {
|
|
|
125
131
|
if (request.chunkSize != null) {
|
|
126
132
|
_request.append("chunkSize", request.chunkSize.toString());
|
|
127
133
|
}
|
|
128
|
-
if (request.pages != null) {
|
|
129
|
-
_request.append("pages", request.pages);
|
|
130
|
-
}
|
|
131
134
|
if (request.extractFigure != null) {
|
|
132
135
|
_request.append("extractFigure", request.extractFigure.toString());
|
|
133
136
|
}
|
|
@@ -140,18 +143,9 @@ class PulseClient {
|
|
|
140
143
|
if (request.returnHtml != null) {
|
|
141
144
|
_request.append("returnHtml", request.returnHtml.toString());
|
|
142
145
|
}
|
|
143
|
-
if (request.effort != null) {
|
|
144
|
-
_request.append("effort", request.effort.toString());
|
|
145
|
-
}
|
|
146
146
|
if (request.thinking != null) {
|
|
147
147
|
_request.append("thinking", request.thinking.toString());
|
|
148
148
|
}
|
|
149
|
-
if (request.storage != null) {
|
|
150
|
-
_request.append("storage", (0, json_js_1.toJson)(request.storage));
|
|
151
|
-
}
|
|
152
|
-
if (request.async != null) {
|
|
153
|
-
_request.append("async", request.async.toString());
|
|
154
|
-
}
|
|
155
149
|
const _maybeEncodedRequest = yield _request.getRequest();
|
|
156
150
|
const _authRequest = yield this._options.authProvider.getAuthRequest();
|
|
157
151
|
const _headers = (0, headers_js_1.mergeHeaders)(_authRequest.headers, (_a = this._options) === null || _a === void 0 ? void 0 : _a.headers, (0, headers_js_1.mergeOnlyDefinedHeaders)(Object.assign({}, _maybeEncodedRequest.headers)), requestOptions === null || requestOptions === void 0 ? void 0 : requestOptions.headers);
|
|
@@ -222,17 +216,27 @@ class PulseClient {
|
|
|
222
216
|
if (request.fileUrl != null) {
|
|
223
217
|
_request.append("fileUrl", request.fileUrl);
|
|
224
218
|
}
|
|
219
|
+
if (request.pages != null) {
|
|
220
|
+
_request.append("pages", request.pages);
|
|
221
|
+
}
|
|
222
|
+
if (request.figureProcessing != null) {
|
|
223
|
+
_request.append("figureProcessing", (0, json_js_1.toJson)(request.figureProcessing));
|
|
224
|
+
}
|
|
225
|
+
if (request.extensions != null) {
|
|
226
|
+
_request.append("extensions", (0, json_js_1.toJson)(request.extensions));
|
|
227
|
+
}
|
|
228
|
+
if (request.storage != null) {
|
|
229
|
+
_request.append("storage", (0, json_js_1.toJson)(request.storage));
|
|
230
|
+
}
|
|
231
|
+
if (request.async != null) {
|
|
232
|
+
_request.append("async", request.async.toString());
|
|
233
|
+
}
|
|
225
234
|
if (request.structuredOutput != null) {
|
|
226
235
|
_request.append("structuredOutput", (0, json_js_1.toJson)(request.structuredOutput));
|
|
227
236
|
}
|
|
228
237
|
if (request.schema != null) {
|
|
229
238
|
_request.append("schema", typeof request.schema === "string" ? request.schema : (0, json_js_1.toJson)(request.schema));
|
|
230
239
|
}
|
|
231
|
-
if (request.experimentalSchema != null) {
|
|
232
|
-
_request.append("experimentalSchema", typeof request.experimentalSchema === "string"
|
|
233
|
-
? request.experimentalSchema
|
|
234
|
-
: (0, json_js_1.toJson)(request.experimentalSchema));
|
|
235
|
-
}
|
|
236
240
|
if (request.schemaPrompt != null) {
|
|
237
241
|
_request.append("schemaPrompt", request.schemaPrompt);
|
|
238
242
|
}
|
|
@@ -245,9 +249,6 @@ class PulseClient {
|
|
|
245
249
|
if (request.chunkSize != null) {
|
|
246
250
|
_request.append("chunkSize", request.chunkSize.toString());
|
|
247
251
|
}
|
|
248
|
-
if (request.pages != null) {
|
|
249
|
-
_request.append("pages", request.pages);
|
|
250
|
-
}
|
|
251
252
|
if (request.extractFigure != null) {
|
|
252
253
|
_request.append("extractFigure", request.extractFigure.toString());
|
|
253
254
|
}
|
|
@@ -260,18 +261,9 @@ class PulseClient {
|
|
|
260
261
|
if (request.returnHtml != null) {
|
|
261
262
|
_request.append("returnHtml", request.returnHtml.toString());
|
|
262
263
|
}
|
|
263
|
-
if (request.effort != null) {
|
|
264
|
-
_request.append("effort", request.effort.toString());
|
|
265
|
-
}
|
|
266
264
|
if (request.thinking != null) {
|
|
267
265
|
_request.append("thinking", request.thinking.toString());
|
|
268
266
|
}
|
|
269
|
-
if (request.storage != null) {
|
|
270
|
-
_request.append("storage", (0, json_js_1.toJson)(request.storage));
|
|
271
|
-
}
|
|
272
|
-
if (request.async != null) {
|
|
273
|
-
_request.append("async", request.async.toString());
|
|
274
|
-
}
|
|
275
267
|
const _maybeEncodedRequest = yield _request.getRequest();
|
|
276
268
|
const _authRequest = yield this._options.authProvider.getAuthRequest();
|
|
277
269
|
const _headers = (0, headers_js_1.mergeHeaders)(_authRequest.headers, (_a = this._options) === null || _a === void 0 ? void 0 : _a.headers, (0, headers_js_1.mergeOnlyDefinedHeaders)(Object.assign({}, _maybeEncodedRequest.headers)), requestOptions === null || requestOptions === void 0 ? void 0 : requestOptions.headers);
|
|
package/dist/cjs/api/client/requests/ExtractAsyncRequest.d.ts
CHANGED
|
@@ -8,57 +8,96 @@ export interface ExtractAsyncRequest {
|
|
|
8
8
|
file?: core.file.Uploadable | undefined;
|
|
9
9
|
/** Public or pre-signed URL that Pulse will download and extract. Required unless file is provided. */
|
|
10
10
|
fileUrl?: string;
|
|
11
|
+
/** Page range filter supporting segments such as `1-2` or mixed ranges like `1-2,5`. */
|
|
12
|
+
pages?: string;
|
|
13
|
+
/** Settings that control how figures in the document are processed. These affect the markdown output directly (e.g. figure descriptions, chart-to-table conversion, image embedding) and do not produce additional output fields in the response. */
|
|
14
|
+
figureProcessing?: ExtractAsyncRequest.FigureProcessing;
|
|
15
|
+
/** Settings that enable additional processing passes or alternate output formats. Each enabled extension produces a corresponding output field under `response.extensions.*`. */
|
|
16
|
+
extensions?: ExtractAsyncRequest.Extensions;
|
|
17
|
+
/** Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created. */
|
|
18
|
+
storage?: ExtractAsyncRequest.Storage;
|
|
19
|
+
/** If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously. */
|
|
20
|
+
async?: boolean;
|
|
11
21
|
/** **⚠️ DEPRECATED** — Use the `/schema` endpoint after extraction instead. Pass the `extraction_id` from the extract response to `/schema` with your `schema_config`. This parameter still works for backward compatibility but will be removed in a future version. */
|
|
12
22
|
structuredOutput?: ExtractAsyncRequest.StructuredOutput;
|
|
13
23
|
/** (Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation. */
|
|
14
24
|
schema?: ExtractAsyncRequest.Schema;
|
|
15
|
-
/** (Deprecated) Experimental schema definition used for feature flagged behaviour. Accepts either a JSON object or a stringified JSON representation. */
|
|
16
|
-
experimentalSchema?: ExtractAsyncRequest.ExperimentalSchema;
|
|
17
25
|
/** (Deprecated) Natural language prompt for schema-guided extraction. Use structuredOutput.schemaPrompt instead. */
|
|
18
26
|
schemaPrompt?: string;
|
|
19
|
-
/** (Deprecated) Custom instructions that augment the default extraction behaviour. */
|
|
27
|
+
/** (Deprecated) Custom instructions that augment the default extraction behaviour. Use `figureProcessing` or `extensions` instead. */
|
|
20
28
|
customPrompt?: string;
|
|
21
|
-
/** Comma-separated list of chunking strategies to apply (for example `semantic,header,page,recursive`). */
|
|
29
|
+
/** **⚠️ DEPRECATED** — Use `extensions.chunking.chunkTypes` instead. Comma-separated list of chunking strategies to apply (for example `semantic,header,page,recursive`). Still accepted for backward compatibility. */
|
|
22
30
|
chunking?: string;
|
|
23
|
-
/** Override for maximum characters per chunk when chunking is enabled. */
|
|
31
|
+
/** **⚠️ DEPRECATED** — Use `extensions.chunking.chunkSize` instead. Override for maximum characters per chunk when chunking is enabled. */
|
|
24
32
|
chunkSize?: number;
|
|
25
|
-
/**
|
|
26
|
-
pages?: string;
|
|
27
|
-
/** Toggle to enable figure extraction in results. */
|
|
33
|
+
/** **⚠️ DEPRECATED** — Toggle to enable figure extraction in results. */
|
|
28
34
|
extractFigure?: boolean;
|
|
29
|
-
/** Toggle to generate descriptive captions for extracted figures. */
|
|
35
|
+
/** **⚠️ DEPRECATED** — Use `figureProcessing.description` instead. Toggle to generate descriptive captions for extracted figures. */
|
|
30
36
|
figureDescription?: boolean;
|
|
31
|
-
/** Embed base64-encoded images inline in figure tags in the output. Increases response size. */
|
|
37
|
+
/** **⚠️ DEPRECATED** — Use `figureProcessing.showImages` instead. Embed base64-encoded images inline in figure tags in the output. Increases response size. */
|
|
32
38
|
showImages?: boolean;
|
|
33
|
-
/** Whether to include HTML representation alongside markdown in the response. */
|
|
39
|
+
/** **⚠️ DEPRECATED** — Use `extensions.altOutputs.returnHtml` instead. Whether to include HTML representation alongside markdown in the response. */
|
|
34
40
|
returnHtml?: boolean;
|
|
35
|
-
/** Enable extended reasoning mode for higher quality extraction on complex documents. Uses a more powerful model at higher latency. */
|
|
36
|
-
effort?: boolean;
|
|
37
41
|
/** (Deprecated) Enables expanded rationale output for debugging. */
|
|
38
42
|
thinking?: boolean;
|
|
39
|
-
/** Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created. */
|
|
40
|
-
storage?: ExtractAsyncRequest.Storage;
|
|
41
|
-
/** If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously. */
|
|
42
|
-
async?: boolean;
|
|
43
43
|
}
|
|
44
44
|
export declare namespace ExtractAsyncRequest {
|
|
45
45
|
/**
|
|
46
|
-
*
|
|
46
|
+
* Settings that control how figures in the document are processed. These affect the markdown output directly (e.g. figure descriptions, chart-to-table conversion, image embedding) and do not produce additional output fields in the response.
|
|
47
47
|
*/
|
|
48
|
-
interface
|
|
49
|
-
/**
|
|
50
|
-
|
|
51
|
-
/**
|
|
52
|
-
|
|
48
|
+
interface FigureProcessing {
|
|
49
|
+
/** Generate descriptive captions for extracted figures. */
|
|
50
|
+
description?: boolean;
|
|
51
|
+
/** Embed base64-encoded images inline in figure tags in the output. Increases response size. */
|
|
52
|
+
showImages?: boolean;
|
|
53
53
|
}
|
|
54
54
|
/**
|
|
55
|
-
*
|
|
55
|
+
* Settings that enable additional processing passes or alternate output formats. Each enabled extension produces a corresponding output field under `response.extensions.*`.
|
|
56
56
|
*/
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
57
|
+
interface Extensions {
|
|
58
|
+
/** Merge tables that span multiple pages into a single table. */
|
|
59
|
+
mergeTables?: boolean;
|
|
60
|
+
/** Link footnote markers to their corresponding footnote text. */
|
|
61
|
+
footnoteReferences?: boolean;
|
|
62
|
+
/** Chunking configuration. When provided, the document is split into chunks using the specified strategies. Results appear in `response.extensions.chunking`. */
|
|
63
|
+
chunking?: Extensions.Chunking;
|
|
64
|
+
/** Alternate output format options. Each enabled format produces a corresponding field under `response.extensions.altOutputs`. */
|
|
65
|
+
altOutputs?: Extensions.AltOutputs;
|
|
66
|
+
}
|
|
67
|
+
namespace Extensions {
|
|
68
|
+
/**
|
|
69
|
+
* Chunking configuration. When provided, the document is split into chunks using the specified strategies. Results appear in `response.extensions.chunking`.
|
|
70
|
+
*/
|
|
71
|
+
interface Chunking {
|
|
72
|
+
/** List of chunking strategies to apply (e.g. `["semantic", "header", "page", "recursive"]`). */
|
|
73
|
+
chunkTypes?: Chunking.ChunkTypes.Item[];
|
|
74
|
+
/** Maximum characters per chunk. */
|
|
75
|
+
chunkSize?: number;
|
|
76
|
+
}
|
|
77
|
+
namespace Chunking {
|
|
78
|
+
type ChunkTypes = ChunkTypes.Item[];
|
|
79
|
+
namespace ChunkTypes {
|
|
80
|
+
const Item: {
|
|
81
|
+
readonly Semantic: "semantic";
|
|
82
|
+
readonly Header: "header";
|
|
83
|
+
readonly Page: "page";
|
|
84
|
+
readonly Recursive: "recursive";
|
|
85
|
+
};
|
|
86
|
+
type Item = (typeof Item)[keyof typeof Item];
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Alternate output format options. Each enabled format produces a corresponding field under `response.extensions.altOutputs`.
|
|
91
|
+
*/
|
|
92
|
+
interface AltOutputs {
|
|
93
|
+
/** Enable word-level bounding boxes. Runs an additional OCR model to derive bounding boxes for each word. Only applies to PDFs. Results in `response.extensions.altOutputs.wlbb`. */
|
|
94
|
+
wlbb?: boolean;
|
|
95
|
+
/** Include an HTML representation of the document. When enabled, `response.markdown` is still present and the HTML is available at `response.extensions.altOutputs.html`. */
|
|
96
|
+
returnHtml?: boolean;
|
|
97
|
+
/** Include an XML representation of the document. Results in `response.extensions.altOutputs.xml`. (Work in progress.) */
|
|
98
|
+
returnXml?: boolean;
|
|
99
|
+
}
|
|
100
|
+
}
|
|
62
101
|
/**
|
|
63
102
|
* Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.
|
|
64
103
|
*/
|
|
@@ -67,5 +106,22 @@ export declare namespace ExtractAsyncRequest {
|
|
|
67
106
|
enabled?: boolean;
|
|
68
107
|
/** Target folder name to save the extraction to. Creates the folder if it doesn't exist. */
|
|
69
108
|
folderName?: string;
|
|
109
|
+
/** Target folder ID to save the extraction to. Takes precedence over folderName if both are provided. */
|
|
110
|
+
folderId?: string;
|
|
111
|
+
}
|
|
112
|
+
/**
|
|
113
|
+
* **⚠️ DEPRECATED** — Use the `/schema` endpoint after extraction instead. Pass the `extraction_id` from the extract response to `/schema` with your `schema_config`. This parameter still works for backward compatibility but will be removed in a future version.
|
|
114
|
+
*/
|
|
115
|
+
interface StructuredOutput {
|
|
116
|
+
/** JSON schema describing the structured data to extract. */
|
|
117
|
+
schema?: Record<string, unknown>;
|
|
118
|
+
/** Natural language prompt with additional extraction instructions. */
|
|
119
|
+
schemaPrompt?: string;
|
|
120
|
+
/** Use higher quality model for better results. When true, uses a more capable model at the cost of higher latency. */
|
|
121
|
+
effort?: boolean;
|
|
70
122
|
}
|
|
123
|
+
/**
|
|
124
|
+
* (Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation.
|
|
125
|
+
*/
|
|
126
|
+
type Schema = Record<string, unknown> | string;
|
|
71
127
|
}
|
|
package/dist/cjs/api/client/requests/ExtractAsyncRequest.js
CHANGED
|
@@ -1,3 +1,22 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
// This file was auto-generated by Fern from our API Definition.
|
|
3
3
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.ExtractAsyncRequest = void 0;
|
|
5
|
+
var ExtractAsyncRequest;
|
|
6
|
+
(function (ExtractAsyncRequest) {
|
|
7
|
+
let Extensions;
|
|
8
|
+
(function (Extensions) {
|
|
9
|
+
let Chunking;
|
|
10
|
+
(function (Chunking) {
|
|
11
|
+
let ChunkTypes;
|
|
12
|
+
(function (ChunkTypes) {
|
|
13
|
+
ChunkTypes.Item = {
|
|
14
|
+
Semantic: "semantic",
|
|
15
|
+
Header: "header",
|
|
16
|
+
Page: "page",
|
|
17
|
+
Recursive: "recursive",
|
|
18
|
+
};
|
|
19
|
+
})(ChunkTypes = Chunking.ChunkTypes || (Chunking.ChunkTypes = {}));
|
|
20
|
+
})(Chunking = Extensions.Chunking || (Extensions.Chunking = {}));
|
|
21
|
+
})(Extensions = ExtractAsyncRequest.Extensions || (ExtractAsyncRequest.Extensions = {}));
|
|
22
|
+
})(ExtractAsyncRequest || (exports.ExtractAsyncRequest = ExtractAsyncRequest = {}));
|
|
package/dist/cjs/api/client/requests/ExtractRequest.d.ts
CHANGED
|
@@ -8,57 +8,96 @@ export interface ExtractRequest {
|
|
|
8
8
|
file?: core.file.Uploadable | undefined;
|
|
9
9
|
/** Public or pre-signed URL that Pulse will download and extract. Required unless file is provided. */
|
|
10
10
|
fileUrl?: string;
|
|
11
|
+
/** Page range filter supporting segments such as `1-2` or mixed ranges like `1-2,5`. */
|
|
12
|
+
pages?: string;
|
|
13
|
+
/** Settings that control how figures in the document are processed. These affect the markdown output directly (e.g. figure descriptions, chart-to-table conversion, image embedding) and do not produce additional output fields in the response. */
|
|
14
|
+
figureProcessing?: ExtractRequest.FigureProcessing;
|
|
15
|
+
/** Settings that enable additional processing passes or alternate output formats. Each enabled extension produces a corresponding output field under `response.extensions.*`. */
|
|
16
|
+
extensions?: ExtractRequest.Extensions;
|
|
17
|
+
/** Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created. */
|
|
18
|
+
storage?: ExtractRequest.Storage;
|
|
19
|
+
/** If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously. */
|
|
20
|
+
async?: boolean;
|
|
11
21
|
/** **⚠️ DEPRECATED** — Use the `/schema` endpoint after extraction instead. Pass the `extraction_id` from the extract response to `/schema` with your `schema_config`. This parameter still works for backward compatibility but will be removed in a future version. */
|
|
12
22
|
structuredOutput?: ExtractRequest.StructuredOutput;
|
|
13
23
|
/** (Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation. */
|
|
14
24
|
schema?: ExtractRequest.Schema;
|
|
15
|
-
/** (Deprecated) Experimental schema definition used for feature flagged behaviour. Accepts either a JSON object or a stringified JSON representation. */
|
|
16
|
-
experimentalSchema?: ExtractRequest.ExperimentalSchema;
|
|
17
25
|
/** (Deprecated) Natural language prompt for schema-guided extraction. Use structuredOutput.schemaPrompt instead. */
|
|
18
26
|
schemaPrompt?: string;
|
|
19
|
-
/** (Deprecated) Custom instructions that augment the default extraction behaviour. */
|
|
27
|
+
/** (Deprecated) Custom instructions that augment the default extraction behaviour. Use `figureProcessing` or `extensions` instead. */
|
|
20
28
|
customPrompt?: string;
|
|
21
|
-
/** Comma-separated list of chunking strategies to apply (for example `semantic,header,page,recursive`). */
|
|
29
|
+
/** **⚠️ DEPRECATED** — Use `extensions.chunking.chunkTypes` instead. Comma-separated list of chunking strategies to apply (for example `semantic,header,page,recursive`). Still accepted for backward compatibility. */
|
|
22
30
|
chunking?: string;
|
|
23
|
-
/** Override for maximum characters per chunk when chunking is enabled. */
|
|
31
|
+
/** **⚠️ DEPRECATED** — Use `extensions.chunking.chunkSize` instead. Override for maximum characters per chunk when chunking is enabled. */
|
|
24
32
|
chunkSize?: number;
|
|
25
|
-
/**
|
|
26
|
-
pages?: string;
|
|
27
|
-
/** Toggle to enable figure extraction in results. */
|
|
33
|
+
/** **⚠️ DEPRECATED** — Toggle to enable figure extraction in results. */
|
|
28
34
|
extractFigure?: boolean;
|
|
29
|
-
/** Toggle to generate descriptive captions for extracted figures. */
|
|
35
|
+
/** **⚠️ DEPRECATED** — Use `figureProcessing.description` instead. Toggle to generate descriptive captions for extracted figures. */
|
|
30
36
|
figureDescription?: boolean;
|
|
31
|
-
/** Embed base64-encoded images inline in figure tags in the output. Increases response size. */
|
|
37
|
+
/** **⚠️ DEPRECATED** — Use `figureProcessing.showImages` instead. Embed base64-encoded images inline in figure tags in the output. Increases response size. */
|
|
32
38
|
showImages?: boolean;
|
|
33
|
-
/** Whether to include HTML representation alongside markdown in the response. */
|
|
39
|
+
/** **⚠️ DEPRECATED** — Use `extensions.altOutputs.returnHtml` instead. Whether to include HTML representation alongside markdown in the response. */
|
|
34
40
|
returnHtml?: boolean;
|
|
35
|
-
/** Enable extended reasoning mode for higher quality extraction on complex documents. Uses a more powerful model at higher latency. */
|
|
36
|
-
effort?: boolean;
|
|
37
41
|
/** (Deprecated) Enables expanded rationale output for debugging. */
|
|
38
42
|
thinking?: boolean;
|
|
39
|
-
/** Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created. */
|
|
40
|
-
storage?: ExtractRequest.Storage;
|
|
41
|
-
/** If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously. */
|
|
42
|
-
async?: boolean;
|
|
43
43
|
}
|
|
44
44
|
export declare namespace ExtractRequest {
|
|
45
45
|
/**
|
|
46
|
-
*
|
|
46
|
+
* Settings that control how figures in the document are processed. These affect the markdown output directly (e.g. figure descriptions, chart-to-table conversion, image embedding) and do not produce additional output fields in the response.
|
|
47
47
|
*/
|
|
48
|
-
interface
|
|
49
|
-
/**
|
|
50
|
-
|
|
51
|
-
/**
|
|
52
|
-
|
|
48
|
+
interface FigureProcessing {
|
|
49
|
+
/** Generate descriptive captions for extracted figures. */
|
|
50
|
+
description?: boolean;
|
|
51
|
+
/** Embed base64-encoded images inline in figure tags in the output. Increases response size. */
|
|
52
|
+
showImages?: boolean;
|
|
53
53
|
}
|
|
54
54
|
/**
|
|
55
|
-
*
|
|
55
|
+
* Settings that enable additional processing passes or alternate output formats. Each enabled extension produces a corresponding output field under `response.extensions.*`.
|
|
56
56
|
*/
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
57
|
+
interface Extensions {
|
|
58
|
+
/** Merge tables that span multiple pages into a single table. */
|
|
59
|
+
mergeTables?: boolean;
|
|
60
|
+
/** Link footnote markers to their corresponding footnote text. */
|
|
61
|
+
footnoteReferences?: boolean;
|
|
62
|
+
/** Chunking configuration. When provided, the document is split into chunks using the specified strategies. Results appear in `response.extensions.chunking`. */
|
|
63
|
+
chunking?: Extensions.Chunking;
|
|
64
|
+
/** Alternate output format options. Each enabled format produces a corresponding field under `response.extensions.altOutputs`. */
|
|
65
|
+
altOutputs?: Extensions.AltOutputs;
|
|
66
|
+
}
|
|
67
|
+
namespace Extensions {
|
|
68
|
+
/**
|
|
69
|
+
* Chunking configuration. When provided, the document is split into chunks using the specified strategies. Results appear in `response.extensions.chunking`.
|
|
70
|
+
*/
|
|
71
|
+
interface Chunking {
|
|
72
|
+
/** List of chunking strategies to apply (e.g. `["semantic", "header", "page", "recursive"]`). */
|
|
73
|
+
chunkTypes?: Chunking.ChunkTypes.Item[];
|
|
74
|
+
/** Maximum characters per chunk. */
|
|
75
|
+
chunkSize?: number;
|
|
76
|
+
}
|
|
77
|
+
namespace Chunking {
|
|
78
|
+
type ChunkTypes = ChunkTypes.Item[];
|
|
79
|
+
namespace ChunkTypes {
|
|
80
|
+
const Item: {
|
|
81
|
+
readonly Semantic: "semantic";
|
|
82
|
+
readonly Header: "header";
|
|
83
|
+
readonly Page: "page";
|
|
84
|
+
readonly Recursive: "recursive";
|
|
85
|
+
};
|
|
86
|
+
type Item = (typeof Item)[keyof typeof Item];
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Alternate output format options. Each enabled format produces a corresponding field under `response.extensions.altOutputs`.
|
|
91
|
+
*/
|
|
92
|
+
interface AltOutputs {
|
|
93
|
+
/** Enable word-level bounding boxes. Runs an additional OCR model to derive bounding boxes for each word. Only applies to PDFs. Results in `response.extensions.altOutputs.wlbb`. */
|
|
94
|
+
wlbb?: boolean;
|
|
95
|
+
/** Include an HTML representation of the document. When enabled, `response.markdown` is still present and the HTML is available at `response.extensions.altOutputs.html`. */
|
|
96
|
+
returnHtml?: boolean;
|
|
97
|
+
/** Include an XML representation of the document. Results in `response.extensions.altOutputs.xml`. (Work in progress.) */
|
|
98
|
+
returnXml?: boolean;
|
|
99
|
+
}
|
|
100
|
+
}
|
|
62
101
|
/**
|
|
63
102
|
* Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.
|
|
64
103
|
*/
|
|
@@ -67,5 +106,22 @@ export declare namespace ExtractRequest {
|
|
|
67
106
|
enabled?: boolean;
|
|
68
107
|
/** Target folder name to save the extraction to. Creates the folder if it doesn't exist. */
|
|
69
108
|
folderName?: string;
|
|
109
|
+
/** Target folder ID to save the extraction to. Takes precedence over folderName if both are provided. */
|
|
110
|
+
folderId?: string;
|
|
111
|
+
}
|
|
112
|
+
/**
|
|
113
|
+
* **⚠️ DEPRECATED** — Use the `/schema` endpoint after extraction instead. Pass the `extraction_id` from the extract response to `/schema` with your `schema_config`. This parameter still works for backward compatibility but will be removed in a future version.
|
|
114
|
+
*/
|
|
115
|
+
interface StructuredOutput {
|
|
116
|
+
/** JSON schema describing the structured data to extract. */
|
|
117
|
+
schema?: Record<string, unknown>;
|
|
118
|
+
/** Natural language prompt with additional extraction instructions. */
|
|
119
|
+
schemaPrompt?: string;
|
|
120
|
+
/** Use higher quality model for better results. When true, uses a more capable model at the cost of higher latency. */
|
|
121
|
+
effort?: boolean;
|
|
70
122
|
}
|
|
123
|
+
/**
|
|
124
|
+
* (Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation.
|
|
125
|
+
*/
|
|
126
|
+
type Schema = Record<string, unknown> | string;
|
|
71
127
|
}
|
|
package/dist/cjs/api/client/requests/ExtractRequest.js
CHANGED
|
@@ -1,3 +1,22 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
// This file was auto-generated by Fern from our API Definition.
|
|
3
3
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.ExtractRequest = void 0;
|
|
5
|
+
var ExtractRequest;
|
|
6
|
+
(function (ExtractRequest) {
|
|
7
|
+
let Extensions;
|
|
8
|
+
(function (Extensions) {
|
|
9
|
+
let Chunking;
|
|
10
|
+
(function (Chunking) {
|
|
11
|
+
let ChunkTypes;
|
|
12
|
+
(function (ChunkTypes) {
|
|
13
|
+
ChunkTypes.Item = {
|
|
14
|
+
Semantic: "semantic",
|
|
15
|
+
Header: "header",
|
|
16
|
+
Page: "page",
|
|
17
|
+
Recursive: "recursive",
|
|
18
|
+
};
|
|
19
|
+
})(ChunkTypes = Chunking.ChunkTypes || (Chunking.ChunkTypes = {}));
|
|
20
|
+
})(Chunking = Extensions.Chunking || (Extensions.Chunking = {}));
|
|
21
|
+
})(Extensions = ExtractRequest.Extensions || (ExtractRequest.Extensions = {}));
|
|
22
|
+
})(ExtractRequest || (exports.ExtractRequest = ExtractRequest = {}));
|