graphor 0.12.2 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +67 -0
- package/README.md +320 -31
- package/client.d.mts +3 -6
- package/client.d.mts.map +1 -1
- package/client.d.ts +3 -6
- package/client.d.ts.map +1 -1
- package/client.js +15 -20
- package/client.js.map +1 -1
- package/client.mjs +15 -20
- package/client.mjs.map +1 -1
- package/internal/qs/formats.d.mts +7 -0
- package/internal/qs/formats.d.mts.map +1 -0
- package/internal/qs/formats.d.ts +7 -0
- package/internal/qs/formats.d.ts.map +1 -0
- package/internal/qs/formats.js +13 -0
- package/internal/qs/formats.js.map +1 -0
- package/internal/qs/formats.mjs +9 -0
- package/internal/qs/formats.mjs.map +1 -0
- package/internal/qs/index.d.mts +10 -0
- package/internal/qs/index.d.mts.map +1 -0
- package/internal/qs/index.d.ts +10 -0
- package/internal/qs/index.d.ts.map +1 -0
- package/internal/qs/index.js +14 -0
- package/internal/qs/index.js.map +1 -0
- package/internal/qs/index.mjs +10 -0
- package/internal/qs/index.mjs.map +1 -0
- package/internal/qs/stringify.d.mts +3 -0
- package/internal/qs/stringify.d.mts.map +1 -0
- package/internal/qs/stringify.d.ts +3 -0
- package/internal/qs/stringify.d.ts.map +1 -0
- package/internal/qs/stringify.js +277 -0
- package/internal/qs/stringify.js.map +1 -0
- package/internal/qs/stringify.mjs +274 -0
- package/internal/qs/stringify.mjs.map +1 -0
- package/internal/qs/types.d.mts +57 -0
- package/internal/qs/types.d.mts.map +1 -0
- package/internal/qs/types.d.ts +57 -0
- package/internal/qs/types.d.ts.map +1 -0
- package/internal/qs/types.js +3 -0
- package/internal/qs/types.js.map +1 -0
- package/internal/qs/types.mjs +2 -0
- package/internal/qs/types.mjs.map +1 -0
- package/internal/qs/utils.d.mts +15 -0
- package/internal/qs/utils.d.mts.map +1 -0
- package/internal/qs/utils.d.ts +15 -0
- package/internal/qs/utils.d.ts.map +1 -0
- package/internal/qs/utils.js +230 -0
- package/internal/qs/utils.js.map +1 -0
- package/internal/qs/utils.mjs +217 -0
- package/internal/qs/utils.mjs.map +1 -0
- package/internal/tslib.js +17 -17
- package/internal/utils/query.d.mts +2 -0
- package/internal/utils/query.d.mts.map +1 -0
- package/internal/utils/query.d.ts +2 -0
- package/internal/utils/query.d.ts.map +1 -0
- package/internal/utils/query.js +10 -0
- package/internal/utils/query.js.map +1 -0
- package/internal/utils/query.mjs +6 -0
- package/internal/utils/query.mjs.map +1 -0
- package/internal/utils.d.mts +1 -0
- package/internal/utils.d.ts +1 -0
- package/internal/utils.js +1 -0
- package/internal/utils.js.map +1 -1
- package/internal/utils.mjs +1 -0
- package/package.json +1 -1
- package/resources/index.d.mts +1 -1
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +1 -1
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js.map +1 -1
- package/resources/index.mjs.map +1 -1
- package/resources/sources.d.mts +496 -332
- package/resources/sources.d.mts.map +1 -1
- package/resources/sources.d.ts +496 -332
- package/resources/sources.d.ts.map +1 -1
- package/resources/sources.js +200 -236
- package/resources/sources.js.map +1 -1
- package/resources/sources.mjs +200 -236
- package/resources/sources.mjs.map +1 -1
- package/src/client.ts +52 -40
- package/src/internal/qs/LICENSE.md +13 -0
- package/src/internal/qs/README.md +3 -0
- package/src/internal/qs/formats.ts +10 -0
- package/src/internal/qs/index.ts +13 -0
- package/src/internal/qs/stringify.ts +385 -0
- package/src/internal/qs/types.ts +71 -0
- package/src/internal/qs/utils.ts +265 -0
- package/src/internal/utils/query.ts +7 -0
- package/src/internal/utils.ts +1 -0
- package/src/resources/index.ts +17 -8
- package/src/resources/sources.ts +617 -367
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
package/src/resources/sources.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { APIPromise } from '../core/api-promise';
|
|
|
5
5
|
import { type Uploadable } from '../core/uploads';
|
|
6
6
|
import { RequestOptions } from '../internal/request-options';
|
|
7
7
|
import { multipartFormRequestOptions } from '../internal/uploads';
|
|
8
|
+
import { path } from '../internal/utils/path';
|
|
8
9
|
|
|
9
10
|
export class Sources extends APIResource {
|
|
10
11
|
/**
|
|
@@ -15,12 +16,17 @@ export class Sources extends APIResource {
|
|
|
15
16
|
* type, origin) along with its current processing status and a human-readable
|
|
16
17
|
* status message.
|
|
17
18
|
*
|
|
19
|
+
* **Query parameters:**
|
|
20
|
+
*
|
|
21
|
+
* - **file_ids** (list, optional): If provided, only sources whose file_id is in
|
|
22
|
+
* this list are returned. Repeat the param for multiple IDs (e.g.
|
|
23
|
+
* ?file_ids=id1&file_ids=id2).
|
|
24
|
+
*
|
|
18
25
|
* **Status messages returned per source:**
|
|
19
26
|
*
|
|
20
27
|
* - `"completed"` → _"Source processed successfully"_
|
|
21
28
|
* - `"processing"` → _"Source is being processed"_
|
|
22
29
|
* - `"failed"` → _"Source processing failed"_
|
|
23
|
-
* - `"new"` → _"Source uploaded, awaiting processing"_
|
|
24
30
|
*
|
|
25
31
|
* **Returns** a JSON array of `PublicSourceResponse` objects.
|
|
26
32
|
*
|
|
@@ -33,8 +39,11 @@ export class Sources extends APIResource {
|
|
|
33
39
|
* const publicSources = await client.sources.list();
|
|
34
40
|
* ```
|
|
35
41
|
*/
|
|
36
|
-
list(
|
|
37
|
-
|
|
42
|
+
list(
|
|
43
|
+
query: SourceListParams | null | undefined = {},
|
|
44
|
+
options?: RequestOptions,
|
|
45
|
+
): APIPromise<SourceListResponse> {
|
|
46
|
+
return this._client.get('/sources', { query, ...options });
|
|
38
47
|
}
|
|
39
48
|
|
|
40
49
|
/**
|
|
@@ -170,362 +179,434 @@ export class Sources extends APIResource {
|
|
|
170
179
|
}
|
|
171
180
|
|
|
172
181
|
/**
|
|
173
|
-
*
|
|
174
|
-
*
|
|
175
|
-
*
|
|
176
|
-
*
|
|
177
|
-
*
|
|
178
|
-
*
|
|
179
|
-
*
|
|
180
|
-
*
|
|
181
|
-
*
|
|
182
|
-
*
|
|
183
|
-
* -
|
|
184
|
-
*
|
|
185
|
-
*
|
|
186
|
-
*
|
|
187
|
-
*
|
|
188
|
-
*
|
|
189
|
-
*
|
|
190
|
-
*
|
|
191
|
-
*
|
|
192
|
-
*
|
|
193
|
-
*
|
|
194
|
-
*
|
|
195
|
-
*
|
|
196
|
-
*
|
|
197
|
-
*
|
|
198
|
-
* -
|
|
199
|
-
*
|
|
200
|
-
*
|
|
201
|
-
* -
|
|
182
|
+
* Return the status and optional parsed elements for an async build identified by
|
|
183
|
+
* `build_id`.
|
|
184
|
+
*
|
|
185
|
+
* Use this endpoint to poll the result of an async ingestion or re-process
|
|
186
|
+
* request. The `build_id` is returned in the response of:
|
|
187
|
+
*
|
|
188
|
+
* - `POST /v2/sources/upload` (async file upload)
|
|
189
|
+
* - `POST /v2/sources/upload-url-source` (async URL ingestion)
|
|
190
|
+
* - `POST /v2/sources/upload-github-source` (async GitHub ingestion)
|
|
191
|
+
* - `POST /v2/sources/upload-youtube-source` (async YouTube ingestion)
|
|
192
|
+
* - `POST /v2/sources/process` (async re-process)
|
|
193
|
+
*
|
|
194
|
+
* **Path parameter:**
|
|
195
|
+
*
|
|
196
|
+
* - **build_id** (str, required): The build identifier returned when the job was
|
|
197
|
+
* scheduled.
|
|
198
|
+
*
|
|
199
|
+
* **Query parameters:**
|
|
200
|
+
*
|
|
201
|
+
* - **suppress_elements** (bool, default `false`): When `true`, elements are
|
|
202
|
+
* omitted from the response. When `false` (default), the response includes the
|
|
203
|
+
* parsed elements (chunks/partitions) for the build if it completed
|
|
204
|
+
* successfully. Same structure as `POST /sources/elements` (each element has
|
|
205
|
+
* `page_content` and `metadata`). If `page` and `page_size` are not passed, all
|
|
206
|
+
* elements are returned.
|
|
207
|
+
* - **suppress_img_base64** (bool, default `false`): When `true`, `img_base64` is
|
|
208
|
+
* omitted from each element (useful to reduce payload size when images are not
|
|
209
|
+
* needed).
|
|
210
|
+
* - **page** (int, optional): 1-based page number. Only used when
|
|
211
|
+
* `suppress_elements=false` and pagination is used (pass either `page` or
|
|
212
|
+
* `page_size` to enable pagination).
|
|
213
|
+
* - **page_size** (int, optional): Number of elements per page (max 100). Only
|
|
214
|
+
* used when `suppress_elements=false` and pagination is used.
|
|
215
|
+
*
|
|
216
|
+
* **Response fields:**
|
|
217
|
+
*
|
|
218
|
+
* - **build_id**: The requested build identifier.
|
|
219
|
+
* - **status**: SourceNodeStatus value when history exists (e.g. Completed,
|
|
220
|
+
* Processing, Processing failed). `not_found` when no history exists (build in
|
|
221
|
+
* progress or invalid id).
|
|
222
|
+
* - **success**: `true` only when `status == "Completed"`
|
|
223
|
+
* (SourceNodeStatus.COMPLETED).
|
|
224
|
+
* - **file_id**, **file_name**: Source identifiers; present when the build has
|
|
225
|
+
* been persisted (history exists).
|
|
226
|
+
* - **error**: Error message from the pipeline when the build failed.
|
|
227
|
+
* - **method**, **total_partitions**, **total_pages**: Build metadata when history
|
|
228
|
+
* exists.
|
|
229
|
+
* - **created_at**, **updated_at**: ISO8601 timestamps when history exists.
|
|
230
|
+
* - **message**: Human-readable message (e.g. when status is `not_found`).
|
|
231
|
+
* - **elements**: List of `{ page_content, metadata }` when
|
|
232
|
+
* `suppress_elements=false` and the build completed successfully.
|
|
233
|
+
* - **total_elements**, **page**, **page_size**, **total_pages_elements**:
|
|
234
|
+
* Pagination metadata for `elements` when `suppress_elements=false`.
|
|
202
235
|
*
|
|
203
236
|
* **Error responses:**
|
|
204
237
|
*
|
|
205
|
-
* - `400` — Invalid input (e.g. neither identifier provided).
|
|
206
|
-
* - `404` — Source file not found.
|
|
207
238
|
* - `500` — Unexpected internal error.
|
|
208
239
|
*
|
|
209
240
|
* @example
|
|
210
241
|
* ```ts
|
|
211
|
-
* const response = await client.sources.
|
|
242
|
+
* const response = await client.sources.getBuildStatus(
|
|
243
|
+
* 'build_id',
|
|
244
|
+
* );
|
|
212
245
|
* ```
|
|
213
246
|
*/
|
|
214
|
-
|
|
215
|
-
|
|
247
|
+
getBuildStatus(
|
|
248
|
+
buildID: string,
|
|
249
|
+
query: SourceGetBuildStatusParams | null | undefined = {},
|
|
216
250
|
options?: RequestOptions,
|
|
217
|
-
): APIPromise<
|
|
218
|
-
return this._client.
|
|
251
|
+
): APIPromise<SourceGetBuildStatusResponse> {
|
|
252
|
+
return this._client.get(path`/sources/builds/${buildID}`, { query, ...options });
|
|
219
253
|
}
|
|
220
254
|
|
|
221
255
|
/**
|
|
222
|
-
*
|
|
223
|
-
*
|
|
224
|
-
* Use this endpoint to re-run the data-ingestion pipeline on a source that is
|
|
225
|
-
* already present in the knowledge graph — for example, after changing the
|
|
226
|
-
* partitioning strategy. The endpoint locates the source node, sets its status to
|
|
227
|
-
* `PROCESSING`, applies the requested partition method, and executes the full
|
|
228
|
-
* ingestion pipeline synchronously (partitioning, chunking, embedding, and graph
|
|
229
|
-
* persistence).
|
|
230
|
-
*
|
|
231
|
-
* **Parameters (JSON body):**
|
|
256
|
+
* Retrieve the parsed elements (chunks/partitions) of a source in the same format
|
|
257
|
+
* as get_build_status.
|
|
232
258
|
*
|
|
233
|
-
*
|
|
234
|
-
*
|
|
235
|
-
*
|
|
236
|
-
* Use `file_id` instead when possible. At least one of `file_id` or `file_name`
|
|
237
|
-
* must be provided.
|
|
238
|
-
* - **partition_method** (str, default `"basic"`): The partitioning strategy to
|
|
239
|
-
* apply. One of: `basic` (Fast), `hi_res` (Balanced), `hi_res_ft` (Accurate),
|
|
240
|
-
* `mai` (VLM), `graphorlm` (Agentic).
|
|
259
|
+
* Returns elements with explicit fields: element_id, element_type, text, markdown,
|
|
260
|
+
* html, img_base64 (optional), position, page_number, bounding_box, page_layout,
|
|
261
|
+
* etc.
|
|
241
262
|
*
|
|
242
|
-
* **
|
|
263
|
+
* **Query parameters:**
|
|
243
264
|
*
|
|
244
|
-
* **
|
|
265
|
+
* - **file_id** (str, required): Unique identifier of the source.
|
|
266
|
+
* - **page** (int, optional): 1-based page number. Use with page_size to enable
|
|
267
|
+
* pagination.
|
|
268
|
+
* - **page_size** (int, optional): Number of elements per page (max 100).
|
|
269
|
+
* - **suppress_img_base64** (bool, default false): When true, img_base64 is
|
|
270
|
+
* omitted from each element.
|
|
271
|
+
* - **type** (str, optional): Filter by element type (e.g. NarrativeText, Title,
|
|
272
|
+
* Table).
|
|
273
|
+
* - **page_numbers** (list, optional): Restrict to specific page numbers (repeat
|
|
274
|
+
* param for multiple).
|
|
275
|
+
* - **elementsToRemove** (list, optional): Element types to exclude (repeat param
|
|
276
|
+
* for multiple).
|
|
245
277
|
*
|
|
246
|
-
*
|
|
247
|
-
*
|
|
278
|
+
* **Returns** Paginated response with items as BuildStatusElement list (same shape
|
|
279
|
+
* as GET /builds/{build_id} elements).
|
|
248
280
|
*
|
|
249
281
|
* @example
|
|
250
282
|
* ```ts
|
|
251
|
-
* const
|
|
283
|
+
* const response = await client.sources.getElements({
|
|
284
|
+
* file_id: 'file_id',
|
|
285
|
+
* });
|
|
252
286
|
* ```
|
|
253
287
|
*/
|
|
254
|
-
|
|
255
|
-
|
|
288
|
+
getElements(
|
|
289
|
+
query: SourceGetElementsParams,
|
|
290
|
+
options?: RequestOptions,
|
|
291
|
+
): APIPromise<SourceGetElementsResponse> {
|
|
292
|
+
return this._client.get('/sources/get-elements', { query, ...options });
|
|
256
293
|
}
|
|
257
294
|
|
|
258
295
|
/**
|
|
259
|
-
*
|
|
296
|
+
* Upload a local file and schedule ingestion in the background.
|
|
260
297
|
*
|
|
261
|
-
*
|
|
262
|
-
*
|
|
263
|
-
*
|
|
264
|
-
* is a pure retrieval endpoint — it does **not** generate an answer; use
|
|
265
|
-
* `/ask-sources` for Q&A.
|
|
266
|
-
*
|
|
267
|
-
* **Parameters (JSON body):**
|
|
268
|
-
*
|
|
269
|
-
* - **query** (str, required): The natural-language search query used to find
|
|
270
|
-
* relevant chunks.
|
|
271
|
-
* - **file_ids** (list[str], optional — preferred): Restrict retrieval to specific
|
|
272
|
-
* source file IDs.
|
|
273
|
-
* - **file_names** (list[str], optional — deprecated): Restrict retrieval to
|
|
274
|
-
* specific source file names. Use `file_ids` when possible.
|
|
298
|
+
* Accepts **`multipart/form-data`** with the file. Validates size (max 100 MB) and
|
|
299
|
+
* extension, stores the file, then schedules the full data-ingestion pipeline in
|
|
300
|
+
* the background. Returns immediately with a `build_id` to poll for status.
|
|
275
301
|
*
|
|
276
|
-
* **
|
|
302
|
+
* **Parameters:**
|
|
277
303
|
*
|
|
278
|
-
* - `
|
|
279
|
-
*
|
|
280
|
-
*
|
|
281
|
-
* - `
|
|
304
|
+
* - **file** (`multipart/form-data`): The file to upload. Must include
|
|
305
|
+
* `Content-Length` and have a supported extension (pdf, doc, docx, csv, txt, md,
|
|
306
|
+
* etc.).
|
|
307
|
+
* - **method** (`form`, optional): Partitioning strategy. One of: `fast`,
|
|
308
|
+
* `balanced`, `accurate`, `vlm`, `agentic`. A default is used when omitted.
|
|
282
309
|
*
|
|
283
|
-
* **
|
|
284
|
-
*
|
|
285
|
-
* - `500` — Unexpected internal error during retrieval.
|
|
310
|
+
* **Returns** `AsyncIngestResponse` with `build_id`. Use it to check processing
|
|
311
|
+
* status.
|
|
286
312
|
*
|
|
287
313
|
* @example
|
|
288
314
|
* ```ts
|
|
289
|
-
* const response = await client.sources.
|
|
290
|
-
*
|
|
315
|
+
* const response = await client.sources.ingestFile({
|
|
316
|
+
* file: fs.createReadStream('path/to/file'),
|
|
291
317
|
* });
|
|
292
318
|
* ```
|
|
293
319
|
*/
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
320
|
+
ingestFile(body: SourceIngestFileParams, options?: RequestOptions): APIPromise<SourceIngestFileResponse> {
|
|
321
|
+
return this._client.post(
|
|
322
|
+
'/sources/ingest-file',
|
|
323
|
+
multipartFormRequestOptions({ body, ...options }, this._client),
|
|
324
|
+
);
|
|
299
325
|
}
|
|
300
326
|
|
|
301
327
|
/**
|
|
302
|
-
*
|
|
303
|
-
* graph.
|
|
304
|
-
*
|
|
305
|
-
* This endpoint accepts a **`multipart/form-data`** request containing the file to
|
|
306
|
-
* upload. It validates the file size (max 100 MB) and extension against the list
|
|
307
|
-
* of allowed types, stores the file, and then runs the full data-ingestion
|
|
308
|
-
* pipeline synchronously — including partitioning, chunking, embedding, and graph
|
|
309
|
-
* persistence.
|
|
310
|
-
*
|
|
311
|
-
* **Usage via SDK (AI agent / MCP context):**
|
|
312
|
-
*
|
|
313
|
-
* The SDK executes code in a **remote sandboxed container** that does **not** have
|
|
314
|
-
* access to the local filesystem. Because of this, `fs.createReadStream()` with a
|
|
315
|
-
* local file path will **not work** — the file does not exist inside the
|
|
316
|
-
* container.
|
|
317
|
-
*
|
|
318
|
-
* For **text-based files** (md, txt, csv, html, etc.), you can read the file
|
|
319
|
-
* content with a local tool (e.g. IDE file-read) and create a `File` object in
|
|
320
|
-
* memory:
|
|
321
|
-
*
|
|
322
|
-
* ```
|
|
323
|
-
* const content = `...file contents read locally...`;
|
|
324
|
-
* const file = new File([content], "my-document.md", { type: "text/markdown" });
|
|
325
|
-
* const result = await client.sources.upload({ file });
|
|
326
|
-
* ```
|
|
327
|
-
*
|
|
328
|
-
* For **binary files** (pdf, docx, images, video, audio, etc.), the in-memory
|
|
329
|
-
* approach is impractical. Instead, use **`curl`** or **`subprocess`** to call the
|
|
330
|
-
* upload endpoint directly from the local machine, where the file is accessible:
|
|
331
|
-
*
|
|
332
|
-
* ```
|
|
333
|
-
* import { execSync } from "child_process";
|
|
334
|
-
*
|
|
335
|
-
* const result = execSync(`
|
|
336
|
-
* curl -s -X POST "{base_url}/source/upload" \
|
|
337
|
-
* -H "Authorization: Bearer {access_key}" \
|
|
338
|
-
* -F "file=@/local/path/to/document.pdf" \
|
|
339
|
-
* -F "partition_method=graphorlm"
|
|
340
|
-
* `).toString();
|
|
341
|
-
* ```
|
|
328
|
+
* Ingest a GitHub repository as a source into the project's knowledge graph.
|
|
342
329
|
*
|
|
343
|
-
*
|
|
330
|
+
* Schedules the ingestion in the background and returns immediately with a
|
|
331
|
+
* `build_id`. Use the returned `build_id` to poll for processing status.
|
|
344
332
|
*
|
|
345
|
-
*
|
|
346
|
-
* import subprocess, json
|
|
347
|
-
*
|
|
348
|
-
* result = subprocess.run([
|
|
349
|
-
* "curl", "-s", "-X", "POST", "{base_url}/source/upload",
|
|
350
|
-
* "-H", "Authorization: Bearer {access_key}",
|
|
351
|
-
* "-F", "file=@/local/path/to/document.pdf",
|
|
352
|
-
* "-F", "partition_method=graphorlm",
|
|
353
|
-
* ], capture_output=True, text=True)
|
|
354
|
-
* response = json.loads(result.stdout)
|
|
355
|
-
* ```
|
|
333
|
+
* **Parameters (JSON body):**
|
|
356
334
|
*
|
|
357
|
-
* **
|
|
358
|
-
*
|
|
359
|
-
* Always prefer `curl`/`requests` executed locally for binary uploads.
|
|
335
|
+
* - **url** (str, required): The GitHub repository URL to ingest (e.g.
|
|
336
|
+
* `https://github.com/owner/repo`).
|
|
360
337
|
*
|
|
361
|
-
* **
|
|
338
|
+
* **Returns** `AsyncIngestResponse` with `build_id`.
|
|
362
339
|
*
|
|
340
|
+
* @example
|
|
341
|
+
* ```ts
|
|
342
|
+
* const response = await client.sources.ingestGitHub({
|
|
343
|
+
* url: 'url',
|
|
344
|
+
* });
|
|
363
345
|
* ```
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
346
|
+
*/
|
|
347
|
+
ingestGitHub(
|
|
348
|
+
body: SourceIngestGitHubParams,
|
|
349
|
+
options?: RequestOptions,
|
|
350
|
+
): APIPromise<SourceIngestGitHubResponse> {
|
|
351
|
+
return this._client.post('/sources/ingest-github', { body, ...options });
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
/**
|
|
355
|
+
* Ingest a web page (or a set of crawled pages) as a source into the project's
|
|
356
|
+
* knowledge graph.
|
|
369
357
|
*
|
|
370
|
-
*
|
|
358
|
+
* Unlike the synchronous version, this endpoint schedules the ingestion in the
|
|
359
|
+
* background and returns immediately with a `processing` status. The source will
|
|
360
|
+
* be fully available once background processing completes.
|
|
371
361
|
*
|
|
372
|
-
*
|
|
373
|
-
*
|
|
374
|
-
*
|
|
375
|
-
*
|
|
376
|
-
* response = requests.post(
|
|
377
|
-
* "{base_url}/source/upload",
|
|
378
|
-
* headers={"Authorization": "Bearer {access_key}"},
|
|
379
|
-
* files={"file": ("document.pdf", f, "application/pdf")},
|
|
380
|
-
* data={"partition_method": "graphorlm"}, # optional
|
|
381
|
-
* )
|
|
382
|
-
* ```
|
|
362
|
+
* If the URL points directly to a downloadable file (detected via URL path
|
|
363
|
+
* extension or HTTP Content-Type), the file is first downloaded and uploaded to
|
|
364
|
+
* storage synchronously, then the partition/embedding pipeline runs in the
|
|
365
|
+
* background.
|
|
383
366
|
*
|
|
384
|
-
* **Parameters:**
|
|
367
|
+
* **Parameters (JSON body):**
|
|
385
368
|
*
|
|
386
|
-
* - **
|
|
387
|
-
*
|
|
388
|
-
*
|
|
389
|
-
*
|
|
390
|
-
* - **
|
|
391
|
-
*
|
|
392
|
-
*
|
|
369
|
+
* - **url** (str, required): The web page URL to ingest.
|
|
370
|
+
* - **crawlUrls** (bool, optional, default `false`): When `true`, the system will
|
|
371
|
+
* also follow and ingest links found on the page. Ignored when the URL resolves
|
|
372
|
+
* to a file.
|
|
373
|
+
* - **method** (str, optional): The partitioning strategy to use. One of: `fast`,
|
|
374
|
+
* `balanced`, `accurate`, `vlm`, `agentic`. When omitted the system default is
|
|
375
|
+
* applied.
|
|
393
376
|
*
|
|
394
|
-
* **Returns** a `PublicSourceResponse` with
|
|
395
|
-
*
|
|
377
|
+
* **Returns** a `PublicSourceResponse` with `status: "processing"` immediately.
|
|
378
|
+
* Poll the source status endpoint using the returned `file_id` to track
|
|
379
|
+
* completion.
|
|
396
380
|
*
|
|
397
381
|
* **Error responses:**
|
|
398
382
|
*
|
|
399
|
-
* - `400` — Unsupported file type
|
|
400
|
-
* - `
|
|
401
|
-
* - `413` — File exceeds the 100 MB size limit.
|
|
402
|
-
* - `403` — Permission denied.
|
|
403
|
-
* - `404` — File not found during processing.
|
|
404
|
-
* - `500` — Unexpected internal error.
|
|
383
|
+
* - `400` — Unsupported file type detected from a file URL.
|
|
384
|
+
* - `500` — Unexpected internal error during URL processing.
|
|
405
385
|
*
|
|
406
386
|
* @example
|
|
407
387
|
* ```ts
|
|
408
|
-
* const
|
|
409
|
-
*
|
|
388
|
+
* const response = await client.sources.ingestURL({
|
|
389
|
+
* url: 'url',
|
|
410
390
|
* });
|
|
411
391
|
* ```
|
|
412
392
|
*/
|
|
413
|
-
|
|
414
|
-
return this._client.post(
|
|
415
|
-
'/sources/upload',
|
|
416
|
-
multipartFormRequestOptions({ body, ...options }, this._client),
|
|
417
|
-
);
|
|
393
|
+
ingestURL(body: SourceIngestURLParams, options?: RequestOptions): APIPromise<SourceIngestURLResponse> {
|
|
394
|
+
return this._client.post('/sources/ingest-url', { body, ...options });
|
|
418
395
|
}
|
|
419
396
|
|
|
420
397
|
/**
|
|
421
|
-
* Ingest a
|
|
398
|
+
* Ingest a YouTube video as a source into the project's knowledge graph.
|
|
422
399
|
*
|
|
423
|
-
*
|
|
424
|
-
*
|
|
425
|
-
*
|
|
400
|
+
* Schedules the ingestion in the background and returns immediately with a
|
|
401
|
+
* `build_id`. The endpoint will download the transcript/captions and process them
|
|
402
|
+
* in the background. Use the returned `build_id` to poll for processing status.
|
|
426
403
|
*
|
|
427
404
|
* **Parameters (JSON body):**
|
|
428
405
|
*
|
|
429
|
-
* - **url** (str, required): The
|
|
430
|
-
* `https://
|
|
431
|
-
*
|
|
432
|
-
* **Returns** a `PublicSourceResponse` with the resulting source metadata (file
|
|
433
|
-
* ID, name, size, type, source origin, partition method, and processing status).
|
|
434
|
-
*
|
|
435
|
-
* **Error responses:**
|
|
406
|
+
* - **url** (str, required): The YouTube video URL to ingest (e.g.
|
|
407
|
+
* `https://www.youtube.com/watch?v=...`).
|
|
436
408
|
*
|
|
437
|
-
*
|
|
409
|
+
* **Returns** `AsyncIngestResponse` with `build_id`.
|
|
438
410
|
*
|
|
439
411
|
* @example
|
|
440
412
|
* ```ts
|
|
441
|
-
* const
|
|
413
|
+
* const response = await client.sources.ingestYoutube({
|
|
442
414
|
* url: 'url',
|
|
443
415
|
* });
|
|
444
416
|
* ```
|
|
445
417
|
*/
|
|
446
|
-
|
|
447
|
-
|
|
418
|
+
ingestYoutube(
|
|
419
|
+
body: SourceIngestYoutubeParams,
|
|
420
|
+
options?: RequestOptions,
|
|
421
|
+
): APIPromise<SourceIngestYoutubeResponse> {
|
|
422
|
+
return this._client.post('/sources/ingest-youtube', { body, ...options });
|
|
448
423
|
}
|
|
449
424
|
|
|
450
425
|
/**
|
|
451
|
-
*
|
|
452
|
-
* knowledge graph.
|
|
426
|
+
* Re-process (re-parse) an existing source in the background.
|
|
453
427
|
*
|
|
454
|
-
*
|
|
455
|
-
*
|
|
456
|
-
*
|
|
457
|
-
* synchronously.
|
|
428
|
+
* Schedules the data-ingestion pipeline (partitioning, chunking, embedding) for an
|
|
429
|
+
* existing source and returns immediately with a `build_id`. Use it to poll for
|
|
430
|
+
* status.
|
|
458
431
|
*
|
|
459
432
|
* **Parameters (JSON body):**
|
|
460
433
|
*
|
|
461
|
-
* - **
|
|
462
|
-
* - **
|
|
463
|
-
*
|
|
464
|
-
* - **partition_method** (str, optional): The partitioning strategy to use. One
|
|
465
|
-
* of: `basic` (Fast), `hi_res` (Balanced), `hi_res_ft` (Accurate), `mai` (VLM),
|
|
466
|
-
* `graphorlm` (Agentic). When omitted the system default is applied.
|
|
467
|
-
*
|
|
468
|
-
* **Returns** a `PublicSourceResponse` with the resulting source metadata (file
|
|
469
|
-
* ID, name, size, type, source origin, partition method, and processing status).
|
|
434
|
+
* - **file_id** (str, required): Unique identifier of the source to re-process.
|
|
435
|
+
* - **method** (str, default `"fast"`): Partitioning strategy. One of: `fast`,
|
|
436
|
+
* `balanced`, `accurate`, `vlm`, `agentic`.
|
|
470
437
|
*
|
|
471
|
-
* **
|
|
472
|
-
*
|
|
473
|
-
* - `500` — Unexpected internal error during URL processing.
|
|
438
|
+
* **Returns** `AsyncIngestResponse` with `build_id`.
|
|
474
439
|
*
|
|
475
440
|
* @example
|
|
476
441
|
* ```ts
|
|
477
|
-
* const
|
|
478
|
-
*
|
|
442
|
+
* const response = await client.sources.reprocess({
|
|
443
|
+
* file_id: 'file_id',
|
|
479
444
|
* });
|
|
480
445
|
* ```
|
|
481
446
|
*/
|
|
482
|
-
|
|
483
|
-
return this._client.post('/sources/
|
|
447
|
+
reprocess(body: SourceReprocessParams, options?: RequestOptions): APIPromise<SourceReprocessResponse> {
|
|
448
|
+
return this._client.post('/sources/reprocess', { body, ...options });
|
|
484
449
|
}
|
|
485
450
|
|
|
486
451
|
/**
|
|
487
|
-
*
|
|
452
|
+
* Retrieve relevant document chunks from the prebuilt RAG vector store.
|
|
488
453
|
*
|
|
489
|
-
*
|
|
490
|
-
*
|
|
491
|
-
*
|
|
454
|
+
* Performs a semantic similarity search over the project's prebuilt RAG store
|
|
455
|
+
* using Google File Search with grounding. Returns the most relevant text chunks
|
|
456
|
+
* along with their source metadata (file name, page number, relevance score). This
|
|
457
|
+
* is a pure retrieval endpoint — it does **not** generate an answer; use
|
|
458
|
+
* `/ask-sources` for Q&A.
|
|
492
459
|
*
|
|
493
460
|
* **Parameters (JSON body):**
|
|
494
461
|
*
|
|
495
|
-
* - **
|
|
496
|
-
*
|
|
462
|
+
* - **query** (str, required): The natural-language search query used to find
|
|
463
|
+
* relevant chunks.
|
|
464
|
+
* - **file_ids** (list[str], optional — preferred): Restrict retrieval to specific
|
|
465
|
+
* source file IDs.
|
|
466
|
+
* - **file_names** (list[str], optional — deprecated): Restrict retrieval to
|
|
467
|
+
* specific source file names. Use `file_ids` when possible.
|
|
497
468
|
*
|
|
498
|
-
* **Returns** a `
|
|
499
|
-
*
|
|
469
|
+
* **Returns** a `PublicRetrieveResponse` containing:
|
|
470
|
+
*
|
|
471
|
+
* - `query` — the original search query.
|
|
472
|
+
* - `chunks` — a list of `PublicRetrieveChunk` objects, each with `text`,
|
|
473
|
+
* `file_name`, `page_number`, `score`, and additional `metadata`.
|
|
474
|
+
* - `total` — the total number of chunks returned.
|
|
500
475
|
*
|
|
501
476
|
* **Error responses:**
|
|
502
477
|
*
|
|
503
|
-
* - `500` — Unexpected internal error during
|
|
478
|
+
* - `500` — Unexpected internal error during retrieval.
|
|
504
479
|
*
|
|
505
480
|
* @example
|
|
506
481
|
* ```ts
|
|
507
|
-
* const
|
|
508
|
-
*
|
|
482
|
+
* const response = await client.sources.retrieveChunks({
|
|
483
|
+
* query: 'query',
|
|
509
484
|
* });
|
|
510
485
|
* ```
|
|
511
486
|
*/
|
|
512
|
-
|
|
513
|
-
|
|
487
|
+
retrieveChunks(
|
|
488
|
+
body: SourceRetrieveChunksParams,
|
|
489
|
+
options?: RequestOptions,
|
|
490
|
+
): APIPromise<SourceRetrieveChunksResponse> {
|
|
491
|
+
return this._client.post('/sources/prebuilt-rag', { body, ...options });
|
|
514
492
|
}
|
|
515
493
|
}
|
|
516
494
|
|
|
517
495
|
/**
|
|
518
|
-
*
|
|
496
|
+
* A single parsed element (chunk/partition) from a source, with explicit fields.
|
|
497
|
+
*/
|
|
498
|
+
export interface Element {
|
|
499
|
+
/**
|
|
500
|
+
* Bounding box (e.g. left, top, width, height) when available.
|
|
501
|
+
*/
|
|
502
|
+
bounding_box?: { [key: string]: unknown } | null;
|
|
503
|
+
|
|
504
|
+
/**
|
|
505
|
+
* Unique identifier for the element.
|
|
506
|
+
*/
|
|
507
|
+
element_id?: string | null;
|
|
508
|
+
|
|
509
|
+
/**
|
|
510
|
+
* Type of the element (Title, NarrativeText, Image, Table, etc.).
|
|
511
|
+
*/
|
|
512
|
+
element_type?:
|
|
513
|
+
| 'Title'
|
|
514
|
+
| 'NarrativeText'
|
|
515
|
+
| 'TextBlock'
|
|
516
|
+
| 'ListItem'
|
|
517
|
+
| 'Table'
|
|
518
|
+
| 'TableRow'
|
|
519
|
+
| 'Image'
|
|
520
|
+
| 'Footer'
|
|
521
|
+
| 'Formula'
|
|
522
|
+
| 'CompositeElement'
|
|
523
|
+
| 'FigureCaption'
|
|
524
|
+
| 'PageBreak'
|
|
525
|
+
| 'Address'
|
|
526
|
+
| 'EmailAddress'
|
|
527
|
+
| 'PageNumber'
|
|
528
|
+
| 'CodeSnippet'
|
|
529
|
+
| 'Header'
|
|
530
|
+
| 'FormKeysValues'
|
|
531
|
+
| 'Link'
|
|
532
|
+
| 'UncategorizedText'
|
|
533
|
+
| 'Abstract'
|
|
534
|
+
| 'AsideText'
|
|
535
|
+
| 'Reference'
|
|
536
|
+
| 'ReferenceContent'
|
|
537
|
+
| 'Chart'
|
|
538
|
+
| 'Seal'
|
|
539
|
+
| 'FormulaNumber'
|
|
540
|
+
| null;
|
|
541
|
+
|
|
542
|
+
/**
|
|
543
|
+
* HTML representation of the content, when available.
|
|
544
|
+
*/
|
|
545
|
+
html?: string | null;
|
|
546
|
+
|
|
547
|
+
/**
|
|
548
|
+
* Base64-encoded image data, when the element is an image.
|
|
549
|
+
*/
|
|
550
|
+
img_base64?: string | null;
|
|
551
|
+
|
|
552
|
+
/**
|
|
553
|
+
* Markdown representation of the content, when available.
|
|
554
|
+
*/
|
|
555
|
+
markdown?: string | null;
|
|
556
|
+
|
|
557
|
+
/**
|
|
558
|
+
* Additional metadata.
|
|
559
|
+
*/
|
|
560
|
+
metadata?: { [key: string]: unknown };
|
|
561
|
+
|
|
562
|
+
/**
|
|
563
|
+
* Annotation/summary for the page containing this element.
|
|
564
|
+
*/
|
|
565
|
+
page_annotation?: string | null;
|
|
566
|
+
|
|
567
|
+
/**
|
|
568
|
+
* Keywords extracted for the page.
|
|
569
|
+
*/
|
|
570
|
+
page_keywords?: Array<string> | null;
|
|
571
|
+
|
|
572
|
+
/**
|
|
573
|
+
* Page dimensions (width, height) when available.
|
|
574
|
+
*/
|
|
575
|
+
page_layout?: { [key: string]: unknown } | null;
|
|
576
|
+
|
|
577
|
+
/**
|
|
578
|
+
* Page number (1-based) where the element appears.
|
|
579
|
+
*/
|
|
580
|
+
page_number?: number | null;
|
|
581
|
+
|
|
582
|
+
/**
|
|
583
|
+
* Topics extracted for the page.
|
|
584
|
+
*/
|
|
585
|
+
page_topics?: Array<string> | null;
|
|
586
|
+
|
|
587
|
+
/**
|
|
588
|
+
* Order/position of the element within the document.
|
|
589
|
+
*/
|
|
590
|
+
position?: number | null;
|
|
591
|
+
|
|
592
|
+
/**
|
|
593
|
+
* Plain text content of the element.
|
|
594
|
+
*/
|
|
595
|
+
text?: string;
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
/**
|
|
599
|
+
* Public-facing partition method names for API v2.
|
|
519
600
|
*
|
|
520
|
-
*
|
|
601
|
+
* Maps to internal PartitionMethod as:
|
|
521
602
|
*
|
|
522
|
-
* -
|
|
523
|
-
* -
|
|
524
|
-
* -
|
|
525
|
-
* -
|
|
526
|
-
* -
|
|
603
|
+
* - fast → basic
|
|
604
|
+
* - balanced → hi_res
|
|
605
|
+
* - accurate → hi_res_ft
|
|
606
|
+
* - vlm → mai
|
|
607
|
+
* - agentic → graphorlm
|
|
527
608
|
*/
|
|
528
|
-
export type
|
|
609
|
+
export type Method = 'fast' | 'balanced' | 'accurate' | 'vlm' | 'agentic';
|
|
529
610
|
|
|
530
611
|
export interface PublicSource {
|
|
531
612
|
/**
|
|
@@ -575,19 +656,10 @@ export interface PublicSource {
|
|
|
575
656
|
file_id?: string | null;
|
|
576
657
|
|
|
577
658
|
/**
|
|
578
|
-
* Partitioning strategy used during ingestion.
|
|
579
|
-
*
|
|
659
|
+
* Partitioning strategy used during ingestion. V1 API: basic, hi_res, hi_res_ft,
|
|
660
|
+
* mai, graphorlm. V2 API: fast, balanced, accurate, vlm, agentic.
|
|
580
661
|
*/
|
|
581
|
-
|
|
582
|
-
| 'basic'
|
|
583
|
-
| 'hi_res'
|
|
584
|
-
| 'hi_res_ft'
|
|
585
|
-
| 'mai'
|
|
586
|
-
| 'graphorlm'
|
|
587
|
-
| 'ocr'
|
|
588
|
-
| 'advanced'
|
|
589
|
-
| 'yolox'
|
|
590
|
-
| null;
|
|
662
|
+
method?: string | null;
|
|
591
663
|
}
|
|
592
664
|
|
|
593
665
|
export type SourceListResponse = Array<PublicSource>;
|
|
@@ -672,11 +744,121 @@ export interface SourceExtractResponse {
|
|
|
672
744
|
structured_output?: { [key: string]: unknown } | null;
|
|
673
745
|
}
|
|
674
746
|
|
|
675
|
-
|
|
747
|
+
/**
|
|
748
|
+
* Status and optional result for an async build (ingestion/re-process) identified
|
|
749
|
+
* by build_id.
|
|
750
|
+
*
|
|
751
|
+
* Returned by GET /v2/sources/builds/{build_id}. When the build has completed
|
|
752
|
+
* successfully, includes file_id, file_name, and optionally paginated elements
|
|
753
|
+
* (parsed chunks).
|
|
754
|
+
*/
|
|
755
|
+
export interface SourceGetBuildStatusResponse {
|
|
756
|
+
/**
|
|
757
|
+
* The build identifier returned when the ingestion was scheduled.
|
|
758
|
+
*/
|
|
759
|
+
build_id: string;
|
|
760
|
+
|
|
761
|
+
/**
|
|
762
|
+
* Current build status. When a build history exists, this is a SourceNodeStatus
|
|
763
|
+
* value (e.g. Completed, Processing, Processing failed). When no history exists
|
|
764
|
+
* yet: not_found.
|
|
765
|
+
*/
|
|
766
|
+
status: string;
|
|
767
|
+
|
|
768
|
+
/**
|
|
769
|
+
* True if the build completed successfully (status is Completed).
|
|
770
|
+
*/
|
|
771
|
+
success: boolean;
|
|
772
|
+
|
|
773
|
+
/**
|
|
774
|
+
* ISO8601 timestamp when the build (history) was created. Present when history
|
|
775
|
+
* exists.
|
|
776
|
+
*/
|
|
777
|
+
created_at?: string | null;
|
|
778
|
+
|
|
779
|
+
/**
|
|
780
|
+
* Paginated list of parsed elements (chunks) for this build. Only present when
|
|
781
|
+
* suppress_elements=false and the build has completed (status Completed).
|
|
782
|
+
*/
|
|
783
|
+
elements?: Array<Element> | null;
|
|
784
|
+
|
|
785
|
+
/**
|
|
786
|
+
* Error message from the pipeline, if the build failed (e.g. processing_failed).
|
|
787
|
+
*/
|
|
788
|
+
error?: string | null;
|
|
789
|
+
|
|
790
|
+
/**
|
|
791
|
+
* Source file identifier. Present when the build has been persisted (history
|
|
792
|
+
* exists).
|
|
793
|
+
*/
|
|
794
|
+
file_id?: string | null;
|
|
795
|
+
|
|
796
|
+
/**
|
|
797
|
+
* Display name of the source file. Present when the build has been persisted.
|
|
798
|
+
*/
|
|
799
|
+
file_name?: string | null;
|
|
800
|
+
|
|
801
|
+
/**
|
|
802
|
+
* Human-readable message (e.g. when status is not_found or processing).
|
|
803
|
+
*/
|
|
804
|
+
message?: string | null;
|
|
805
|
+
|
|
806
|
+
/**
|
|
807
|
+
* Public-facing partition method names for API v2.
|
|
808
|
+
*
|
|
809
|
+
* Maps to internal PartitionMethod as:
|
|
810
|
+
*
|
|
811
|
+
* - fast → basic
|
|
812
|
+
* - balanced → hi_res
|
|
813
|
+
* - accurate → hi_res_ft
|
|
814
|
+
* - vlm → mai
|
|
815
|
+
* - agentic → graphorlm
|
|
816
|
+
*/
|
|
817
|
+
method?: Method | null;
|
|
818
|
+
|
|
819
|
+
/**
|
|
820
|
+
* Current page of elements (1-based). Null when no pagination was requested (all
|
|
821
|
+
* elements returned).
|
|
822
|
+
*/
|
|
823
|
+
page?: number | null;
|
|
824
|
+
|
|
825
|
+
/**
|
|
826
|
+
* Number of elements per page. Null when no pagination was requested.
|
|
827
|
+
*/
|
|
828
|
+
page_size?: number | null;
|
|
829
|
+
|
|
830
|
+
/**
|
|
831
|
+
* Total number of elements for this build. Present when suppress_elements=false.
|
|
832
|
+
*/
|
|
833
|
+
total_elements?: number | null;
|
|
834
|
+
|
|
835
|
+
/**
|
|
836
|
+
* Total pages in the source for this build. Present when history exists.
|
|
837
|
+
*/
|
|
838
|
+
total_pages?: number | null;
|
|
839
|
+
|
|
840
|
+
/**
|
|
841
|
+
* Total number of pages of elements. Null when no pagination was requested.
|
|
842
|
+
*/
|
|
843
|
+
total_pages_elements?: number | null;
|
|
844
|
+
|
|
845
|
+
/**
|
|
846
|
+
* Total number of partitions created in this build. Present when history exists.
|
|
847
|
+
*/
|
|
848
|
+
total_partitions?: number | null;
|
|
849
|
+
|
|
850
|
+
/**
|
|
851
|
+
* ISO8601 timestamp when the build (history) was last updated. Present when
|
|
852
|
+
* history exists.
|
|
853
|
+
*/
|
|
854
|
+
updated_at?: string | null;
|
|
855
|
+
}
|
|
856
|
+
|
|
857
|
+
export interface SourceGetElementsResponse {
|
|
676
858
|
/**
|
|
677
859
|
* List of items in the current page
|
|
678
860
|
*/
|
|
679
|
-
items: Array<
|
|
861
|
+
items: Array<Element>;
|
|
680
862
|
|
|
681
863
|
/**
|
|
682
864
|
* Total number of items
|
|
@@ -699,30 +881,89 @@ export interface SourceLoadElementsResponse {
|
|
|
699
881
|
total_pages?: number | null;
|
|
700
882
|
}
|
|
701
883
|
|
|
702
|
-
export
|
|
884
|
+
export interface SourceIngestFileResponse {
|
|
703
885
|
/**
|
|
704
|
-
*
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
*
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
*
|
|
715
|
-
* )
|
|
886
|
+
* The ID of the build. This ID can be used to check the status of the request.
|
|
887
|
+
*/
|
|
888
|
+
build_id: string;
|
|
889
|
+
|
|
890
|
+
/**
|
|
891
|
+
* If the request was not successful, this will contain an error message.
|
|
892
|
+
*/
|
|
893
|
+
error?: string | null;
|
|
894
|
+
|
|
895
|
+
/**
|
|
896
|
+
* Whether the request was successfully scheduled.
|
|
716
897
|
*/
|
|
717
|
-
|
|
718
|
-
|
|
898
|
+
success?: boolean;
|
|
899
|
+
}
|
|
719
900
|
|
|
720
|
-
|
|
901
|
+
export interface SourceIngestGitHubResponse {
|
|
902
|
+
/**
|
|
903
|
+
* The ID of the build. This ID can be used to check the status of the request.
|
|
904
|
+
*/
|
|
905
|
+
build_id: string;
|
|
721
906
|
|
|
722
|
-
|
|
907
|
+
/**
|
|
908
|
+
* If the request was not successful, this will contain an error message.
|
|
909
|
+
*/
|
|
910
|
+
error?: string | null;
|
|
723
911
|
|
|
724
|
-
|
|
725
|
-
|
|
912
|
+
/**
|
|
913
|
+
* Whether the request was successfully scheduled.
|
|
914
|
+
*/
|
|
915
|
+
success?: boolean;
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
export interface SourceIngestURLResponse {
|
|
919
|
+
/**
|
|
920
|
+
* The ID of the build. This ID can be used to check the status of the request.
|
|
921
|
+
*/
|
|
922
|
+
build_id: string;
|
|
923
|
+
|
|
924
|
+
/**
|
|
925
|
+
* If the request was not successful, this will contain an error message.
|
|
926
|
+
*/
|
|
927
|
+
error?: string | null;
|
|
928
|
+
|
|
929
|
+
/**
|
|
930
|
+
* Whether the request was successfully scheduled.
|
|
931
|
+
*/
|
|
932
|
+
success?: boolean;
|
|
933
|
+
}
|
|
934
|
+
|
|
935
|
+
export interface SourceIngestYoutubeResponse {
|
|
936
|
+
/**
|
|
937
|
+
* The ID of the build. This ID can be used to check the status of the request.
|
|
938
|
+
*/
|
|
939
|
+
build_id: string;
|
|
940
|
+
|
|
941
|
+
/**
|
|
942
|
+
* If the request was not successful, this will contain an error message.
|
|
943
|
+
*/
|
|
944
|
+
error?: string | null;
|
|
945
|
+
|
|
946
|
+
/**
|
|
947
|
+
* Whether the request was successfully scheduled.
|
|
948
|
+
*/
|
|
949
|
+
success?: boolean;
|
|
950
|
+
}
|
|
951
|
+
|
|
952
|
+
export interface SourceReprocessResponse {
|
|
953
|
+
/**
|
|
954
|
+
* The ID of the build. This ID can be used to check the status of the request.
|
|
955
|
+
*/
|
|
956
|
+
build_id: string;
|
|
957
|
+
|
|
958
|
+
/**
|
|
959
|
+
* If the request was not successful, this will contain an error message.
|
|
960
|
+
*/
|
|
961
|
+
error?: string | null;
|
|
962
|
+
|
|
963
|
+
/**
|
|
964
|
+
* Whether the request was successfully scheduled.
|
|
965
|
+
*/
|
|
966
|
+
success?: boolean;
|
|
726
967
|
}
|
|
727
968
|
|
|
728
969
|
export interface SourceRetrieveChunksResponse {
|
|
@@ -776,6 +1017,14 @@ export namespace SourceRetrieveChunksResponse {
|
|
|
776
1017
|
}
|
|
777
1018
|
}
|
|
778
1019
|
|
|
1020
|
+
export interface SourceListParams {
|
|
1021
|
+
/**
|
|
1022
|
+
* Optional list of file_id to filter by (only these sources are returned). Repeat
|
|
1023
|
+
* the param for multiple IDs.
|
|
1024
|
+
*/
|
|
1025
|
+
file_ids?: Array<string> | null;
|
|
1026
|
+
}
|
|
1027
|
+
|
|
779
1028
|
export interface SourceDeleteParams {
|
|
780
1029
|
/**
|
|
781
1030
|
* Unique identifier for the source (preferred)
|
|
@@ -858,116 +1107,78 @@ export interface SourceExtractParams {
|
|
|
858
1107
|
thinking_level?: 'fast' | 'balanced' | 'accurate' | null;
|
|
859
1108
|
}
|
|
860
1109
|
|
|
861
|
-
export interface
|
|
862
|
-
/**
|
|
863
|
-
* Unique identifier for the source (preferred)
|
|
864
|
-
*/
|
|
865
|
-
file_id?: string | null;
|
|
866
|
-
|
|
867
|
-
/**
|
|
868
|
-
* The name of the file (deprecated, use file_id)
|
|
869
|
-
*/
|
|
870
|
-
file_name?: string | null;
|
|
871
|
-
|
|
872
|
-
/**
|
|
873
|
-
* Optional filter to narrow down the returned elements
|
|
874
|
-
*/
|
|
875
|
-
filter?: SourceLoadElementsParams.Filter | null;
|
|
876
|
-
|
|
877
|
-
/**
|
|
878
|
-
* Current page number
|
|
879
|
-
*/
|
|
1110
|
+
export interface SourceGetBuildStatusParams {
|
|
880
1111
|
page?: number | null;
|
|
881
1112
|
|
|
882
|
-
/**
|
|
883
|
-
* Number of items per page
|
|
884
|
-
*/
|
|
885
1113
|
page_size?: number | null;
|
|
1114
|
+
|
|
1115
|
+
suppress_elements?: boolean;
|
|
1116
|
+
|
|
1117
|
+
suppress_img_base64?: boolean;
|
|
886
1118
|
}
|
|
887
1119
|
|
|
888
|
-
export
|
|
1120
|
+
export interface SourceGetElementsParams {
|
|
889
1121
|
/**
|
|
890
|
-
*
|
|
1122
|
+
* Unique identifier of the source
|
|
891
1123
|
*/
|
|
892
|
-
|
|
893
|
-
/**
|
|
894
|
-
* List of element types to exclude from the results
|
|
895
|
-
*/
|
|
896
|
-
elementsToRemove?: Array<string> | null;
|
|
897
|
-
|
|
898
|
-
/**
|
|
899
|
-
* Restrict results to specific page numbers from the original document
|
|
900
|
-
*/
|
|
901
|
-
page_numbers?: Array<number> | null;
|
|
902
|
-
|
|
903
|
-
/**
|
|
904
|
-
* Filter by element type (e.g. NarrativeText, Title, Table)
|
|
905
|
-
*/
|
|
906
|
-
type?: string | null;
|
|
907
|
-
}
|
|
908
|
-
}
|
|
1124
|
+
file_id: string;
|
|
909
1125
|
|
|
910
|
-
export interface SourceParseParams {
|
|
911
1126
|
/**
|
|
912
|
-
*
|
|
1127
|
+
* Element types to exclude
|
|
913
1128
|
*/
|
|
914
|
-
|
|
1129
|
+
elementsToRemove?: Array<string> | null;
|
|
915
1130
|
|
|
916
1131
|
/**
|
|
917
|
-
*
|
|
1132
|
+
* 1-based page number (use with page_size)
|
|
918
1133
|
*/
|
|
919
|
-
|
|
1134
|
+
page?: number | null;
|
|
920
1135
|
|
|
921
1136
|
/**
|
|
922
|
-
*
|
|
923
|
-
* (Balanced), hi_res_ft (Accurate), mai (VLM), graphorlm (Agentic)
|
|
1137
|
+
* Restrict to specific page numbers
|
|
924
1138
|
*/
|
|
925
|
-
|
|
926
|
-
}
|
|
1139
|
+
page_numbers?: Array<number> | null;
|
|
927
1140
|
|
|
928
|
-
export interface SourceRetrieveChunksParams {
|
|
929
1141
|
/**
|
|
930
|
-
*
|
|
1142
|
+
* Number of elements per page
|
|
931
1143
|
*/
|
|
932
|
-
|
|
1144
|
+
page_size?: number | null;
|
|
933
1145
|
|
|
934
1146
|
/**
|
|
935
|
-
*
|
|
1147
|
+
* When true, img_base64 is omitted from each element
|
|
936
1148
|
*/
|
|
937
|
-
|
|
1149
|
+
suppress_img_base64?: boolean;
|
|
938
1150
|
|
|
939
1151
|
/**
|
|
940
|
-
*
|
|
941
|
-
* file_ids)
|
|
1152
|
+
* Filter by element type (e.g. NarrativeText, Title)
|
|
942
1153
|
*/
|
|
943
|
-
|
|
1154
|
+
type?: string | null;
|
|
944
1155
|
}
|
|
945
1156
|
|
|
946
|
-
export interface
|
|
1157
|
+
export interface SourceIngestFileParams {
|
|
947
1158
|
file: Uploadable;
|
|
948
1159
|
|
|
949
1160
|
/**
|
|
950
|
-
*
|
|
1161
|
+
* Public-facing partition method names for API v2.
|
|
951
1162
|
*
|
|
952
|
-
*
|
|
1163
|
+
* Maps to internal PartitionMethod as:
|
|
953
1164
|
*
|
|
954
|
-
* -
|
|
955
|
-
* -
|
|
956
|
-
* -
|
|
957
|
-
* -
|
|
958
|
-
* -
|
|
1165
|
+
* - fast → basic
|
|
1166
|
+
* - balanced → hi_res
|
|
1167
|
+
* - accurate → hi_res_ft
|
|
1168
|
+
* - vlm → mai
|
|
1169
|
+
* - agentic → graphorlm
|
|
959
1170
|
*/
|
|
960
|
-
|
|
1171
|
+
method?: Method | null;
|
|
961
1172
|
}
|
|
962
1173
|
|
|
963
|
-
export interface
|
|
1174
|
+
export interface SourceIngestGitHubParams {
|
|
964
1175
|
/**
|
|
965
1176
|
* The GitHub repository URL to ingest (e.g. https://github.com/owner/repo)
|
|
966
1177
|
*/
|
|
967
1178
|
url: string;
|
|
968
1179
|
}
|
|
969
1180
|
|
|
970
|
-
export interface
|
|
1181
|
+
export interface SourceIngestURLParams {
|
|
971
1182
|
/**
|
|
972
1183
|
* The web page URL to ingest
|
|
973
1184
|
*/
|
|
@@ -979,20 +1190,20 @@ export interface SourceUploadURLParams {
|
|
|
979
1190
|
crawlUrls?: boolean;
|
|
980
1191
|
|
|
981
1192
|
/**
|
|
982
|
-
*
|
|
1193
|
+
* Public-facing partition method names for API v2.
|
|
983
1194
|
*
|
|
984
|
-
*
|
|
1195
|
+
* Maps to internal PartitionMethod as:
|
|
985
1196
|
*
|
|
986
|
-
* -
|
|
987
|
-
* -
|
|
988
|
-
* -
|
|
989
|
-
* -
|
|
990
|
-
* -
|
|
1197
|
+
* - fast → basic
|
|
1198
|
+
* - balanced → hi_res
|
|
1199
|
+
* - accurate → hi_res_ft
|
|
1200
|
+
* - vlm → mai
|
|
1201
|
+
* - agentic → graphorlm
|
|
991
1202
|
*/
|
|
992
|
-
|
|
1203
|
+
method?: Method | null;
|
|
993
1204
|
}
|
|
994
1205
|
|
|
995
|
-
export interface
|
|
1206
|
+
export interface SourceIngestYoutubeParams {
|
|
996
1207
|
/**
|
|
997
1208
|
* The YouTube video URL to ingest (e.g.
|
|
998
1209
|
* https://www.youtube.com/watch?v=dQw4w9WgXcQ)
|
|
@@ -1000,25 +1211,64 @@ export interface SourceUploadYoutubeParams {
|
|
|
1000
1211
|
url: string;
|
|
1001
1212
|
}
|
|
1002
1213
|
|
|
1214
|
+
export interface SourceReprocessParams {
|
|
1215
|
+
/**
|
|
1216
|
+
* Unique identifier of the source to re-process.
|
|
1217
|
+
*/
|
|
1218
|
+
file_id: string;
|
|
1219
|
+
|
|
1220
|
+
/**
|
|
1221
|
+
* Partitioning strategy. One of: fast, balanced, accurate, vlm, agentic.
|
|
1222
|
+
*/
|
|
1223
|
+
method?: Method;
|
|
1224
|
+
}
|
|
1225
|
+
|
|
1226
|
+
export interface SourceRetrieveChunksParams {
|
|
1227
|
+
/**
|
|
1228
|
+
* The natural-language search query to find relevant chunks
|
|
1229
|
+
*/
|
|
1230
|
+
query: string;
|
|
1231
|
+
|
|
1232
|
+
/**
|
|
1233
|
+
* Optional list of file IDs to restrict retrieval scope (preferred)
|
|
1234
|
+
*/
|
|
1235
|
+
file_ids?: Array<string> | null;
|
|
1236
|
+
|
|
1237
|
+
/**
|
|
1238
|
+
* Optional list of file names to restrict retrieval scope (deprecated, use
|
|
1239
|
+
* file_ids)
|
|
1240
|
+
*/
|
|
1241
|
+
file_names?: Array<string> | null;
|
|
1242
|
+
}
|
|
1243
|
+
|
|
1003
1244
|
export declare namespace Sources {
|
|
1004
1245
|
export {
|
|
1005
|
-
type
|
|
1246
|
+
type Element as Element,
|
|
1247
|
+
type Method as Method,
|
|
1006
1248
|
type PublicSource as PublicSource,
|
|
1007
1249
|
type SourceListResponse as SourceListResponse,
|
|
1008
1250
|
type SourceDeleteResponse as SourceDeleteResponse,
|
|
1009
1251
|
type SourceAskResponse as SourceAskResponse,
|
|
1010
1252
|
type SourceExtractResponse as SourceExtractResponse,
|
|
1011
|
-
type
|
|
1253
|
+
type SourceGetBuildStatusResponse as SourceGetBuildStatusResponse,
|
|
1254
|
+
type SourceGetElementsResponse as SourceGetElementsResponse,
|
|
1255
|
+
type SourceIngestFileResponse as SourceIngestFileResponse,
|
|
1256
|
+
type SourceIngestGitHubResponse as SourceIngestGitHubResponse,
|
|
1257
|
+
type SourceIngestURLResponse as SourceIngestURLResponse,
|
|
1258
|
+
type SourceIngestYoutubeResponse as SourceIngestYoutubeResponse,
|
|
1259
|
+
type SourceReprocessResponse as SourceReprocessResponse,
|
|
1012
1260
|
type SourceRetrieveChunksResponse as SourceRetrieveChunksResponse,
|
|
1261
|
+
type SourceListParams as SourceListParams,
|
|
1013
1262
|
type SourceDeleteParams as SourceDeleteParams,
|
|
1014
1263
|
type SourceAskParams as SourceAskParams,
|
|
1015
1264
|
type SourceExtractParams as SourceExtractParams,
|
|
1016
|
-
type
|
|
1017
|
-
type
|
|
1265
|
+
type SourceGetBuildStatusParams as SourceGetBuildStatusParams,
|
|
1266
|
+
type SourceGetElementsParams as SourceGetElementsParams,
|
|
1267
|
+
type SourceIngestFileParams as SourceIngestFileParams,
|
|
1268
|
+
type SourceIngestGitHubParams as SourceIngestGitHubParams,
|
|
1269
|
+
type SourceIngestURLParams as SourceIngestURLParams,
|
|
1270
|
+
type SourceIngestYoutubeParams as SourceIngestYoutubeParams,
|
|
1271
|
+
type SourceReprocessParams as SourceReprocessParams,
|
|
1018
1272
|
type SourceRetrieveChunksParams as SourceRetrieveChunksParams,
|
|
1019
|
-
type SourceUploadParams as SourceUploadParams,
|
|
1020
|
-
type SourceUploadGitHubParams as SourceUploadGitHubParams,
|
|
1021
|
-
type SourceUploadURLParams as SourceUploadURLParams,
|
|
1022
|
-
type SourceUploadYoutubeParams as SourceUploadYoutubeParams,
|
|
1023
1273
|
};
|
|
1024
1274
|
}
|