graphor 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +320 -31
- package/client.d.mts +2 -5
- package/client.d.mts.map +1 -1
- package/client.d.ts +2 -5
- package/client.d.ts.map +1 -1
- package/client.js +6 -8
- package/client.js.map +1 -1
- package/client.mjs +6 -8
- package/client.mjs.map +1 -1
- package/internal/qs/formats.d.mts +7 -0
- package/internal/qs/formats.d.mts.map +1 -0
- package/internal/qs/formats.d.ts +7 -0
- package/internal/qs/formats.d.ts.map +1 -0
- package/internal/qs/formats.js +13 -0
- package/internal/qs/formats.js.map +1 -0
- package/internal/qs/formats.mjs +9 -0
- package/internal/qs/formats.mjs.map +1 -0
- package/internal/qs/index.d.mts +10 -0
- package/internal/qs/index.d.mts.map +1 -0
- package/internal/qs/index.d.ts +10 -0
- package/internal/qs/index.d.ts.map +1 -0
- package/internal/qs/index.js +14 -0
- package/internal/qs/index.js.map +1 -0
- package/internal/qs/index.mjs +10 -0
- package/internal/qs/index.mjs.map +1 -0
- package/internal/qs/stringify.d.mts +3 -0
- package/internal/qs/stringify.d.mts.map +1 -0
- package/internal/qs/stringify.d.ts +3 -0
- package/internal/qs/stringify.d.ts.map +1 -0
- package/internal/qs/stringify.js +277 -0
- package/internal/qs/stringify.js.map +1 -0
- package/internal/qs/stringify.mjs +274 -0
- package/internal/qs/stringify.mjs.map +1 -0
- package/internal/qs/types.d.mts +57 -0
- package/internal/qs/types.d.mts.map +1 -0
- package/internal/qs/types.d.ts +57 -0
- package/internal/qs/types.d.ts.map +1 -0
- package/internal/qs/types.js +3 -0
- package/internal/qs/types.js.map +1 -0
- package/internal/qs/types.mjs +2 -0
- package/internal/qs/types.mjs.map +1 -0
- package/internal/qs/utils.d.mts +15 -0
- package/internal/qs/utils.d.mts.map +1 -0
- package/internal/qs/utils.d.ts +15 -0
- package/internal/qs/utils.d.ts.map +1 -0
- package/internal/qs/utils.js +230 -0
- package/internal/qs/utils.js.map +1 -0
- package/internal/qs/utils.mjs +217 -0
- package/internal/qs/utils.mjs.map +1 -0
- package/internal/tslib.js +17 -17
- package/internal/utils/query.d.mts +0 -3
- package/internal/utils/query.d.mts.map +1 -1
- package/internal/utils/query.d.ts +0 -3
- package/internal/utils/query.d.ts.map +1 -1
- package/internal/utils/query.js +3 -16
- package/internal/utils/query.js.map +1 -1
- package/internal/utils/query.mjs +2 -16
- package/internal/utils/query.mjs.map +1 -1
- package/package.json +1 -1
- package/resources/index.d.mts +1 -1
- package/resources/index.d.mts.map +1 -1
- package/resources/index.d.ts +1 -1
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js.map +1 -1
- package/resources/index.mjs.map +1 -1
- package/resources/sources.d.mts +494 -337
- package/resources/sources.d.mts.map +1 -1
- package/resources/sources.d.ts +494 -337
- package/resources/sources.d.ts.map +1 -1
- package/resources/sources.js +199 -242
- package/resources/sources.js.map +1 -1
- package/resources/sources.mjs +199 -242
- package/resources/sources.mjs.map +1 -1
- package/src/client.ts +40 -24
- package/src/internal/qs/LICENSE.md +13 -0
- package/src/internal/qs/README.md +3 -0
- package/src/internal/qs/formats.ts +10 -0
- package/src/internal/qs/index.ts +13 -0
- package/src/internal/qs/stringify.ts +385 -0
- package/src/internal/qs/types.ts +71 -0
- package/src/internal/qs/utils.ts +265 -0
- package/src/internal/utils/query.ts +2 -18
- package/src/resources/index.ts +17 -8
- package/src/resources/sources.ts +613 -370
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
package/src/resources/sources.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { APIPromise } from '../core/api-promise';
|
|
|
5
5
|
import { type Uploadable } from '../core/uploads';
|
|
6
6
|
import { RequestOptions } from '../internal/request-options';
|
|
7
7
|
import { multipartFormRequestOptions } from '../internal/uploads';
|
|
8
|
+
import { path } from '../internal/utils/path';
|
|
8
9
|
|
|
9
10
|
export class Sources extends APIResource {
|
|
10
11
|
/**
|
|
@@ -15,12 +16,17 @@ export class Sources extends APIResource {
|
|
|
15
16
|
* type, origin) along with its current processing status and a human-readable
|
|
16
17
|
* status message.
|
|
17
18
|
*
|
|
19
|
+
* **Query parameters:**
|
|
20
|
+
*
|
|
21
|
+
* - **file_ids** (list, optional): If provided, only sources whose file_id is in
|
|
22
|
+
* this list are returned. Repeat the param for multiple IDs (e.g.
|
|
23
|
+
* ?file_ids=id1&file_ids=id2).
|
|
24
|
+
*
|
|
18
25
|
* **Status messages returned per source:**
|
|
19
26
|
*
|
|
20
27
|
* - `"completed"` → _"Source processed successfully"_
|
|
21
28
|
* - `"processing"` → _"Source is being processed"_
|
|
22
29
|
* - `"failed"` → _"Source processing failed"_
|
|
23
|
-
* - `"new"` → _"Source uploaded, awaiting processing"_
|
|
24
30
|
*
|
|
25
31
|
* **Returns** a JSON array of `PublicSourceResponse` objects.
|
|
26
32
|
*
|
|
@@ -33,8 +39,11 @@ export class Sources extends APIResource {
|
|
|
33
39
|
* const publicSources = await client.sources.list();
|
|
34
40
|
* ```
|
|
35
41
|
*/
|
|
36
|
-
list(
|
|
37
|
-
|
|
42
|
+
list(
|
|
43
|
+
query: SourceListParams | null | undefined = {},
|
|
44
|
+
options?: RequestOptions,
|
|
45
|
+
): APIPromise<SourceListResponse> {
|
|
46
|
+
return this._client.get('/sources', { query, ...options });
|
|
38
47
|
}
|
|
39
48
|
|
|
40
49
|
/**
|
|
@@ -170,249 +179,147 @@ export class Sources extends APIResource {
|
|
|
170
179
|
}
|
|
171
180
|
|
|
172
181
|
/**
|
|
173
|
-
*
|
|
174
|
-
*
|
|
175
|
-
*
|
|
176
|
-
*
|
|
177
|
-
*
|
|
178
|
-
*
|
|
179
|
-
*
|
|
180
|
-
*
|
|
181
|
-
*
|
|
182
|
-
*
|
|
183
|
-
* -
|
|
184
|
-
*
|
|
185
|
-
*
|
|
186
|
-
*
|
|
187
|
-
*
|
|
188
|
-
*
|
|
189
|
-
*
|
|
190
|
-
*
|
|
191
|
-
*
|
|
192
|
-
*
|
|
193
|
-
*
|
|
194
|
-
*
|
|
195
|
-
*
|
|
196
|
-
*
|
|
197
|
-
*
|
|
198
|
-
* -
|
|
199
|
-
*
|
|
200
|
-
*
|
|
201
|
-
* -
|
|
182
|
+
* Return the status and optional parsed elements for an async build identified by
|
|
183
|
+
* `build_id`.
|
|
184
|
+
*
|
|
185
|
+
* Use this endpoint to poll the result of an async ingestion or re-process
|
|
186
|
+
* request. The `build_id` is returned in the response of:
|
|
187
|
+
*
|
|
188
|
+
* - `POST /v2/sources/upload` (async file upload)
|
|
189
|
+
* - `POST /v2/sources/upload-url-source` (async URL ingestion)
|
|
190
|
+
* - `POST /v2/sources/upload-github-source` (async GitHub ingestion)
|
|
191
|
+
* - `POST /v2/sources/upload-youtube-source` (async YouTube ingestion)
|
|
192
|
+
* - `POST /v2/sources/process` (async re-process)
|
|
193
|
+
*
|
|
194
|
+
* **Path parameter:**
|
|
195
|
+
*
|
|
196
|
+
* - **build_id** (str, required): The build identifier returned when the job was
|
|
197
|
+
* scheduled.
|
|
198
|
+
*
|
|
199
|
+
* **Query parameters:**
|
|
200
|
+
*
|
|
201
|
+
* - **suppress_elements** (bool, default `false`): When `true`, elements are
|
|
202
|
+
* omitted from the response. When `false` (default), the response includes the
|
|
203
|
+
* parsed elements (chunks/partitions) for the build if it completed
|
|
204
|
+
* successfully. Same structure as `POST /sources/elements` (each element has
|
|
205
|
+
* `page_content` and `metadata`). If `page` and `page_size` are not passed, all
|
|
206
|
+
* elements are returned.
|
|
207
|
+
* - **suppress_img_base64** (bool, default `false`): When `true`, `img_base64` is
|
|
208
|
+
* omitted from each element (useful to reduce payload size when images are not
|
|
209
|
+
* needed).
|
|
210
|
+
* - **page** (int, optional): 1-based page number. Only used when
|
|
211
|
+
* `suppress_elements=false` and pagination is used (pass either `page` or
|
|
212
|
+
* `page_size` to enable pagination).
|
|
213
|
+
* - **page_size** (int, optional): Number of elements per page (max 100). Only
|
|
214
|
+
* used when `suppress_elements=false` and pagination is used.
|
|
215
|
+
*
|
|
216
|
+
* **Response fields:**
|
|
217
|
+
*
|
|
218
|
+
* - **build_id**: The requested build identifier.
|
|
219
|
+
* - **status**: SourceNodeStatus value when history exists (e.g. Completed,
|
|
220
|
+
* Processing, Processing failed). `not_found` when no history exists (build in
|
|
221
|
+
* progress or invalid id).
|
|
222
|
+
* - **success**: `true` only when `status == "Completed"`
|
|
223
|
+
* (SourceNodeStatus.COMPLETED).
|
|
224
|
+
* - **file_id**, **file_name**: Source identifiers; present when the build has
|
|
225
|
+
* been persisted (history exists).
|
|
226
|
+
* - **error**: Error message from the pipeline when the build failed.
|
|
227
|
+
* - **method**, **total_partitions**, **total_pages**: Build metadata when history
|
|
228
|
+
* exists.
|
|
229
|
+
* - **created_at**, **updated_at**: ISO8601 timestamps when history exists.
|
|
230
|
+
* - **message**: Human-readable message (e.g. when status is `not_found`).
|
|
231
|
+
* - **elements**: List of `{ page_content, metadata }` when
|
|
232
|
+
* `suppress_elements=false` and the build completed successfully.
|
|
233
|
+
* - **total_elements**, **page**, **page_size**, **total_pages_elements**:
|
|
234
|
+
* Pagination metadata for `elements` when `suppress_elements=false`.
|
|
202
235
|
*
|
|
203
236
|
* **Error responses:**
|
|
204
237
|
*
|
|
205
|
-
* - `400` — Invalid input (e.g. neither identifier provided).
|
|
206
|
-
* - `404` — Source file not found.
|
|
207
238
|
* - `500` — Unexpected internal error.
|
|
208
239
|
*
|
|
209
240
|
* @example
|
|
210
241
|
* ```ts
|
|
211
|
-
* const response = await client.sources.
|
|
242
|
+
* const response = await client.sources.getBuildStatus(
|
|
243
|
+
* 'build_id',
|
|
244
|
+
* );
|
|
212
245
|
* ```
|
|
213
246
|
*/
|
|
214
|
-
|
|
215
|
-
|
|
247
|
+
getBuildStatus(
|
|
248
|
+
buildID: string,
|
|
249
|
+
query: SourceGetBuildStatusParams | null | undefined = {},
|
|
216
250
|
options?: RequestOptions,
|
|
217
|
-
): APIPromise<
|
|
218
|
-
return this._client.
|
|
251
|
+
): APIPromise<SourceGetBuildStatusResponse> {
|
|
252
|
+
return this._client.get(path`/sources/builds/${buildID}`, { query, ...options });
|
|
219
253
|
}
|
|
220
254
|
|
|
221
255
|
/**
|
|
222
|
-
*
|
|
223
|
-
*
|
|
224
|
-
* Use this endpoint to re-run the data-ingestion pipeline on a source that is
|
|
225
|
-
* already present in the knowledge graph — for example, after changing the
|
|
226
|
-
* partitioning strategy. The endpoint locates the source node, sets its status to
|
|
227
|
-
* `PROCESSING`, applies the requested partition method, and executes the full
|
|
228
|
-
* ingestion pipeline synchronously (partitioning, chunking, embedding, and graph
|
|
229
|
-
* persistence).
|
|
230
|
-
*
|
|
231
|
-
* **Parameters (JSON body):**
|
|
256
|
+
* Retrieve the parsed elements (chunks/partitions) of a source in the same format
|
|
257
|
+
* as `getBuildStatus`.
|
|
232
258
|
*
|
|
233
|
-
*
|
|
234
|
-
*
|
|
235
|
-
*
|
|
236
|
-
* Use `file_id` instead when possible. At least one of `file_id` or `file_name`
|
|
237
|
-
* must be provided.
|
|
238
|
-
* - **partition_method** (str, default `"basic"`): The partitioning strategy to
|
|
239
|
-
* apply. One of: `basic` (Fast), `hi_res` (Balanced), `hi_res_ft` (Accurate),
|
|
240
|
-
* `mai` (VLM), `graphorlm` (Agentic).
|
|
259
|
+
* Returns elements with explicit fields: element_id, element_type, text, markdown,
|
|
260
|
+
* html, img_base64 (optional), position, page_number, bounding_box, page_layout,
|
|
261
|
+
* etc.
|
|
241
262
|
*
|
|
242
|
-
* **
|
|
263
|
+
* **Query parameters:**
|
|
243
264
|
*
|
|
244
|
-
* **
|
|
265
|
+
* - **file_id** (str, required): Unique identifier of the source.
|
|
266
|
+
* - **page** (int, optional): 1-based page number. Use with page_size to enable
|
|
267
|
+
* pagination.
|
|
268
|
+
* - **page_size** (int, optional): Number of elements per page (max 100).
|
|
269
|
+
* - **suppress_img_base64** (bool, default false): When true, img_base64 is
|
|
270
|
+
* omitted from each element.
|
|
271
|
+
* - **type** (str, optional): Filter by element type (e.g. NarrativeText, Title,
|
|
272
|
+
* Table).
|
|
273
|
+
* - **page_numbers** (list, optional): Restrict to specific page numbers (repeat
|
|
274
|
+
* param for multiple).
|
|
275
|
+
* - **elementsToRemove** (list, optional): Element types to exclude (repeat param
|
|
276
|
+
* for multiple).
|
|
245
277
|
*
|
|
246
|
-
*
|
|
247
|
-
*
|
|
278
|
+
* **Returns** Paginated response with items as BuildStatusElement list (same shape
|
|
279
|
+
* as GET /builds/{build_id} elements).
|
|
248
280
|
*
|
|
249
281
|
* @example
|
|
250
282
|
* ```ts
|
|
251
|
-
* const
|
|
252
|
-
*
|
|
253
|
-
*/
|
|
254
|
-
parse(body: SourceParseParams, options?: RequestOptions): APIPromise<PublicSource> {
|
|
255
|
-
return this._client.post('/sources/process', { body, ...options });
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
/**
|
|
259
|
-
* Retrieve relevant document chunks from the prebuilt RAG vector store.
|
|
260
|
-
*
|
|
261
|
-
* Performs a semantic similarity search over the project's prebuilt RAG store
|
|
262
|
-
* using Google File Search with grounding. Returns the most relevant text chunks
|
|
263
|
-
* along with their source metadata (file name, page number, relevance score). This
|
|
264
|
-
* is a pure retrieval endpoint — it does **not** generate an answer; use
|
|
265
|
-
* `/ask-sources` for Q&A.
|
|
266
|
-
*
|
|
267
|
-
* **Parameters (JSON body):**
|
|
268
|
-
*
|
|
269
|
-
* - **query** (str, required): The natural-language search query used to find
|
|
270
|
-
* relevant chunks.
|
|
271
|
-
* - **file_ids** (list[str], optional — preferred): Restrict retrieval to specific
|
|
272
|
-
* source file IDs.
|
|
273
|
-
* - **file_names** (list[str], optional — deprecated): Restrict retrieval to
|
|
274
|
-
* specific source file names. Use `file_ids` when possible.
|
|
275
|
-
*
|
|
276
|
-
* **Returns** a `PublicRetrieveResponse` containing:
|
|
277
|
-
*
|
|
278
|
-
* - `query` — the original search query.
|
|
279
|
-
* - `chunks` — a list of `PublicRetrieveChunk` objects, each with `text`,
|
|
280
|
-
* `file_name`, `page_number`, `score`, and additional `metadata`.
|
|
281
|
-
* - `total` — the total number of chunks returned.
|
|
282
|
-
*
|
|
283
|
-
* **Error responses:**
|
|
284
|
-
*
|
|
285
|
-
* - `500` — Unexpected internal error during retrieval.
|
|
286
|
-
*
|
|
287
|
-
* @example
|
|
288
|
-
* ```ts
|
|
289
|
-
* const response = await client.sources.retrieveChunks({
|
|
290
|
-
* query: 'query',
|
|
283
|
+
* const response = await client.sources.getElements({
|
|
284
|
+
* file_id: 'file_id',
|
|
291
285
|
* });
|
|
292
286
|
* ```
|
|
293
287
|
*/
|
|
294
|
-
|
|
295
|
-
|
|
288
|
+
getElements(
|
|
289
|
+
query: SourceGetElementsParams,
|
|
296
290
|
options?: RequestOptions,
|
|
297
|
-
): APIPromise<
|
|
298
|
-
return this._client.
|
|
291
|
+
): APIPromise<SourceGetElementsResponse> {
|
|
292
|
+
return this._client.get('/sources/get-elements', { query, ...options });
|
|
299
293
|
}
|
|
300
294
|
|
|
301
295
|
/**
|
|
302
|
-
* Upload a local file and
|
|
303
|
-
* graph.
|
|
304
|
-
*
|
|
305
|
-
* This endpoint accepts a **`multipart/form-data`** request containing the file to
|
|
306
|
-
* upload. It validates the file size (max 100 MB) and extension against the list
|
|
307
|
-
* of allowed types, stores the file, and then runs the full data-ingestion
|
|
308
|
-
* pipeline synchronously — including partitioning, chunking, embedding, and graph
|
|
309
|
-
* persistence.
|
|
310
|
-
*
|
|
311
|
-
* **Usage via SDK (AI agent / MCP context):**
|
|
312
|
-
*
|
|
313
|
-
* The SDK executes code in a **remote sandboxed container** that does **not** have
|
|
314
|
-
* access to the local filesystem. Because of this, `fs.createReadStream()` with a
|
|
315
|
-
* local file path will **not work** — the file does not exist inside the
|
|
316
|
-
* container.
|
|
317
|
-
*
|
|
318
|
-
* For **text-based files** (md, txt, csv, html, etc.), you can read the file
|
|
319
|
-
* content with a local tool (e.g. IDE file-read) and create a `File` object in
|
|
320
|
-
* memory:
|
|
321
|
-
*
|
|
322
|
-
* ```
|
|
323
|
-
* const content = `...file contents read locally...`;
|
|
324
|
-
* const file = new File([content], "my-document.md", { type: "text/markdown" });
|
|
325
|
-
* const result = await client.sources.upload({ file });
|
|
326
|
-
* ```
|
|
327
|
-
*
|
|
328
|
-
* For **binary files** (pdf, docx, images, video, audio, etc.), the in-memory
|
|
329
|
-
* approach is impractical. Instead, use **`curl`** or **`subprocess`** to call the
|
|
330
|
-
* upload endpoint directly from the local machine, where the file is accessible:
|
|
331
|
-
*
|
|
332
|
-
* ```
|
|
333
|
-
* import { execSync } from "child_process";
|
|
334
|
-
*
|
|
335
|
-
* const result = execSync(`
|
|
336
|
-
* curl -s -X POST "{base_url}/source/upload" \
|
|
337
|
-
* -H "Authorization: Bearer {access_key}" \
|
|
338
|
-
* -F "file=@/local/path/to/document.pdf" \
|
|
339
|
-
* -F "partition_method=graphorlm"
|
|
340
|
-
* `).toString();
|
|
341
|
-
* ```
|
|
296
|
+
* Upload a local file and schedule ingestion in the background.
|
|
342
297
|
*
|
|
343
|
-
*
|
|
344
|
-
*
|
|
345
|
-
*
|
|
346
|
-
* import subprocess, json
|
|
347
|
-
*
|
|
348
|
-
* result = subprocess.run([
|
|
349
|
-
* "curl", "-s", "-X", "POST", "{base_url}/source/upload",
|
|
350
|
-
* "-H", "Authorization: Bearer {access_key}",
|
|
351
|
-
* "-F", "file=@/local/path/to/document.pdf",
|
|
352
|
-
* "-F", "partition_method=graphorlm",
|
|
353
|
-
* ], capture_output=True, text=True)
|
|
354
|
-
* response = json.loads(result.stdout)
|
|
355
|
-
* ```
|
|
356
|
-
*
|
|
357
|
-
* **Important:** Do NOT use `fs.createReadStream("/local/path")` inside the SDK
|
|
358
|
-
* code — it will fail because the execution environment cannot access local paths.
|
|
359
|
-
* Always prefer `curl`/`requests` executed locally for binary uploads.
|
|
360
|
-
*
|
|
361
|
-
* **Usage via curl:**
|
|
362
|
-
*
|
|
363
|
-
* ```
|
|
364
|
-
* curl -X POST "{base_url}/source/upload" \
|
|
365
|
-
* -H "Authorization: Bearer {access_key}" \
|
|
366
|
-
* -F "file=@/path/to/document.pdf" \
|
|
367
|
-
* -F "partition_method=graphorlm"
|
|
368
|
-
* ```
|
|
369
|
-
*
|
|
370
|
-
* **Usage via Python `requests`:**
|
|
371
|
-
*
|
|
372
|
-
* ```
|
|
373
|
-
* import requests
|
|
374
|
-
*
|
|
375
|
-
* with open("document.pdf", "rb") as f:
|
|
376
|
-
* response = requests.post(
|
|
377
|
-
* "{base_url}/source/upload",
|
|
378
|
-
* headers={"Authorization": "Bearer {access_key}"},
|
|
379
|
-
* files={"file": ("document.pdf", f, "application/pdf")},
|
|
380
|
-
* data={"partition_method": "graphorlm"}, # optional
|
|
381
|
-
* )
|
|
382
|
-
* ```
|
|
298
|
+
* Accepts **`multipart/form-data`** with the file. Validates size (max 100 MB) and
|
|
299
|
+
* extension, stores the file, then schedules the full data-ingestion pipeline in
|
|
300
|
+
* the background. Returns immediately with a `build_id` to poll for status.
|
|
383
301
|
*
|
|
384
302
|
* **Parameters:**
|
|
385
303
|
*
|
|
386
|
-
* - **file** (`multipart/form-data`): The file to upload. Must include
|
|
387
|
-
* `Content-Length`
|
|
388
|
-
*
|
|
389
|
-
*
|
|
390
|
-
*
|
|
391
|
-
* One of: `basic` (Fast), `hi_res` (Balanced), `hi_res_ft` (Accurate), `mai`
|
|
392
|
-
* (VLM), `graphorlm` (Agentic). When omitted, the system default is used.
|
|
304
|
+
* - **file** (`multipart/form-data`): The file to upload. Must include
|
|
305
|
+
* `Content-Length` and have a supported extension (pdf, doc, docx, csv, txt, md,
|
|
306
|
+
* etc.).
|
|
307
|
+
* - **method** (`form`, optional): Partitioning strategy. One of: `fast`,
|
|
308
|
+
* `balanced`, `accurate`, `vlm`, `agentic`. When omitted, the system default is used.
|
|
393
309
|
*
|
|
394
|
-
* **Returns**
|
|
395
|
-
*
|
|
396
|
-
*
|
|
397
|
-
* **Error responses:**
|
|
398
|
-
*
|
|
399
|
-
* - `400` — Unsupported file type or missing file name.
|
|
400
|
-
* - `411` — Missing `Content-Length` header (file size cannot be determined).
|
|
401
|
-
* - `413` — File exceeds the 100 MB size limit.
|
|
402
|
-
* - `403` — Permission denied.
|
|
403
|
-
* - `404` — File not found during processing.
|
|
404
|
-
* - `500` — Unexpected internal error.
|
|
310
|
+
* **Returns** `AsyncIngestResponse` with `build_id`. Use it to check processing
|
|
311
|
+
* status.
|
|
405
312
|
*
|
|
406
313
|
* @example
|
|
407
314
|
* ```ts
|
|
408
|
-
* const
|
|
315
|
+
* const response = await client.sources.ingestFile({
|
|
409
316
|
* file: fs.createReadStream('path/to/file'),
|
|
410
317
|
* });
|
|
411
318
|
* ```
|
|
412
319
|
*/
|
|
413
|
-
|
|
320
|
+
ingestFile(body: SourceIngestFileParams, options?: RequestOptions): APIPromise<SourceIngestFileResponse> {
|
|
414
321
|
return this._client.post(
|
|
415
|
-
'/sources/
|
|
322
|
+
'/sources/ingest-file',
|
|
416
323
|
multipartFormRequestOptions({ body, ...options }, this._client),
|
|
417
324
|
);
|
|
418
325
|
}
|
|
@@ -420,46 +327,42 @@ export class Sources extends APIResource {
|
|
|
420
327
|
/**
|
|
421
328
|
* Ingest a GitHub repository as a source into the project's knowledge graph.
|
|
422
329
|
*
|
|
423
|
-
*
|
|
424
|
-
*
|
|
425
|
-
* embeddings, and persists everything in the knowledge graph synchronously.
|
|
330
|
+
* Schedules the ingestion in the background and returns immediately with a
|
|
331
|
+
* `build_id`. Use the returned `build_id` to poll for processing status.
|
|
426
332
|
*
|
|
427
333
|
* **Parameters (JSON body):**
|
|
428
334
|
*
|
|
429
335
|
* - **url** (str, required): The GitHub repository URL to ingest (e.g.
|
|
430
336
|
* `https://github.com/owner/repo`).
|
|
431
337
|
*
|
|
432
|
-
* **Returns**
|
|
433
|
-
* ID, name, size, type, source origin, partition method, and processing status).
|
|
434
|
-
*
|
|
435
|
-
* **Error responses:**
|
|
436
|
-
*
|
|
437
|
-
* - `500` — Unexpected internal error during GitHub source processing.
|
|
338
|
+
* **Returns** `AsyncIngestResponse` with `build_id`.
|
|
438
339
|
*
|
|
439
340
|
* @example
|
|
440
341
|
* ```ts
|
|
441
|
-
* const
|
|
342
|
+
* const response = await client.sources.ingestGitHub({
|
|
442
343
|
* url: 'url',
|
|
443
344
|
* });
|
|
444
345
|
* ```
|
|
445
346
|
*/
|
|
446
|
-
|
|
447
|
-
|
|
347
|
+
ingestGitHub(
|
|
348
|
+
body: SourceIngestGitHubParams,
|
|
349
|
+
options?: RequestOptions,
|
|
350
|
+
): APIPromise<SourceIngestGitHubResponse> {
|
|
351
|
+
return this._client.post('/sources/ingest-github', { body, ...options });
|
|
448
352
|
}
|
|
449
353
|
|
|
450
354
|
/**
|
|
451
355
|
* Ingest a web page (or a set of crawled pages) as a source into the project's
|
|
452
356
|
* knowledge graph.
|
|
453
357
|
*
|
|
454
|
-
*
|
|
455
|
-
*
|
|
456
|
-
*
|
|
457
|
-
* synchronously.
|
|
358
|
+
* Unlike the synchronous version, this endpoint schedules the ingestion in the
|
|
359
|
+
* background and returns immediately with a `processing` status. The source will
|
|
360
|
+
* be fully available once background processing completes.
|
|
458
361
|
*
|
|
459
362
|
* If the URL points directly to a downloadable file (detected via URL path
|
|
460
|
-
* extension or HTTP Content-Type), the file is downloaded
|
|
461
|
-
*
|
|
462
|
-
*
|
|
363
|
+
* extension or HTTP Content-Type), the file is first downloaded and uploaded to
|
|
364
|
+
* storage synchronously, then the partition/embedding pipeline runs in the
|
|
365
|
+
* background.
|
|
463
366
|
*
|
|
464
367
|
* **Parameters (JSON body):**
|
|
465
368
|
*
|
|
@@ -467,12 +370,13 @@ export class Sources extends APIResource {
|
|
|
467
370
|
* - **crawlUrls** (bool, optional, default `false`): When `true`, the system will
|
|
468
371
|
* also follow and ingest links found on the page. Ignored when the URL resolves
|
|
469
372
|
* to a file.
|
|
470
|
-
* - **
|
|
471
|
-
*
|
|
472
|
-
*
|
|
373
|
+
* - **method** (str, optional): The partitioning strategy to use. One of: `fast`,
|
|
374
|
+
* `balanced`, `accurate`, `vlm`, `agentic`. When omitted the system default is
|
|
375
|
+
* applied.
|
|
473
376
|
*
|
|
474
|
-
* **Returns** a `PublicSourceResponse` with
|
|
475
|
-
*
|
|
377
|
+
* **Returns** a `PublicSourceResponse` with `status: "processing"` immediately.
|
|
378
|
+
* Poll the source status endpoint using the returned `file_id` to track
|
|
379
|
+
* completion.
|
|
476
380
|
*
|
|
477
381
|
* **Error responses:**
|
|
478
382
|
*
|
|
@@ -481,58 +385,228 @@ export class Sources extends APIResource {
|
|
|
481
385
|
*
|
|
482
386
|
* @example
|
|
483
387
|
* ```ts
|
|
484
|
-
* const
|
|
388
|
+
* const response = await client.sources.ingestURL({
|
|
485
389
|
* url: 'url',
|
|
486
390
|
* });
|
|
487
391
|
* ```
|
|
488
392
|
*/
|
|
489
|
-
|
|
490
|
-
return this._client.post('/sources/
|
|
393
|
+
ingestURL(body: SourceIngestURLParams, options?: RequestOptions): APIPromise<SourceIngestURLResponse> {
|
|
394
|
+
return this._client.post('/sources/ingest-url', { body, ...options });
|
|
491
395
|
}
|
|
492
396
|
|
|
493
397
|
/**
|
|
494
398
|
* Ingest a YouTube video as a source into the project's knowledge graph.
|
|
495
399
|
*
|
|
496
|
-
*
|
|
497
|
-
*
|
|
498
|
-
*
|
|
400
|
+
* Schedules the ingestion in the background and returns immediately with a
|
|
401
|
+
* `build_id`. The endpoint will download the transcript/captions and process them
|
|
402
|
+
* in the background. Use the returned `build_id` to poll for processing status.
|
|
499
403
|
*
|
|
500
404
|
* **Parameters (JSON body):**
|
|
501
405
|
*
|
|
502
406
|
* - **url** (str, required): The YouTube video URL to ingest (e.g.
|
|
503
407
|
* `https://www.youtube.com/watch?v=...`).
|
|
504
408
|
*
|
|
505
|
-
* **Returns**
|
|
506
|
-
*
|
|
409
|
+
* **Returns** `AsyncIngestResponse` with `build_id`.
|
|
410
|
+
*
|
|
411
|
+
* @example
|
|
412
|
+
* ```ts
|
|
413
|
+
* const response = await client.sources.ingestYoutube({
|
|
414
|
+
* url: 'url',
|
|
415
|
+
* });
|
|
416
|
+
* ```
|
|
417
|
+
*/
|
|
418
|
+
ingestYoutube(
|
|
419
|
+
body: SourceIngestYoutubeParams,
|
|
420
|
+
options?: RequestOptions,
|
|
421
|
+
): APIPromise<SourceIngestYoutubeResponse> {
|
|
422
|
+
return this._client.post('/sources/ingest-youtube', { body, ...options });
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
/**
|
|
426
|
+
* Re-process (re-parse) an existing source in the background.
|
|
427
|
+
*
|
|
428
|
+
* Schedules the data-ingestion pipeline (partitioning, chunking, embedding) for an
|
|
429
|
+
* existing source and returns immediately with a `build_id`. Use it to poll for
|
|
430
|
+
* status.
|
|
431
|
+
*
|
|
432
|
+
* **Parameters (JSON body):**
|
|
433
|
+
*
|
|
434
|
+
* - **file_id** (str, required): Unique identifier of the source to re-process.
|
|
435
|
+
* - **method** (str, default `"fast"`): Partitioning strategy. One of: `fast`,
|
|
436
|
+
* `balanced`, `accurate`, `vlm`, `agentic`.
|
|
437
|
+
*
|
|
438
|
+
* **Returns** `AsyncIngestResponse` with `build_id`.
|
|
439
|
+
*
|
|
440
|
+
* @example
|
|
441
|
+
* ```ts
|
|
442
|
+
* const response = await client.sources.reprocess({
|
|
443
|
+
* file_id: 'file_id',
|
|
444
|
+
* });
|
|
445
|
+
* ```
|
|
446
|
+
*/
|
|
447
|
+
reprocess(body: SourceReprocessParams, options?: RequestOptions): APIPromise<SourceReprocessResponse> {
|
|
448
|
+
return this._client.post('/sources/reprocess', { body, ...options });
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
/**
|
|
452
|
+
* Retrieve relevant document chunks from the prebuilt RAG vector store.
|
|
453
|
+
*
|
|
454
|
+
* Performs a semantic similarity search over the project's prebuilt RAG store
|
|
455
|
+
* using Google File Search with grounding. Returns the most relevant text chunks
|
|
456
|
+
* along with their source metadata (file name, page number, relevance score). This
|
|
457
|
+
* is a pure retrieval endpoint — it does **not** generate an answer; use
|
|
458
|
+
* `/ask-sources` for Q&A.
|
|
459
|
+
*
|
|
460
|
+
* **Parameters (JSON body):**
|
|
461
|
+
*
|
|
462
|
+
* - **query** (str, required): The natural-language search query used to find
|
|
463
|
+
* relevant chunks.
|
|
464
|
+
* - **file_ids** (list[str], optional — preferred): Restrict retrieval to specific
|
|
465
|
+
* source file IDs.
|
|
466
|
+
* - **file_names** (list[str], optional — deprecated): Restrict retrieval to
|
|
467
|
+
* specific source file names. Use `file_ids` when possible.
|
|
468
|
+
*
|
|
469
|
+
* **Returns** a `PublicRetrieveResponse` containing:
|
|
470
|
+
*
|
|
471
|
+
* - `query` — the original search query.
|
|
472
|
+
* - `chunks` — a list of `PublicRetrieveChunk` objects, each with `text`,
|
|
473
|
+
* `file_name`, `page_number`, `score`, and additional `metadata`.
|
|
474
|
+
* - `total` — the total number of chunks returned.
|
|
507
475
|
*
|
|
508
476
|
* **Error responses:**
|
|
509
477
|
*
|
|
510
|
-
* - `500` — Unexpected internal error during
|
|
478
|
+
* - `500` — Unexpected internal error during retrieval.
|
|
511
479
|
*
|
|
512
480
|
* @example
|
|
513
481
|
* ```ts
|
|
514
|
-
* const
|
|
515
|
-
*
|
|
482
|
+
* const response = await client.sources.retrieveChunks({
|
|
483
|
+
* query: 'query',
|
|
516
484
|
* });
|
|
517
485
|
* ```
|
|
518
486
|
*/
|
|
519
|
-
|
|
520
|
-
|
|
487
|
+
retrieveChunks(
|
|
488
|
+
body: SourceRetrieveChunksParams,
|
|
489
|
+
options?: RequestOptions,
|
|
490
|
+
): APIPromise<SourceRetrieveChunksResponse> {
|
|
491
|
+
return this._client.post('/sources/prebuilt-rag', { body, ...options });
|
|
521
492
|
}
|
|
522
493
|
}
|
|
523
494
|
|
|
524
495
|
/**
|
|
525
|
-
*
|
|
496
|
+
* A single parsed element (chunk/partition) from a source, with explicit fields.
|
|
497
|
+
*/
|
|
498
|
+
export interface Element {
|
|
499
|
+
/**
|
|
500
|
+
* Bounding box (e.g. left, top, width, height) when available.
|
|
501
|
+
*/
|
|
502
|
+
bounding_box?: { [key: string]: unknown } | null;
|
|
503
|
+
|
|
504
|
+
/**
|
|
505
|
+
* Unique identifier for the element.
|
|
506
|
+
*/
|
|
507
|
+
element_id?: string | null;
|
|
508
|
+
|
|
509
|
+
/**
|
|
510
|
+
* Type of the element (Title, NarrativeText, Image, Table, etc.).
|
|
511
|
+
*/
|
|
512
|
+
element_type?:
|
|
513
|
+
| 'Title'
|
|
514
|
+
| 'NarrativeText'
|
|
515
|
+
| 'TextBlock'
|
|
516
|
+
| 'ListItem'
|
|
517
|
+
| 'Table'
|
|
518
|
+
| 'TableRow'
|
|
519
|
+
| 'Image'
|
|
520
|
+
| 'Footer'
|
|
521
|
+
| 'Formula'
|
|
522
|
+
| 'CompositeElement'
|
|
523
|
+
| 'FigureCaption'
|
|
524
|
+
| 'PageBreak'
|
|
525
|
+
| 'Address'
|
|
526
|
+
| 'EmailAddress'
|
|
527
|
+
| 'PageNumber'
|
|
528
|
+
| 'CodeSnippet'
|
|
529
|
+
| 'Header'
|
|
530
|
+
| 'FormKeysValues'
|
|
531
|
+
| 'Link'
|
|
532
|
+
| 'UncategorizedText'
|
|
533
|
+
| 'Abstract'
|
|
534
|
+
| 'AsideText'
|
|
535
|
+
| 'Reference'
|
|
536
|
+
| 'ReferenceContent'
|
|
537
|
+
| 'Chart'
|
|
538
|
+
| 'Seal'
|
|
539
|
+
| 'FormulaNumber'
|
|
540
|
+
| null;
|
|
541
|
+
|
|
542
|
+
/**
|
|
543
|
+
* HTML representation of the content, when available.
|
|
544
|
+
*/
|
|
545
|
+
html?: string | null;
|
|
546
|
+
|
|
547
|
+
/**
|
|
548
|
+
* Base64-encoded image data, when the element is an image.
|
|
549
|
+
*/
|
|
550
|
+
img_base64?: string | null;
|
|
551
|
+
|
|
552
|
+
/**
|
|
553
|
+
* Markdown representation of the content, when available.
|
|
554
|
+
*/
|
|
555
|
+
markdown?: string | null;
|
|
556
|
+
|
|
557
|
+
/**
|
|
558
|
+
* Additional metadata.
|
|
559
|
+
*/
|
|
560
|
+
metadata?: { [key: string]: unknown };
|
|
561
|
+
|
|
562
|
+
/**
|
|
563
|
+
* Annotation/summary for the page containing this element.
|
|
564
|
+
*/
|
|
565
|
+
page_annotation?: string | null;
|
|
566
|
+
|
|
567
|
+
/**
|
|
568
|
+
* Keywords extracted for the page.
|
|
569
|
+
*/
|
|
570
|
+
page_keywords?: Array<string> | null;
|
|
571
|
+
|
|
572
|
+
/**
|
|
573
|
+
* Page dimensions (width, height) when available.
|
|
574
|
+
*/
|
|
575
|
+
page_layout?: { [key: string]: unknown } | null;
|
|
576
|
+
|
|
577
|
+
/**
|
|
578
|
+
* Page number (1-based) where the element appears.
|
|
579
|
+
*/
|
|
580
|
+
page_number?: number | null;
|
|
581
|
+
|
|
582
|
+
/**
|
|
583
|
+
* Topics extracted for the page.
|
|
584
|
+
*/
|
|
585
|
+
page_topics?: Array<string> | null;
|
|
586
|
+
|
|
587
|
+
/**
|
|
588
|
+
* Order/position of the element within the document.
|
|
589
|
+
*/
|
|
590
|
+
position?: number | null;
|
|
591
|
+
|
|
592
|
+
/**
|
|
593
|
+
* Plain text content of the element.
|
|
594
|
+
*/
|
|
595
|
+
text?: string;
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
/**
|
|
599
|
+
* Public-facing partition method names for API v2.
|
|
526
600
|
*
|
|
527
|
-
*
|
|
601
|
+
* Maps to internal PartitionMethod as:
|
|
528
602
|
*
|
|
529
|
-
* -
|
|
530
|
-
* -
|
|
531
|
-
* -
|
|
532
|
-
* -
|
|
533
|
-
* -
|
|
603
|
+
* - fast → basic
|
|
604
|
+
* - balanced → hi_res
|
|
605
|
+
* - accurate → hi_res_ft
|
|
606
|
+
* - vlm → mai
|
|
607
|
+
* - agentic → graphorlm
|
|
534
608
|
*/
|
|
535
|
-
export type
|
|
609
|
+
export type Method = 'fast' | 'balanced' | 'accurate' | 'vlm' | 'agentic';
|
|
536
610
|
|
|
537
611
|
export interface PublicSource {
|
|
538
612
|
/**
|
|
@@ -582,19 +656,10 @@ export interface PublicSource {
|
|
|
582
656
|
file_id?: string | null;
|
|
583
657
|
|
|
584
658
|
/**
|
|
585
|
-
* Partitioning strategy used during ingestion.
|
|
586
|
-
*
|
|
659
|
+
* Partitioning strategy used during ingestion. V1 API: basic, hi_res, hi_res_ft,
|
|
660
|
+
* mai, graphorlm. V2 API: fast, balanced, accurate, vlm, agentic.
|
|
587
661
|
*/
|
|
588
|
-
|
|
589
|
-
| 'basic'
|
|
590
|
-
| 'hi_res'
|
|
591
|
-
| 'hi_res_ft'
|
|
592
|
-
| 'mai'
|
|
593
|
-
| 'graphorlm'
|
|
594
|
-
| 'ocr'
|
|
595
|
-
| 'advanced'
|
|
596
|
-
| 'yolox'
|
|
597
|
-
| null;
|
|
662
|
+
method?: string | null;
|
|
598
663
|
}
|
|
599
664
|
|
|
600
665
|
export type SourceListResponse = Array<PublicSource>;
|
|
@@ -679,11 +744,121 @@ export interface SourceExtractResponse {
|
|
|
679
744
|
structured_output?: { [key: string]: unknown } | null;
|
|
680
745
|
}
|
|
681
746
|
|
|
682
|
-
|
|
747
|
+
/**
|
|
748
|
+
* Status and optional result for an async build (ingestion/re-process) identified
|
|
749
|
+
* by build_id.
|
|
750
|
+
*
|
|
751
|
+
* Returned by GET /v2/sources/builds/{build_id}. When the build has completed
|
|
752
|
+
* successfully, includes file_id, file_name, and optionally paginated elements
|
|
753
|
+
* (parsed chunks).
|
|
754
|
+
*/
|
|
755
|
+
export interface SourceGetBuildStatusResponse {
|
|
756
|
+
/**
|
|
757
|
+
* The build identifier returned when the ingestion was scheduled.
|
|
758
|
+
*/
|
|
759
|
+
build_id: string;
|
|
760
|
+
|
|
761
|
+
/**
|
|
762
|
+
* Current build status. When a build history exists, this is a SourceNodeStatus
|
|
763
|
+
* value (e.g. Completed, Processing, Processing failed). When no history exists
|
|
764
|
+
* yet: not_found.
|
|
765
|
+
*/
|
|
766
|
+
status: string;
|
|
767
|
+
|
|
768
|
+
/**
|
|
769
|
+
* True if the build completed successfully (status is Completed).
|
|
770
|
+
*/
|
|
771
|
+
success: boolean;
|
|
772
|
+
|
|
773
|
+
/**
|
|
774
|
+
* ISO8601 timestamp when the build (history) was created. Present when history
|
|
775
|
+
* exists.
|
|
776
|
+
*/
|
|
777
|
+
created_at?: string | null;
|
|
778
|
+
|
|
779
|
+
/**
|
|
780
|
+
* Paginated list of parsed elements (chunks) for this build. Only present when
|
|
781
|
+
* suppress_elements=false and the build has completed (status Completed).
|
|
782
|
+
*/
|
|
783
|
+
elements?: Array<Element> | null;
|
|
784
|
+
|
|
785
|
+
/**
|
|
786
|
+
* Error message from the pipeline, if the build failed (e.g. processing_failed).
|
|
787
|
+
*/
|
|
788
|
+
error?: string | null;
|
|
789
|
+
|
|
790
|
+
/**
|
|
791
|
+
* Source file identifier. Present when the build has been persisted (history
|
|
792
|
+
* exists).
|
|
793
|
+
*/
|
|
794
|
+
file_id?: string | null;
|
|
795
|
+
|
|
796
|
+
/**
|
|
797
|
+
* Display name of the source file. Present when the build has been persisted.
|
|
798
|
+
*/
|
|
799
|
+
file_name?: string | null;
|
|
800
|
+
|
|
801
|
+
/**
|
|
802
|
+
* Human-readable message (e.g. when status is not_found or processing).
|
|
803
|
+
*/
|
|
804
|
+
message?: string | null;
|
|
805
|
+
|
|
806
|
+
/**
|
|
807
|
+
* Public-facing partition method names for API v2.
|
|
808
|
+
*
|
|
809
|
+
* Maps to internal PartitionMethod as:
|
|
810
|
+
*
|
|
811
|
+
* - fast → basic
|
|
812
|
+
* - balanced → hi_res
|
|
813
|
+
* - accurate → hi_res_ft
|
|
814
|
+
* - vlm → mai
|
|
815
|
+
* - agentic → graphorlm
|
|
816
|
+
*/
|
|
817
|
+
method?: Method | null;
|
|
818
|
+
|
|
819
|
+
/**
|
|
820
|
+
* Current page of elements (1-based). Null when no pagination was requested (all
|
|
821
|
+
* elements returned).
|
|
822
|
+
*/
|
|
823
|
+
page?: number | null;
|
|
824
|
+
|
|
825
|
+
/**
|
|
826
|
+
* Number of elements per page. Null when no pagination was requested.
|
|
827
|
+
*/
|
|
828
|
+
page_size?: number | null;
|
|
829
|
+
|
|
830
|
+
/**
|
|
831
|
+
* Total number of elements for this build. Present when suppress_elements=false.
|
|
832
|
+
*/
|
|
833
|
+
total_elements?: number | null;
|
|
834
|
+
|
|
835
|
+
/**
|
|
836
|
+
* Total pages in the source for this build. Present when history exists.
|
|
837
|
+
*/
|
|
838
|
+
total_pages?: number | null;
|
|
839
|
+
|
|
840
|
+
/**
|
|
841
|
+
* Total number of pages of elements. Null when no pagination was requested.
|
|
842
|
+
*/
|
|
843
|
+
total_pages_elements?: number | null;
|
|
844
|
+
|
|
845
|
+
/**
|
|
846
|
+
* Total number of partitions created in this build. Present when history exists.
|
|
847
|
+
*/
|
|
848
|
+
total_partitions?: number | null;
|
|
849
|
+
|
|
850
|
+
/**
|
|
851
|
+
* ISO8601 timestamp when the build (history) was last updated. Present when
|
|
852
|
+
* history exists.
|
|
853
|
+
*/
|
|
854
|
+
updated_at?: string | null;
|
|
855
|
+
}
|
|
856
|
+
|
|
857
|
+
export interface SourceGetElementsResponse {
|
|
683
858
|
/**
|
|
684
859
|
* List of items in the current page
|
|
685
860
|
*/
|
|
686
|
-
items: Array<
|
|
861
|
+
items: Array<Element>;
|
|
687
862
|
|
|
688
863
|
/**
|
|
689
864
|
* Total number of items
|
|
@@ -706,30 +881,89 @@ export interface SourceLoadElementsResponse {
|
|
|
706
881
|
total_pages?: number | null;
|
|
707
882
|
}
|
|
708
883
|
|
|
709
|
-
export
|
|
884
|
+
export interface SourceIngestFileResponse {
|
|
710
885
|
/**
|
|
711
|
-
*
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
*
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
*
|
|
722
|
-
* )
|
|
886
|
+
* The ID of the build. This ID can be used to check the status of the request.
|
|
887
|
+
*/
|
|
888
|
+
build_id: string;
|
|
889
|
+
|
|
890
|
+
/**
|
|
891
|
+
* If the request was not successful, this will contain an error message.
|
|
892
|
+
*/
|
|
893
|
+
error?: string | null;
|
|
894
|
+
|
|
895
|
+
/**
|
|
896
|
+
* Whether the request was successfully scheduled.
|
|
723
897
|
*/
|
|
724
|
-
|
|
725
|
-
|
|
898
|
+
success?: boolean;
|
|
899
|
+
}
|
|
726
900
|
|
|
727
|
-
|
|
901
|
+
export interface SourceIngestGitHubResponse {
|
|
902
|
+
/**
|
|
903
|
+
* The ID of the build. This ID can be used to check the status of the request.
|
|
904
|
+
*/
|
|
905
|
+
build_id: string;
|
|
728
906
|
|
|
729
|
-
|
|
907
|
+
/**
|
|
908
|
+
* If the request was not successful, this will contain an error message.
|
|
909
|
+
*/
|
|
910
|
+
error?: string | null;
|
|
730
911
|
|
|
731
|
-
|
|
732
|
-
|
|
912
|
+
/**
|
|
913
|
+
* Whether the request was successfully scheduled.
|
|
914
|
+
*/
|
|
915
|
+
success?: boolean;
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
export interface SourceIngestURLResponse {
|
|
919
|
+
/**
|
|
920
|
+
* The ID of the build. This ID can be used to check the status of the request.
|
|
921
|
+
*/
|
|
922
|
+
build_id: string;
|
|
923
|
+
|
|
924
|
+
/**
|
|
925
|
+
* If the request was not successful, this will contain an error message.
|
|
926
|
+
*/
|
|
927
|
+
error?: string | null;
|
|
928
|
+
|
|
929
|
+
/**
|
|
930
|
+
* Whether the request was successfully scheduled.
|
|
931
|
+
*/
|
|
932
|
+
success?: boolean;
|
|
933
|
+
}
|
|
934
|
+
|
|
935
|
+
export interface SourceIngestYoutubeResponse {
|
|
936
|
+
/**
|
|
937
|
+
* The ID of the build. This ID can be used to check the status of the request.
|
|
938
|
+
*/
|
|
939
|
+
build_id: string;
|
|
940
|
+
|
|
941
|
+
/**
|
|
942
|
+
* If the request was not successful, this will contain an error message.
|
|
943
|
+
*/
|
|
944
|
+
error?: string | null;
|
|
945
|
+
|
|
946
|
+
/**
|
|
947
|
+
* Whether the request was successfully scheduled.
|
|
948
|
+
*/
|
|
949
|
+
success?: boolean;
|
|
950
|
+
}
|
|
951
|
+
|
|
952
|
+
export interface SourceReprocessResponse {
|
|
953
|
+
/**
|
|
954
|
+
* The ID of the build. This ID can be used to check the status of the request.
|
|
955
|
+
*/
|
|
956
|
+
build_id: string;
|
|
957
|
+
|
|
958
|
+
/**
|
|
959
|
+
* If the request was not successful, this will contain an error message.
|
|
960
|
+
*/
|
|
961
|
+
error?: string | null;
|
|
962
|
+
|
|
963
|
+
/**
|
|
964
|
+
* Whether the request was successfully scheduled.
|
|
965
|
+
*/
|
|
966
|
+
success?: boolean;
|
|
733
967
|
}
|
|
734
968
|
|
|
735
969
|
export interface SourceRetrieveChunksResponse {
|
|
@@ -783,6 +1017,14 @@ export namespace SourceRetrieveChunksResponse {
|
|
|
783
1017
|
}
|
|
784
1018
|
}
|
|
785
1019
|
|
|
1020
|
+
export interface SourceListParams {
|
|
1021
|
+
/**
|
|
1022
|
+
* Optional list of file_id to filter by (only these sources are returned). Repeat
|
|
1023
|
+
* the param for multiple IDs.
|
|
1024
|
+
*/
|
|
1025
|
+
file_ids?: Array<string> | null;
|
|
1026
|
+
}
|
|
1027
|
+
|
|
786
1028
|
export interface SourceDeleteParams {
|
|
787
1029
|
/**
|
|
788
1030
|
* Unique identifier for the source (preferred)
|
|
@@ -865,116 +1107,78 @@ export interface SourceExtractParams {
|
|
|
865
1107
|
thinking_level?: 'fast' | 'balanced' | 'accurate' | null;
|
|
866
1108
|
}
|
|
867
1109
|
|
|
868
|
-
export interface
|
|
869
|
-
/**
|
|
870
|
-
* Unique identifier for the source (preferred)
|
|
871
|
-
*/
|
|
872
|
-
file_id?: string | null;
|
|
873
|
-
|
|
874
|
-
/**
|
|
875
|
-
* The name of the file (deprecated, use file_id)
|
|
876
|
-
*/
|
|
877
|
-
file_name?: string | null;
|
|
878
|
-
|
|
879
|
-
/**
|
|
880
|
-
* Optional filter to narrow down the returned elements
|
|
881
|
-
*/
|
|
882
|
-
filter?: SourceLoadElementsParams.Filter | null;
|
|
883
|
-
|
|
884
|
-
/**
|
|
885
|
-
* Current page number
|
|
886
|
-
*/
|
|
1110
|
+
export interface SourceGetBuildStatusParams {
|
|
887
1111
|
page?: number | null;
|
|
888
1112
|
|
|
889
|
-
/**
|
|
890
|
-
* Number of items per page
|
|
891
|
-
*/
|
|
892
1113
|
page_size?: number | null;
|
|
1114
|
+
|
|
1115
|
+
suppress_elements?: boolean;
|
|
1116
|
+
|
|
1117
|
+
suppress_img_base64?: boolean;
|
|
893
1118
|
}
|
|
894
1119
|
|
|
895
|
-
export
|
|
1120
|
+
export interface SourceGetElementsParams {
|
|
896
1121
|
/**
|
|
897
|
-
*
|
|
1122
|
+
* Unique identifier of the source
|
|
898
1123
|
*/
|
|
899
|
-
|
|
900
|
-
/**
|
|
901
|
-
* List of element types to exclude from the results
|
|
902
|
-
*/
|
|
903
|
-
elementsToRemove?: Array<string> | null;
|
|
904
|
-
|
|
905
|
-
/**
|
|
906
|
-
* Restrict results to specific page numbers from the original document
|
|
907
|
-
*/
|
|
908
|
-
page_numbers?: Array<number> | null;
|
|
909
|
-
|
|
910
|
-
/**
|
|
911
|
-
* Filter by element type (e.g. NarrativeText, Title, Table)
|
|
912
|
-
*/
|
|
913
|
-
type?: string | null;
|
|
914
|
-
}
|
|
915
|
-
}
|
|
1124
|
+
file_id: string;
|
|
916
1125
|
|
|
917
|
-
export interface SourceParseParams {
|
|
918
1126
|
/**
|
|
919
|
-
*
|
|
1127
|
+
* Element types to exclude
|
|
920
1128
|
*/
|
|
921
|
-
|
|
1129
|
+
elementsToRemove?: Array<string> | null;
|
|
922
1130
|
|
|
923
1131
|
/**
|
|
924
|
-
*
|
|
1132
|
+
* 1-based page number (use with page_size)
|
|
925
1133
|
*/
|
|
926
|
-
|
|
1134
|
+
page?: number | null;
|
|
927
1135
|
|
|
928
1136
|
/**
|
|
929
|
-
*
|
|
930
|
-
* (Balanced), hi_res_ft (Accurate), mai (VLM), graphorlm (Agentic)
|
|
1137
|
+
* Restrict to specific page numbers
|
|
931
1138
|
*/
|
|
932
|
-
|
|
933
|
-
}
|
|
1139
|
+
page_numbers?: Array<number> | null;
|
|
934
1140
|
|
|
935
|
-
export interface SourceRetrieveChunksParams {
|
|
936
1141
|
/**
|
|
937
|
-
*
|
|
1142
|
+
* Number of elements per page
|
|
938
1143
|
*/
|
|
939
|
-
|
|
1144
|
+
page_size?: number | null;
|
|
940
1145
|
|
|
941
1146
|
/**
|
|
942
|
-
*
|
|
1147
|
+
* When true, img_base64 is omitted from each element
|
|
943
1148
|
*/
|
|
944
|
-
|
|
1149
|
+
suppress_img_base64?: boolean;
|
|
945
1150
|
|
|
946
1151
|
/**
|
|
947
|
-
*
|
|
948
|
-
* file_ids)
|
|
1152
|
+
* Filter by element type (e.g. NarrativeText, Title)
|
|
949
1153
|
*/
|
|
950
|
-
|
|
1154
|
+
type?: string | null;
|
|
951
1155
|
}
|
|
952
1156
|
|
|
953
|
-
export interface
|
|
1157
|
+
export interface SourceIngestFileParams {
|
|
954
1158
|
file: Uploadable;
|
|
955
1159
|
|
|
956
1160
|
/**
|
|
957
|
-
*
|
|
1161
|
+
* Public-facing partition method names for API v2.
|
|
958
1162
|
*
|
|
959
|
-
*
|
|
1163
|
+
* Maps to internal PartitionMethod as:
|
|
960
1164
|
*
|
|
961
|
-
* -
|
|
962
|
-
* -
|
|
963
|
-
* -
|
|
964
|
-
* -
|
|
965
|
-
* -
|
|
1165
|
+
* - fast → basic
|
|
1166
|
+
* - balanced → hi_res
|
|
1167
|
+
* - accurate → hi_res_ft
|
|
1168
|
+
* - vlm → mai
|
|
1169
|
+
* - agentic → graphorlm
|
|
966
1170
|
*/
|
|
967
|
-
|
|
1171
|
+
method?: Method | null;
|
|
968
1172
|
}
|
|
969
1173
|
|
|
970
|
-
export interface
|
|
1174
|
+
export interface SourceIngestGitHubParams {
|
|
971
1175
|
/**
|
|
972
1176
|
* The GitHub repository URL to ingest (e.g. https://github.com/owner/repo)
|
|
973
1177
|
*/
|
|
974
1178
|
url: string;
|
|
975
1179
|
}
|
|
976
1180
|
|
|
977
|
-
export interface
|
|
1181
|
+
export interface SourceIngestURLParams {
|
|
978
1182
|
/**
|
|
979
1183
|
* The web page URL to ingest
|
|
980
1184
|
*/
|
|
@@ -986,20 +1190,20 @@ export interface SourceUploadURLParams {
|
|
|
986
1190
|
crawlUrls?: boolean;
|
|
987
1191
|
|
|
988
1192
|
/**
|
|
989
|
-
*
|
|
1193
|
+
* Public-facing partition method names for API v2.
|
|
990
1194
|
*
|
|
991
|
-
*
|
|
1195
|
+
* Maps to internal PartitionMethod as:
|
|
992
1196
|
*
|
|
993
|
-
* -
|
|
994
|
-
* -
|
|
995
|
-
* -
|
|
996
|
-
* -
|
|
997
|
-
* -
|
|
1197
|
+
* - fast → basic
|
|
1198
|
+
* - balanced → hi_res
|
|
1199
|
+
* - accurate → hi_res_ft
|
|
1200
|
+
* - vlm → mai
|
|
1201
|
+
* - agentic → graphorlm
|
|
998
1202
|
*/
|
|
999
|
-
|
|
1203
|
+
method?: Method | null;
|
|
1000
1204
|
}
|
|
1001
1205
|
|
|
1002
|
-
export interface
|
|
1206
|
+
export interface SourceIngestYoutubeParams {
|
|
1003
1207
|
/**
|
|
1004
1208
|
* The YouTube video URL to ingest (e.g.
|
|
1005
1209
|
* https://www.youtube.com/watch?v=dQw4w9WgXcQ)
|
|
@@ -1007,25 +1211,64 @@ export interface SourceUploadYoutubeParams {
|
|
|
1007
1211
|
url: string;
|
|
1008
1212
|
}
|
|
1009
1213
|
|
|
1214
|
+
export interface SourceReprocessParams {
|
|
1215
|
+
/**
|
|
1216
|
+
* Unique identifier of the source to re-process.
|
|
1217
|
+
*/
|
|
1218
|
+
file_id: string;
|
|
1219
|
+
|
|
1220
|
+
/**
|
|
1221
|
+
* Partitioning strategy. One of: fast, balanced, accurate, vlm, agentic.
|
|
1222
|
+
*/
|
|
1223
|
+
method?: Method;
|
|
1224
|
+
}
|
|
1225
|
+
|
|
1226
|
+
export interface SourceRetrieveChunksParams {
|
|
1227
|
+
/**
|
|
1228
|
+
* The natural-language search query to find relevant chunks
|
|
1229
|
+
*/
|
|
1230
|
+
query: string;
|
|
1231
|
+
|
|
1232
|
+
/**
|
|
1233
|
+
* Optional list of file IDs to restrict retrieval scope (preferred)
|
|
1234
|
+
*/
|
|
1235
|
+
file_ids?: Array<string> | null;
|
|
1236
|
+
|
|
1237
|
+
/**
|
|
1238
|
+
* Optional list of file names to restrict retrieval scope (deprecated, use
|
|
1239
|
+
* file_ids)
|
|
1240
|
+
*/
|
|
1241
|
+
file_names?: Array<string> | null;
|
|
1242
|
+
}
|
|
1243
|
+
|
|
1010
1244
|
export declare namespace Sources {
|
|
1011
1245
|
export {
|
|
1012
|
-
type
|
|
1246
|
+
type Element as Element,
|
|
1247
|
+
type Method as Method,
|
|
1013
1248
|
type PublicSource as PublicSource,
|
|
1014
1249
|
type SourceListResponse as SourceListResponse,
|
|
1015
1250
|
type SourceDeleteResponse as SourceDeleteResponse,
|
|
1016
1251
|
type SourceAskResponse as SourceAskResponse,
|
|
1017
1252
|
type SourceExtractResponse as SourceExtractResponse,
|
|
1018
|
-
type
|
|
1253
|
+
type SourceGetBuildStatusResponse as SourceGetBuildStatusResponse,
|
|
1254
|
+
type SourceGetElementsResponse as SourceGetElementsResponse,
|
|
1255
|
+
type SourceIngestFileResponse as SourceIngestFileResponse,
|
|
1256
|
+
type SourceIngestGitHubResponse as SourceIngestGitHubResponse,
|
|
1257
|
+
type SourceIngestURLResponse as SourceIngestURLResponse,
|
|
1258
|
+
type SourceIngestYoutubeResponse as SourceIngestYoutubeResponse,
|
|
1259
|
+
type SourceReprocessResponse as SourceReprocessResponse,
|
|
1019
1260
|
type SourceRetrieveChunksResponse as SourceRetrieveChunksResponse,
|
|
1261
|
+
type SourceListParams as SourceListParams,
|
|
1020
1262
|
type SourceDeleteParams as SourceDeleteParams,
|
|
1021
1263
|
type SourceAskParams as SourceAskParams,
|
|
1022
1264
|
type SourceExtractParams as SourceExtractParams,
|
|
1023
|
-
type
|
|
1024
|
-
type
|
|
1265
|
+
type SourceGetBuildStatusParams as SourceGetBuildStatusParams,
|
|
1266
|
+
type SourceGetElementsParams as SourceGetElementsParams,
|
|
1267
|
+
type SourceIngestFileParams as SourceIngestFileParams,
|
|
1268
|
+
type SourceIngestGitHubParams as SourceIngestGitHubParams,
|
|
1269
|
+
type SourceIngestURLParams as SourceIngestURLParams,
|
|
1270
|
+
type SourceIngestYoutubeParams as SourceIngestYoutubeParams,
|
|
1271
|
+
type SourceReprocessParams as SourceReprocessParams,
|
|
1025
1272
|
type SourceRetrieveChunksParams as SourceRetrieveChunksParams,
|
|
1026
|
-
type SourceUploadParams as SourceUploadParams,
|
|
1027
|
-
type SourceUploadGitHubParams as SourceUploadGitHubParams,
|
|
1028
|
-
type SourceUploadURLParams as SourceUploadURLParams,
|
|
1029
|
-
type SourceUploadYoutubeParams as SourceUploadYoutubeParams,
|
|
1030
1273
|
};
|
|
1031
1274
|
}
|