knowledge2 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. knowledge2-0.4.0.dist-info/METADATA +556 -0
  2. knowledge2-0.4.0.dist-info/RECORD +139 -0
  3. knowledge2-0.4.0.dist-info/WHEEL +5 -0
  4. knowledge2-0.4.0.dist-info/top_level.txt +1 -0
  5. sdk/__init__.py +70 -0
  6. sdk/_async_base.py +525 -0
  7. sdk/_async_paging.py +57 -0
  8. sdk/_base.py +541 -0
  9. sdk/_logging.py +41 -0
  10. sdk/_paging.py +73 -0
  11. sdk/_preview.py +70 -0
  12. sdk/_raw_response.py +25 -0
  13. sdk/_request_options.py +51 -0
  14. sdk/_transport.py +144 -0
  15. sdk/_validation.py +25 -0
  16. sdk/_validation_response.py +36 -0
  17. sdk/_version.py +3 -0
  18. sdk/async_client.py +320 -0
  19. sdk/async_resources/__init__.py +45 -0
  20. sdk/async_resources/_mixin_base.py +42 -0
  21. sdk/async_resources/a2a.py +230 -0
  22. sdk/async_resources/agents.py +489 -0
  23. sdk/async_resources/audit.py +145 -0
  24. sdk/async_resources/auth.py +133 -0
  25. sdk/async_resources/console.py +409 -0
  26. sdk/async_resources/corpora.py +276 -0
  27. sdk/async_resources/deployments.py +106 -0
  28. sdk/async_resources/documents.py +592 -0
  29. sdk/async_resources/feeds.py +248 -0
  30. sdk/async_resources/indexes.py +208 -0
  31. sdk/async_resources/jobs.py +165 -0
  32. sdk/async_resources/metadata.py +48 -0
  33. sdk/async_resources/models.py +102 -0
  34. sdk/async_resources/onboarding.py +538 -0
  35. sdk/async_resources/orgs.py +37 -0
  36. sdk/async_resources/pipelines.py +523 -0
  37. sdk/async_resources/projects.py +90 -0
  38. sdk/async_resources/search.py +262 -0
  39. sdk/async_resources/training.py +357 -0
  40. sdk/async_resources/usage.py +91 -0
  41. sdk/client.py +417 -0
  42. sdk/config.py +182 -0
  43. sdk/errors.py +178 -0
  44. sdk/examples/auth_factory.py +34 -0
  45. sdk/examples/batch_operations.py +57 -0
  46. sdk/examples/document_upload.py +56 -0
  47. sdk/examples/e2e_lifecycle.py +213 -0
  48. sdk/examples/error_handling.py +61 -0
  49. sdk/examples/pagination.py +64 -0
  50. sdk/examples/quickstart.py +36 -0
  51. sdk/examples/request_options.py +44 -0
  52. sdk/examples/search.py +64 -0
  53. sdk/integrations/__init__.py +57 -0
  54. sdk/integrations/_client.py +101 -0
  55. sdk/integrations/langchain/__init__.py +6 -0
  56. sdk/integrations/langchain/retriever.py +166 -0
  57. sdk/integrations/langchain/tools.py +108 -0
  58. sdk/integrations/llamaindex/__init__.py +11 -0
  59. sdk/integrations/llamaindex/filters.py +78 -0
  60. sdk/integrations/llamaindex/retriever.py +162 -0
  61. sdk/integrations/llamaindex/tools.py +109 -0
  62. sdk/integrations/llamaindex/vector_store.py +320 -0
  63. sdk/models/__init__.py +18 -0
  64. sdk/models/_base.py +24 -0
  65. sdk/models/_registry.py +457 -0
  66. sdk/models/a2a.py +92 -0
  67. sdk/models/agents.py +109 -0
  68. sdk/models/audit.py +28 -0
  69. sdk/models/auth.py +49 -0
  70. sdk/models/chunks.py +20 -0
  71. sdk/models/common.py +14 -0
  72. sdk/models/console.py +103 -0
  73. sdk/models/corpora.py +48 -0
  74. sdk/models/deployments.py +13 -0
  75. sdk/models/documents.py +126 -0
  76. sdk/models/embeddings.py +24 -0
  77. sdk/models/evaluation.py +17 -0
  78. sdk/models/feedback.py +9 -0
  79. sdk/models/feeds.py +57 -0
  80. sdk/models/indexes.py +36 -0
  81. sdk/models/jobs.py +52 -0
  82. sdk/models/models.py +26 -0
  83. sdk/models/onboarding.py +323 -0
  84. sdk/models/orgs.py +11 -0
  85. sdk/models/pipelines.py +147 -0
  86. sdk/models/projects.py +19 -0
  87. sdk/models/search.py +149 -0
  88. sdk/models/training.py +57 -0
  89. sdk/models/usage.py +39 -0
  90. sdk/namespaces.py +386 -0
  91. sdk/py.typed +0 -0
  92. sdk/resources/__init__.py +45 -0
  93. sdk/resources/_mixin_base.py +40 -0
  94. sdk/resources/a2a.py +230 -0
  95. sdk/resources/agents.py +487 -0
  96. sdk/resources/audit.py +144 -0
  97. sdk/resources/auth.py +138 -0
  98. sdk/resources/console.py +411 -0
  99. sdk/resources/corpora.py +269 -0
  100. sdk/resources/deployments.py +105 -0
  101. sdk/resources/documents.py +597 -0
  102. sdk/resources/feeds.py +246 -0
  103. sdk/resources/indexes.py +210 -0
  104. sdk/resources/jobs.py +164 -0
  105. sdk/resources/metadata.py +53 -0
  106. sdk/resources/models.py +99 -0
  107. sdk/resources/onboarding.py +542 -0
  108. sdk/resources/orgs.py +35 -0
  109. sdk/resources/pipeline_builder.py +257 -0
  110. sdk/resources/pipelines.py +520 -0
  111. sdk/resources/projects.py +87 -0
  112. sdk/resources/search.py +277 -0
  113. sdk/resources/training.py +358 -0
  114. sdk/resources/usage.py +92 -0
  115. sdk/types/__init__.py +366 -0
  116. sdk/types/a2a.py +88 -0
  117. sdk/types/agents.py +133 -0
  118. sdk/types/audit.py +26 -0
  119. sdk/types/auth.py +45 -0
  120. sdk/types/chunks.py +18 -0
  121. sdk/types/common.py +10 -0
  122. sdk/types/console.py +99 -0
  123. sdk/types/corpora.py +42 -0
  124. sdk/types/deployments.py +11 -0
  125. sdk/types/documents.py +104 -0
  126. sdk/types/embeddings.py +22 -0
  127. sdk/types/evaluation.py +15 -0
  128. sdk/types/feedback.py +7 -0
  129. sdk/types/feeds.py +61 -0
  130. sdk/types/indexes.py +30 -0
  131. sdk/types/jobs.py +50 -0
  132. sdk/types/models.py +22 -0
  133. sdk/types/onboarding.py +395 -0
  134. sdk/types/orgs.py +9 -0
  135. sdk/types/pipelines.py +177 -0
  136. sdk/types/projects.py +14 -0
  137. sdk/types/search.py +116 -0
  138. sdk/types/training.py +55 -0
  139. sdk/types/usage.py +37 -0
@@ -0,0 +1,592 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import json
5
+ import os
6
+ from typing import Any
7
+
8
+ from sdk._async_paging import AsyncPager
9
+ from sdk._paging import Page
10
+ from sdk._request_options import RequestOptions
11
+ from sdk._validation import require_str
12
+ from sdk.async_resources._mixin_base import AsyncRequesterMixin
13
+ from sdk.errors import ConfirmationRequiredError
14
+ from sdk.types import (
15
+ ChunkingConfig,
16
+ DocumentBatchUploadResponse,
17
+ DocumentCreateResponse,
18
+ DocumentDeleteResponse,
19
+ DocumentDetailResponse,
20
+ DocumentManifestIngestResponse,
21
+ DocumentUrlIngestResponse,
22
+ )
23
+
24
+
def _ingest_options(
    auto_index: bool | None,
    chunking: ChunkingConfig | None,
    chunk_strategy: str | None,
) -> dict[str, Any]:
    """Collect the explicitly supplied ingestion options for a JSON body.

    ``chunking`` takes precedence: ``chunk_strategy`` is only included when
    ``chunking`` is ``None``.
    """
    options: dict[str, Any] = {}
    if auto_index is not None:
        options["auto_index"] = auto_index
    if chunking is not None:
        options["chunking"] = chunking
    elif chunk_strategy is not None:
        options["chunk_strategy"] = chunk_strategy
    return options


def _multipart_fields(
    *,
    source_uri: str | None = None,
    metadata: dict[str, Any] | None = None,
    auto_index: bool | None = None,
    chunking: ChunkingConfig | None = None,
    chunk_strategy: str | None = None,
) -> dict[str, str]:
    """Build the string-valued form fields that accompany a multipart upload.

    Multipart form values must be strings, so ``metadata`` and ``chunking``
    are JSON-encoded and ``auto_index`` is rendered as ``"true"``/``"false"``.
    Keeping this in one place guarantees every multipart endpoint encodes
    the options the same way.
    """
    fields: dict[str, str] = {}
    if source_uri is not None:
        fields["source_uri"] = source_uri
    if metadata is not None:
        fields["metadata"] = json.dumps(metadata)
    if auto_index is not None:
        # bool() first so truthy non-bool values still serialize as "true".
        fields["auto_index"] = str(bool(auto_index)).lower()
    if chunking is not None:
        fields["chunking"] = json.dumps(chunking)
    elif chunk_strategy is not None:
        fields["chunk_strategy"] = chunk_strategy
    return fields


def _document_filters(
    q: str | None,
    status: str | None,
    source: str | None,
    tag: str | None,
) -> dict[str, Any] | None:
    """Return the document-listing query filters that were supplied, or None."""
    candidates = {"q": q, "status": status, "source": source, "tag": tag}
    filters = {key: value for key, value in candidates.items() if value is not None}
    return filters or None


class AsyncDocumentsMixin(AsyncRequesterMixin):
    """Async document and chunk operations, mixed into the async client.

    Every method delegates the HTTP work to helpers provided by the
    requester base (``_request``, ``_list_page``, ``_paginate``,
    ``_idempotency_headers``, ``_maybe_validate``, ``_wait_for_job``).
    """

    async def upload_document(
        self,
        corpus_id: str,
        *,
        file_path: str | None = None,
        file_bytes: bytes | None = None,
        filename: str | None = None,
        raw_text: str | None = None,
        source_uri: str | None = None,
        metadata: dict[str, Any] | None = None,
        auto_index: bool | None = None,
        chunk_strategy: str | None = None,
        chunking: ChunkingConfig | None = None,
        idempotency_key: str | None = None,
        request_options: RequestOptions | None = None,
    ) -> DocumentCreateResponse:
        """Upload a document to a corpus.

        Exactly one content source is used, checked in this order: a truthy
        ``file_path`` (multipart upload of the file on disk), a non-``None``
        ``file_bytes`` (multipart upload of in-memory bytes), otherwise
        ``raw_text`` (JSON body).

        Args:
            corpus_id: Target corpus ID.
            file_path: Path to file to upload.
            file_bytes: Raw file bytes to upload.
            filename: Filename when using file_bytes.
            raw_text: Raw text content to upload.
            source_uri: Optional source URI for the document.
            metadata: Optional document metadata.
            auto_index: Whether to auto-index after ingestion.
            chunk_strategy: Deprecated - use chunking instead.
            chunking: Chunking configuration (strategy, chunk_size, overlap, etc.)
            idempotency_key: Optional key for idempotent requests.
            request_options: Per-request options forwarded to ``_request``.

        Returns:
            The API response, passed through ``_maybe_validate`` as a
            ``DocumentCreateResponse``.

        Raises:
            ValueError: If content sources are combined, if ``filename`` is
                missing alongside ``file_bytes``, or if no source is given.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        if file_path and (file_bytes or raw_text):
            raise ValueError("file_path cannot be combined with file_bytes or raw_text")
        if file_bytes and raw_text:
            raise ValueError("file_bytes cannot be combined with raw_text")
        headers = self._idempotency_headers(idempotency_key)
        endpoint = f"/v1/corpora/{corpus_id}/documents"

        if file_path:
            fields = _multipart_fields(
                source_uri=source_uri,
                metadata=metadata,
                auto_index=auto_index,
                chunking=chunking,
                chunk_strategy=chunk_strategy,
            )
            # The handle must stay open until the request has been sent.
            with open(file_path, "rb") as handle:
                data = await self._request(
                    "POST",
                    endpoint,
                    data=fields,
                    files={"file": (os.path.basename(file_path), handle)},
                    headers=headers,
                    request_options=request_options,
                )
            return self._maybe_validate(data, "DocumentCreateResponse")

        if file_bytes is not None:
            if not filename:
                raise ValueError("filename is required when using file_bytes")
            fields = _multipart_fields(
                source_uri=source_uri,
                metadata=metadata,
                auto_index=auto_index,
                chunking=chunking,
                chunk_strategy=chunk_strategy,
            )
            file_payload: dict[str, Any] = {"file": (filename, file_bytes)}
            data = await self._request(
                "POST",
                endpoint,
                data=fields,
                files=file_payload,
                headers=headers,
                request_options=request_options,
            )
            return self._maybe_validate(data, "DocumentCreateResponse")

        if raw_text is None:
            raise ValueError("raw_text is required when no file is provided")
        payload: dict[str, Any] = {"raw_text": raw_text}
        if source_uri is not None:
            payload["source_uri"] = source_uri
        if metadata is not None:
            payload["metadata"] = metadata
        payload.update(_ingest_options(auto_index, chunking, chunk_strategy))
        data = await self._request(
            "POST",
            endpoint,
            json=payload,
            headers=headers,
            request_options=request_options,
        )
        return self._maybe_validate(data, "DocumentCreateResponse")

    async def upload_documents_batch(
        self,
        corpus_id: str,
        documents: list[dict[str, Any]],
        idempotency_key: str | None = None,
        *,
        auto_index: bool | None = None,
        chunk_strategy: str | None = None,
        chunking: ChunkingConfig | None = None,
        wait: bool = True,
        poll_s: int = 5,
        timeout_s: float | None = None,
        request_options: RequestOptions | None = None,
    ) -> DocumentBatchUploadResponse:
        """Upload multiple documents as raw text in a batch.

        Args:
            corpus_id: Target corpus ID.
            documents: List of document dicts with raw_text, source_uri, metadata.
            idempotency_key: Optional key for idempotent requests.
            auto_index: Whether to auto-index after ingestion.
            chunk_strategy: Deprecated - use chunking instead.
            chunking: Chunking configuration (strategy, chunk_size, overlap, etc.)
            wait: If True, wait for the batch job to complete.
            poll_s: Polling interval when waiting.
            timeout_s: Maximum seconds to wait for job completion.
            request_options: Per-request options forwarded to ``_request``.

        Returns:
            Response with ``doc_ids`` (list of created document IDs),
            ``job_id``, and ``count``.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        payload: dict[str, Any] = {"documents": documents}
        payload.update(_ingest_options(auto_index, chunking, chunk_strategy))
        headers = self._idempotency_headers(idempotency_key)
        data = await self._request(
            "POST",
            f"/v1/corpora/{corpus_id}/documents:batch",
            json=payload,
            headers=headers,
            request_options=request_options,
        )
        if wait:
            # Only wait when the response actually carries a job to follow.
            job_id = data.get("job_id")
            if job_id:
                await self._wait_for_job(job_id, poll_s=poll_s, timeout_s=timeout_s)
        return self._maybe_validate(data, "DocumentBatchUploadResponse")

    async def upload_files_batch(
        self,
        corpus_id: str,
        files: list[tuple[str, bytes]],
        idempotency_key: str | None = None,
        *,
        auto_index: bool | None = None,
        chunk_strategy: str | None = None,
        chunking: ChunkingConfig | None = None,
        wait: bool = True,
        poll_s: int = 5,
        timeout_s: float | None = None,
        request_options: RequestOptions | None = None,
    ) -> DocumentBatchUploadResponse:
        """Upload multiple files in a single multipart request.

        Args:
            corpus_id: Target corpus ID.
            files: List of (filename, content_bytes) tuples.
            idempotency_key: Optional key for idempotent requests.
            auto_index: Whether to auto-index after ingestion.
            chunk_strategy: Deprecated - use chunking instead.
            chunking: Chunking configuration (strategy, chunk_size, overlap, etc.)
            wait: If True, wait for the batch job to complete.
            poll_s: Polling interval when waiting.
            timeout_s: Maximum seconds to wait for job completion.
            request_options: Per-request options forwarded to ``_request``.

        Returns:
            Response with job_id, doc_ids, and count.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        headers = self._idempotency_headers(idempotency_key)

        # Every part shares the "files" field name so they arrive as a list.
        parts = [("files", (name, content)) for name, content in files]

        # Same encoding as upload_document, so auto_index is always
        # "true"/"false" regardless of the truthy value passed in.
        fields = _multipart_fields(
            auto_index=auto_index,
            chunking=chunking,
            chunk_strategy=chunk_strategy,
        )

        data = await self._request(
            "POST",
            f"/v1/corpora/{corpus_id}/documents:upload_batch",
            data=fields or None,
            files=parts,
            headers=headers,
            request_options=request_options,
        )
        if wait:
            job_id = data.get("job_id")
            if job_id:
                await self._wait_for_job(job_id, poll_s=poll_s, timeout_s=timeout_s)
        return self._maybe_validate(data, "DocumentBatchUploadResponse")

    async def upload_documents_parallel(
        self,
        corpus_id: str,
        file_paths: list[str],
        *,
        max_workers: int = 8,
        auto_index: bool | None = None,
        chunking: ChunkingConfig | None = None,
        metadata: dict[str, Any] | None = None,
        request_options: RequestOptions | None = None,
    ) -> list[DocumentCreateResponse]:
        """Upload multiple files concurrently using asyncio.gather.

        Each file is uploaded as a separate HTTP request via
        :meth:`upload_document`. For large batches this is significantly
        faster than uploading sequentially.

        Unlike :meth:`upload_files_batch` (which sends a single multipart
        request for server-side batching), this method issues concurrent
        individual uploads for client-side parallelism.

        .. warning:: **All-or-nothing semantics.** On partial failure an
           :class:`ExceptionGroup` is raised and **no** successful results are
           returned. Callers who need partial-failure recovery should upload
           files individually via :meth:`upload_document` and handle errors
           per file.

        .. note:: ``ExceptionGroup`` is a builtin only from Python 3.11
           onward; this method references it without a fallback import.

        Args:
            corpus_id: Target corpus ID.
            file_paths: List of local file paths to upload.
            max_workers: Maximum number of concurrent upload coroutines
                (default 8, must be >= 1).
            auto_index: Whether to auto-index after ingestion.
            chunking: Chunking configuration applied to each upload.
            metadata: Optional metadata applied to every document.
            request_options: Per-request options passed to every upload.

        Returns:
            List of upload responses (one per file, in input order).

        Raises:
            ExceptionGroup: If one or more uploads fail, containing all
                individual exceptions from failed uploads. Successful
                results from other uploads are discarded.
            ValueError: If *max_workers* is less than 1 or greater than 256.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        if max_workers < 1:
            raise ValueError(f"max_workers must be >= 1, got {max_workers}")
        if max_workers > 256:
            raise ValueError(f"max_workers must be <= 256, got {max_workers}")
        if not file_paths:
            return []

        # The semaphore caps how many uploads are in flight at once.
        gate = asyncio.Semaphore(max_workers)

        async def _upload_one(path: str) -> DocumentCreateResponse:
            async with gate:
                return await self.upload_document(
                    corpus_id,
                    file_path=path,
                    auto_index=auto_index,
                    chunking=chunking,
                    metadata=metadata,
                    request_options=request_options,
                )

        pending = [_upload_one(path) for path in file_paths]
        # return_exceptions=True lets every upload finish so that all
        # failures can be reported together.
        settled = await asyncio.gather(*pending, return_exceptions=True)

        results: list[DocumentCreateResponse] = []
        errors: list[Exception] = []
        for outcome in settled:
            if isinstance(outcome, Exception):
                errors.append(outcome)
            elif isinstance(outcome, BaseException):
                # ExceptionGroup only accepts Exception instances, so other
                # BaseExceptions are wrapped, keeping the original as cause.
                wrapped = RuntimeError(str(outcome))
                wrapped.__cause__ = outcome
                errors.append(wrapped)
            else:
                results.append(outcome)

        if errors:
            raise ExceptionGroup(f"{len(errors)} of {len(file_paths)} uploads failed", errors)
        return results

    async def ingest_urls(
        self,
        corpus_id: str,
        urls: list[dict[str, Any]],
        idempotency_key: str | None = None,
        *,
        auto_index: bool | None = None,
        chunk_strategy: str | None = None,
        chunking: ChunkingConfig | None = None,
        wait: bool = True,
        poll_s: int = 5,
        timeout_s: float | None = None,
        request_options: RequestOptions | None = None,
    ) -> DocumentUrlIngestResponse:
        """Ingest documents from URLs.

        Args:
            corpus_id: Target corpus ID.
            urls: List of URL dicts with url, title, tags, metadata.
            idempotency_key: Optional key for idempotent requests.
            auto_index: Whether to auto-index after ingestion.
            chunk_strategy: Deprecated - use chunking instead.
            chunking: Chunking configuration (strategy, chunk_size, overlap, etc.)
            wait: If True, wait for the batch job to complete.
            poll_s: Polling interval when waiting.
            timeout_s: Maximum seconds to wait for job completion.
            request_options: Per-request options forwarded to ``_request``.

        Returns:
            The API response, passed through ``_maybe_validate`` as a
            ``DocumentUrlIngestResponse``.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        payload: dict[str, Any] = {"urls": urls}
        payload.update(_ingest_options(auto_index, chunking, chunk_strategy))
        headers = self._idempotency_headers(idempotency_key)
        data = await self._request(
            "POST",
            f"/v1/corpora/{corpus_id}/documents:ingest_urls",
            json=payload,
            headers=headers,
            request_options=request_options,
        )
        if wait:
            job_id = data.get("job_id")
            if job_id:
                await self._wait_for_job(job_id, poll_s=poll_s, timeout_s=timeout_s)
        return self._maybe_validate(data, "DocumentUrlIngestResponse")

    async def ingest_manifest(
        self,
        corpus_id: str,
        manifest_uri: str,
        max_documents: int | None = None,
        idempotency_key: str | None = None,
        *,
        auto_index: bool | None = None,
        chunk_strategy: str | None = None,
        chunking: ChunkingConfig | None = None,
        request_options: RequestOptions | None = None,
    ) -> DocumentManifestIngestResponse:
        """Ingest documents from a manifest file.

        Unlike the batch and URL ingestion methods, this one has no ``wait``
        option and returns as soon as the request completes.

        Args:
            corpus_id: Target corpus ID.
            manifest_uri: URI to manifest file (S3, HTTP, local).
            max_documents: Optional limit on documents to ingest.
            idempotency_key: Optional key for idempotent requests.
            auto_index: Whether to auto-index after ingestion.
            chunk_strategy: Deprecated - use chunking instead.
            chunking: Chunking configuration (strategy, chunk_size, overlap, etc.)
            request_options: Per-request options forwarded to ``_request``.

        Returns:
            The API response, passed through ``_maybe_validate`` as a
            ``DocumentManifestIngestResponse``.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        payload: dict[str, Any] = {"manifest_uri": manifest_uri}
        if max_documents is not None:
            payload["max_documents"] = max_documents
        payload.update(_ingest_options(auto_index, chunking, chunk_strategy))
        headers = self._idempotency_headers(idempotency_key)
        data = await self._request(
            "POST",
            f"/v1/corpora/{corpus_id}/documents:ingest_manifest",
            json=payload,
            headers=headers,
            request_options=request_options,
        )
        return self._maybe_validate(data, "DocumentManifestIngestResponse")

    async def list_documents(
        self,
        corpus_id: str,
        *,
        limit: int = 100,
        offset: int = 0,
        q: str | None = None,
        status: str | None = None,
        source: str | None = None,
        tag: str | None = None,
        request_options: RequestOptions | None = None,
    ) -> Page[dict[str, Any]]:
        """Fetch one page of documents in a corpus.

        Args:
            corpus_id: Corpus whose documents are listed.
            limit: Page size passed to ``_list_page``.
            offset: Page offset passed to ``_list_page``.
            q: Optional ``q`` query parameter.
            status: Optional ``status`` query parameter.
            source: Optional ``source`` query parameter.
            tag: Optional ``tag`` query parameter.
            request_options: Accepted but currently unused (see note below).

        Returns:
            The page produced by ``_list_page`` for the ``documents`` key.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        # NOTE(review): request_options is accepted but never forwarded to
        # _list_page - confirm whether _list_page supports it and pass it on.
        return await self._list_page(
            "GET",
            f"/v1/corpora/{corpus_id}/documents",
            items_key="documents",
            params=_document_filters(q, status, source, tag),
            limit=limit,
            offset=offset,
        )

    def iter_documents(
        self,
        corpus_id: str,
        *,
        limit: int = 100,
        q: str | None = None,
        status: str | None = None,
        source: str | None = None,
        tag: str | None = None,
        request_options: RequestOptions | None = None,
    ) -> AsyncPager[dict[str, Any]]:
        """Lazily paginate documents, yielding individual document items.

        This is a plain (non-async) method: it returns the pager built by
        ``_paginate`` without awaiting anything itself.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        # NOTE(review): request_options is accepted but never forwarded to
        # _paginate - confirm whether _paginate supports it and pass it on.
        return self._paginate(
            "GET",
            f"/v1/corpora/{corpus_id}/documents",
            items_key="documents",
            params=_document_filters(q, status, source, tag),
            limit=limit,
        )

    async def get_document(
        self,
        doc_id: str,
        request_options: RequestOptions | None = None,
    ) -> DocumentDetailResponse:
        """Fetch a single document by ID.

        Args:
            doc_id: Document ID to fetch.
            request_options: Per-request options forwarded to ``_request``.

        Returns:
            The API response, passed through ``_maybe_validate`` as a
            ``DocumentDetailResponse``.
        """
        doc_id = require_str(doc_id, "doc_id")
        data = await self._request(
            "GET",
            f"/v1/documents/{doc_id}",
            request_options=request_options,
        )
        return self._maybe_validate(data, "DocumentDetailResponse")

    async def update_document_metadata(
        self,
        doc_id: str,
        metadata: dict[str, Any],
        request_options: RequestOptions | None = None,
    ) -> dict[str, Any]:
        """Update customer metadata on a document using merge semantics.

        Keys with non-empty values are added or updated.
        Keys with empty string or None values are removed.
        Keys not in the request are left unchanged.

        Args:
            doc_id: Document ID to update
            metadata: Dict of metadata updates to apply
            request_options: Per-request options forwarded to ``_request``.

        Returns:
            Updated metadata dict with custom_metadata and system_metadata
        """
        doc_id = require_str(doc_id, "doc_id")
        # The raw response is returned as-is; no _maybe_validate step here.
        return await self._request(
            "PATCH",
            f"/v1/documents/{doc_id}/metadata",
            json=metadata,
            request_options=request_options,
        )

    async def delete_document(
        self,
        corpus_id: str,
        doc_id: str,
        *,
        confirm: bool = False,
        reindex: bool = False,
        request_options: RequestOptions | None = None,
    ) -> DocumentDeleteResponse:
        """Delete a document from a corpus.

        This is an irreversible operation. You must pass ``confirm=True``
        to acknowledge this and proceed.

        Args:
            corpus_id: The corpus containing the document.
            doc_id: Unique identifier of the document to delete.
            confirm: Safety guard -- must be ``True`` to execute the
                deletion. Raises ``ConfirmationRequiredError`` when ``False``.
            reindex: If ``True``, trigger a re-index of the corpus after
                deletion.
            request_options: Per-request options forwarded to ``_request``.

        Returns:
            Confirmation of the deletion.

        Raises:
            ConfirmationRequiredError: If *confirm* is not ``True``.
            Knowledge2Error: If the API request fails.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        doc_id = require_str(doc_id, "doc_id")
        # Both IDs are validated first; the guard fires before any request.
        if not confirm:
            raise ConfirmationRequiredError("document", doc_id)
        data = await self._request(
            "DELETE",
            f"/v1/corpora/{corpus_id}/documents/{doc_id}",
            params={"reindex": reindex},
            request_options=request_options,
        )
        return self._maybe_validate(data, "DocumentDeleteResponse")

    async def list_chunks(
        self,
        corpus_id: str,
        limit: int = 100,
        offset: int = 0,
        request_options: RequestOptions | None = None,
    ) -> Page[dict[str, Any]]:
        """Fetch one page of chunks in a corpus.

        Args:
            corpus_id: Corpus whose chunks are listed.
            limit: Page size passed to ``_list_page``.
            offset: Page offset passed to ``_list_page``.
            request_options: Accepted but currently unused (see note below).

        Returns:
            The page produced by ``_list_page`` for the ``chunks`` key.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        # NOTE(review): request_options is accepted but never forwarded to
        # _list_page - confirm whether _list_page supports it and pass it on.
        return await self._list_page(
            "GET",
            f"/v1/corpora/{corpus_id}/chunks",
            items_key="chunks",
            limit=limit,
            offset=offset,
        )

    def iter_chunks(
        self,
        corpus_id: str,
        *,
        limit: int = 100,
        request_options: RequestOptions | None = None,
    ) -> AsyncPager[dict[str, Any]]:
        """Lazily paginate chunks, yielding individual chunk items.

        This is a plain (non-async) method: it returns the pager built by
        ``_paginate`` without awaiting anything itself.
        """
        corpus_id = require_str(corpus_id, "corpus_id")
        # NOTE(review): request_options is accepted but never forwarded to
        # _paginate - confirm whether _paginate supports it and pass it on.
        return self._paginate(
            "GET",
            f"/v1/corpora/{corpus_id}/chunks",
            items_key="chunks",
            limit=limit,
        )