knowhere-python-sdk 0.2.1__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. knowhere_python_sdk-0.3.0/.release-please-manifest.json +3 -0
  2. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/CHANGELOG.md +8 -0
  3. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/PKG-INFO +72 -1
  4. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/README.md +71 -0
  5. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/docs/usage.md +127 -0
  6. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/pyproject.toml +1 -1
  7. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/__init__.py +13 -0
  8. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/_client.py +43 -1
  9. knowhere_python_sdk-0.3.0/src/knowhere/_version.py +1 -0
  10. knowhere_python_sdk-0.3.0/src/knowhere/resources/__init__.py +16 -0
  11. knowhere_python_sdk-0.3.0/src/knowhere/resources/documents.py +74 -0
  12. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/resources/jobs.py +14 -0
  13. knowhere_python_sdk-0.3.0/src/knowhere/resources/retrieval.py +70 -0
  14. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/types/__init__.py +13 -0
  15. knowhere_python_sdk-0.3.0/src/knowhere/types/document.py +28 -0
  16. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/types/job.py +4 -0
  17. knowhere_python_sdk-0.3.0/src/knowhere/types/retrieval.py +33 -0
  18. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/tests/conftest.py +4 -1
  19. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/tests/test_client.py +34 -0
  20. knowhere_python_sdk-0.3.0/tests/test_documents.py +106 -0
  21. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/tests/test_jobs.py +11 -2
  22. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/tests/test_models.py +16 -0
  23. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/tests/test_polling.py +1 -1
  24. knowhere_python_sdk-0.3.0/tests/test_retrieval.py +110 -0
  25. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/tests/test_retry.py +0 -1
  26. knowhere_python_sdk-0.2.1/.release-please-manifest.json +0 -3
  27. knowhere_python_sdk-0.2.1/src/knowhere/_version.py +0 -1
  28. knowhere_python_sdk-0.2.1/src/knowhere/resources/__init__.py +0 -7
  29. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/.github/workflows/ci.yml +0 -0
  30. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/.github/workflows/publish-pypi.yml +0 -0
  31. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/.github/workflows/publish.yml +0 -0
  32. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/.gitignore +0 -0
  33. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/examples/async_usage.py +0 -0
  34. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/examples/error_handling.py +0 -0
  35. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/examples/parse_file.py +0 -0
  36. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/examples/parse_url.py +0 -0
  37. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/examples/step_by_step.py +0 -0
  38. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/release-please-config.json +0 -0
  39. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/_base_client.py +0 -0
  40. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/_constants.py +0 -0
  41. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/_exceptions.py +0 -0
  42. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/_logging.py +0 -0
  43. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/_response.py +0 -0
  44. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/_types.py +0 -0
  45. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/lib/__init__.py +0 -0
  46. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/lib/polling.py +0 -0
  47. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/lib/result_parser.py +0 -0
  48. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/lib/upload.py +0 -0
  49. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/py.typed +0 -0
  50. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/resources/_base.py +0 -0
  51. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/types/params.py +0 -0
  52. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/types/result.py +0 -0
  53. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/src/knowhere/types/shared.py +0 -0
  54. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/tests/__init__.py +0 -0
  55. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/tests/fixtures/real_result.zip +0 -0
  56. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/tests/test_exceptions.py +0 -0
  57. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/tests/test_logging.py +0 -0
  58. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/tests/test_parse.py +0 -0
  59. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/tests/test_result_parser.py +0 -0
  60. {knowhere_python_sdk-0.2.1 → knowhere_python_sdk-0.3.0}/tests/test_upload.py +0 -0
@@ -0,0 +1,3 @@
1
+ {
2
+ ".": "0.3.0"
3
+ }
@@ -1,5 +1,13 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.3.0](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.2.1...v0.3.0) (2026-04-21)
4
+
5
+
6
+ ### Features
7
+
8
+ * add retrieval service sdk clients ([bceef5c](https://github.com/Ontos-AI/knowhere-python-sdk/commit/bceef5cf379dba39543244bd6ca86262a536fb9b))
9
+ * integrate retrieval service v1 in Python SDK ([bce7aa8](https://github.com/Ontos-AI/knowhere-python-sdk/commit/bce7aa8dbf069d5880b92c6f9d8996878251f7cb))
10
+
3
11
  ## [0.2.1](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.2.0...v0.2.1) (2026-04-09)
4
12
 
5
13
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: knowhere-python-sdk
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: Official Python SDK for the Knowhere document parsing API
5
5
  Project-URL: Homepage, https://knowhereto.ai
6
6
  Project-URL: Documentation, https://docs.knowhereto.ai
@@ -64,6 +64,74 @@ for chunk in result.text_chunks:
64
64
  print(chunk.content[:80])
65
65
  ```
66
66
 
67
+ ## Retrieval and document lifecycle
68
+
69
+ New documents are published into a retrieval namespace. The server returns a
70
+ stable `document_id` when you create a job; persist that value if you need to
71
+ update or archive the same document later.
72
+
73
+ ```python
74
+ job = client.jobs.create(
75
+ source_type="url",
76
+ source_url="https://example.com/manual.pdf",
77
+ namespace="support-center",
78
+ )
79
+
80
+ print(job.document_id) # "doc_..."
81
+ ```
82
+
83
+ After the job is done and published, query the canonical document content:
84
+
85
+ ```python
86
+ response = client.retrieval.query(
87
+ namespace="support-center",
88
+ query="How do I reset Bluetooth pairing?",
89
+ top_k=5,
90
+ )
91
+
92
+ for result in response.results:
93
+ print(result.content)
94
+ print(result.score)
95
+ print(result.source.source_file_name, result.source.section_path)
96
+ ```
97
+
98
+ Use `document_id` to update or archive a document:
99
+
100
+ ```python
101
+ update_job = client.jobs.create(
102
+ source_type="url",
103
+ source_url="https://example.com/manual-v2.pdf",
104
+ document_id=job.document_id,
105
+ )
106
+
107
+ document = client.documents.get(job.document_id)
108
+ print(document.status)
109
+
110
+ client.documents.archive(job.document_id)
111
+ ```
112
+
113
+ You can also list documents in a namespace:
114
+
115
+ ```python
116
+ documents = client.documents.list(namespace="support-center")
117
+ for document in documents.documents:
118
+ print(document.document_id, document.status)
119
+ ```
120
+
121
+ Retrieval supports exclusions when clients want follow-up results that avoid
122
+ previously used documents or sections:
123
+
124
+ ```python
125
+ response = client.retrieval.query(
126
+ namespace="support-center",
127
+ query="battery charging",
128
+ exclude_document_ids=["doc_old"],
129
+ exclude_sections=[
130
+ {"document_id": "doc_123", "section_path": "Appendix / Legal"}
131
+ ],
132
+ )
133
+ ```
134
+
67
135
  While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `KNOWHERE_API_KEY="sk_..."` to your `.env` file so that your API key is not stored in source control.
68
136
 
69
137
  ### Parse a local file
@@ -137,9 +205,12 @@ from pathlib import Path
137
205
  job = client.jobs.create(
138
206
  source_type="file",
139
207
  file_name="report.pdf",
208
+ namespace="support-center",
140
209
  parsing_params={"model": "advanced", "ocr_enabled": True},
141
210
  )
142
211
 
212
+ print(job.document_id) # Persist this to update/archive the document later.
213
+
143
214
  # Step 2: Upload file to presigned URL
144
215
  client.jobs.upload(job, file=Path("report.pdf"))
145
216
 
@@ -32,6 +32,74 @@ for chunk in result.text_chunks:
32
32
  print(chunk.content[:80])
33
33
  ```
34
34
 
35
+ ## Retrieval and document lifecycle
36
+
37
+ New documents are published into a retrieval namespace. The server returns a
38
+ stable `document_id` when you create a job; persist that value if you need to
39
+ update or archive the same document later.
40
+
41
+ ```python
42
+ job = client.jobs.create(
43
+ source_type="url",
44
+ source_url="https://example.com/manual.pdf",
45
+ namespace="support-center",
46
+ )
47
+
48
+ print(job.document_id) # "doc_..."
49
+ ```
50
+
51
+ After the job is done and published, query the canonical document content:
52
+
53
+ ```python
54
+ response = client.retrieval.query(
55
+ namespace="support-center",
56
+ query="How do I reset Bluetooth pairing?",
57
+ top_k=5,
58
+ )
59
+
60
+ for result in response.results:
61
+ print(result.content)
62
+ print(result.score)
63
+ print(result.source.source_file_name, result.source.section_path)
64
+ ```
65
+
66
+ Use `document_id` to update or archive a document:
67
+
68
+ ```python
69
+ update_job = client.jobs.create(
70
+ source_type="url",
71
+ source_url="https://example.com/manual-v2.pdf",
72
+ document_id=job.document_id,
73
+ )
74
+
75
+ document = client.documents.get(job.document_id)
76
+ print(document.status)
77
+
78
+ client.documents.archive(job.document_id)
79
+ ```
80
+
81
+ You can also list documents in a namespace:
82
+
83
+ ```python
84
+ documents = client.documents.list(namespace="support-center")
85
+ for document in documents.documents:
86
+ print(document.document_id, document.status)
87
+ ```
88
+
89
+ Retrieval supports exclusions when clients want follow-up results that avoid
90
+ previously used documents or sections:
91
+
92
+ ```python
93
+ response = client.retrieval.query(
94
+ namespace="support-center",
95
+ query="battery charging",
96
+ exclude_document_ids=["doc_old"],
97
+ exclude_sections=[
98
+ {"document_id": "doc_123", "section_path": "Appendix / Legal"}
99
+ ],
100
+ )
101
+ ```
102
+
35
103
  While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `KNOWHERE_API_KEY="sk_..."` to your `.env` file so that your API key is not stored in source control.
36
104
 
37
105
  ### Parse a local file
@@ -105,9 +173,12 @@ from pathlib import Path
105
173
  job = client.jobs.create(
106
174
  source_type="file",
107
175
  file_name="report.pdf",
176
+ namespace="support-center",
108
177
  parsing_params={"model": "advanced", "ocr_enabled": True},
109
178
  )
110
179
 
180
+ print(job.document_id) # Persist this to update/archive the document later.
181
+
111
182
  # Step 2: Upload file to presigned URL
112
183
  client.jobs.upload(job, file=Path("report.pdf"))
113
184
 
@@ -12,6 +12,7 @@ Comprehensive reference for every feature, parameter, and pattern in the SDK.
12
12
  - [Working with Results](#working-with-results)
13
13
  - [Chunk Types](#chunk-types)
14
14
  - [Step-by-Step Control (Jobs API)](#step-by-step-control-jobs-api)
15
+ - [Retrieval and Document Lifecycle](#retrieval-and-document-lifecycle)
15
16
  - [Async Usage](#async-usage)
16
17
  - [Progress Callbacks](#progress-callbacks)
17
18
  - [Error Handling](#error-handling)
@@ -316,8 +317,10 @@ from pathlib import Path
316
317
  job = client.jobs.create(
317
318
  source_type="file",
318
319
  file_name="report.pdf",
320
+ namespace="support-center",
319
321
  parsing_params={"model": "advanced", "ocr_enabled": True},
320
322
  )
323
+ print(job.document_id) # Persist this value for update/archive flows.
321
324
 
322
325
  # Step 2: Upload file to the presigned URL
323
326
  client.jobs.upload(job, file=Path("report.pdf"))
@@ -341,6 +344,8 @@ print(result.statistics)
341
344
  | `source_type` | `"url" \| "file"` | — | Required. Whether parsing from URL or uploaded file. |
342
345
  | `source_url` | `str \| None` | `None` | URL to parse (required when `source_type="url"`). |
343
346
  | `file_name` | `str \| None` | `None` | Original filename (used when `source_type="file"`). |
347
+ | `namespace` | `str \| None` | `None` | Retrieval namespace. The server defaults to `"default"` when omitted. |
348
+ | `document_id` | `str \| None` | `None` | Existing document ID when creating an update job. Omit for a new document. |
344
349
  | `data_id` | `str \| None` | `None` | Your own correlation/idempotency identifier. |
345
350
  | `parsing_params` | `ParsingParams \| None` | `None` | Parsing configuration. |
346
351
  | `webhook` | `WebhookConfig \| None` | `None` | Webhook for completion notification. |
@@ -351,6 +356,8 @@ Returns a `Job` object:
351
356
  job.job_id # "abc-123"
352
357
  job.status # "pending"
353
358
  job.source_type # "file"
359
+ job.namespace # "support-center"
360
+ job.document_id # "doc_..." — persist this for updates and archive calls
354
361
  job.upload_url # presigned URL (for file uploads)
355
362
  job.upload_headers # headers to include in the upload request
356
363
  job.expires_in # seconds until upload URL expires
@@ -407,6 +414,119 @@ result = client.jobs.load("https://storage.example.com/result.zip")
407
414
 
408
415
  ---
409
416
 
417
+ ## Retrieval and Document Lifecycle
418
+
419
+ The retrieval APIs operate on canonical documents that are published after a
420
+ job completes. For new documents, the server generates `document_id` during
421
+ `jobs.create()`. Store that ID in your application if you need to update or
422
+ archive the same document later.
423
+
424
+ ### Create a retrievable document
425
+
426
+ ```python
427
+ job = client.jobs.create(
428
+ source_type="url",
429
+ source_url="https://example.com/manual.pdf",
430
+ namespace="support-center",
431
+ )
432
+
433
+ print(job.document_id) # "doc_..."
434
+ ```
435
+
436
+ For file uploads, the flow is the same except that you upload the file before
437
+ polling:
438
+
439
+ ```python
440
+ job = client.jobs.create(
441
+ source_type="file",
442
+ file_name="manual.pdf",
443
+ namespace="support-center",
444
+ )
445
+ client.jobs.upload(job, file=Path("manual.pdf"))
446
+ job_result = client.jobs.wait(job.job_id)
447
+ ```
448
+
449
+ ### Update an existing document
450
+
451
+ Pass the prior `document_id` to create an update job. If `namespace` is omitted,
452
+ the API resolves the namespace from the existing document.
453
+
454
+ ```python
455
+ update_job = client.jobs.create(
456
+ source_type="url",
457
+ source_url="https://example.com/manual-v2.pdf",
458
+ document_id=job.document_id,
459
+ )
460
+ ```
461
+
462
+ The API rejects concurrent non-terminal jobs for the same document with a
463
+ retryable `ConflictError` using the server error code `ABORTED`.
464
+
465
+ ### Query retrieval results
466
+
467
+ ```python
468
+ response = client.retrieval.query(
469
+ namespace="support-center",
470
+ query="How do I pair a Bluetooth headset?",
471
+ top_k=5,
472
+ )
473
+
474
+ for result in response.results:
475
+ print(result.content)
476
+ print(result.score)
477
+ print(result.source.document_id)
478
+ print(result.source.source_file_name)
479
+ print(result.source.section_path)
480
+ ```
481
+
482
+ Retrieval results expose `content`, not the older parse-result `text` field.
483
+ Media results may include `asset_url` when the server can sign the referenced
484
+ artifact.
485
+
486
+ Each retrieval result uses one canonical source reference shape:
487
+
488
+ ```python
489
+ result.content
490
+ result.chunk_type
491
+ result.score
492
+ result.asset_url # Optional[str]
493
+ result.source.document_id
494
+ result.source.source_file_name
495
+ result.source.section_path
496
+ ```
497
+
498
+ ### Exclude documents or sections
499
+
500
+ Use exclusions for follow-up queries that should avoid already-used context.
501
+
502
+ ```python
503
+ response = client.retrieval.query(
504
+ namespace="support-center",
505
+ query="battery charging",
506
+ top_k=10,
507
+ exclude_document_ids=["doc_old"],
508
+ exclude_sections=[
509
+ {"document_id": "doc_123", "section_path": "Appendix / Legal"}
510
+ ],
511
+ )
512
+ ```
513
+
514
+ ### List, get, and archive documents
515
+
516
+ ```python
517
+ document_list = client.documents.list(namespace="support-center")
518
+ for document in document_list.documents:
519
+ print(document.document_id, document.status, document.source_file_name)
520
+
521
+ document = client.documents.get("doc_123")
522
+ print(document.current_job_result_id)
523
+
524
+ archived = client.documents.archive("doc_123")
525
+ print(archived.status) # "archived"
526
+ ```
527
+
528
+ ---
529
+
410
530
  ## Async Usage
411
531
 
412
532
  Every method available on `Knowhere` has an async counterpart on `AsyncKnowhere`:
@@ -429,6 +549,13 @@ async def main():
429
549
  job_result = await client.jobs.wait(job.job_id)
430
550
  result = await client.jobs.load(job_result)
431
551
 
552
+ retrieval = await client.retrieval.query(
553
+ namespace="support-center",
554
+ query="refund policy",
555
+ top_k=5,
556
+ )
557
+ print(retrieval.results[0].content)
558
+
432
559
  asyncio.run(main())
433
560
  ```
434
561
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "knowhere-python-sdk"
7
- version = "0.2.1"
7
+ version = "0.3.0"
8
8
  description = "Official Python SDK for the Knowhere document parsing API"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -35,8 +35,14 @@ from knowhere._exceptions import (
35
35
  )
36
36
  from knowhere._types import PollProgressCallback, UploadProgressCallback
37
37
  from knowhere._version import __version__
38
+ from knowhere.types.document import Document, DocumentListResponse
38
39
  from knowhere.types.job import Job, JobError, JobProgress, JobResult
39
40
  from knowhere.types.params import ParsingParams, WebhookConfig
41
+ from knowhere.types.retrieval import (
42
+ RetrievalSource,
43
+ RetrievalQueryResponse,
44
+ RetrievalResult,
45
+ )
40
46
  from knowhere.types.result import (
41
47
  BaseChunk,
42
48
  Checksum,
@@ -87,6 +93,13 @@ __all__: list[str] = [
87
93
  "JobError",
88
94
  "JobProgress",
89
95
  "JobResult",
96
+ # Document types
97
+ "Document",
98
+ "DocumentListResponse",
99
+ # Retrieval types
100
+ "RetrievalSource",
101
+ "RetrievalQueryResponse",
102
+ "RetrievalResult",
90
103
  # Result types
91
104
  "ParseResult",
92
105
  "Manifest",
@@ -19,7 +19,9 @@ from knowhere._types import (
19
19
  PollProgressCallback,
20
20
  UploadProgressCallback,
21
21
  )
22
+ from knowhere.resources.documents import AsyncDocuments, Documents
22
23
  from knowhere.resources.jobs import AsyncJobs, Jobs
24
+ from knowhere.resources.retrieval import AsyncRetrieval, Retrieval
23
25
  from knowhere.types.job import Job, JobResult
24
26
  from knowhere.types.params import ParsingParams, WebhookConfig
25
27
  from knowhere.types.result import ParseResult
@@ -42,6 +44,16 @@ class Knowhere(SyncAPIClient):
42
44
  """Access the jobs resource namespace."""
43
45
  return Jobs(self)
44
46
 
47
+ @cached_property
48
+ def retrieval(self) -> Retrieval:
49
+ """Access the retrieval resource namespace."""
50
+ return Retrieval(self)
51
+
52
+ @cached_property
53
+ def documents(self) -> Documents:
54
+ """Access the documents resource namespace."""
55
+ return Documents(self)
56
+
45
57
  # -- overloaded parse signatures --
46
58
 
47
59
  @overload
@@ -50,6 +62,8 @@ class Knowhere(SyncAPIClient):
50
62
  *,
51
63
  url: str,
52
64
  data_id: Optional[str] = ...,
65
+ namespace: Optional[str] = ...,
66
+ document_id: Optional[str] = ...,
53
67
  parsing_params: Optional[ParsingParams] = ...,
54
68
  webhook: Optional[WebhookConfig] = ...,
55
69
  poll_interval: float = ...,
@@ -66,6 +80,8 @@ class Knowhere(SyncAPIClient):
66
80
  file: Union[Path, BinaryIO, bytes],
67
81
  file_name: Optional[str] = ...,
68
82
  data_id: Optional[str] = ...,
83
+ namespace: Optional[str] = ...,
84
+ document_id: Optional[str] = ...,
69
85
  parsing_params: Optional[ParsingParams] = ...,
70
86
  webhook: Optional[WebhookConfig] = ...,
71
87
  poll_interval: float = ...,
@@ -82,6 +98,8 @@ class Knowhere(SyncAPIClient):
82
98
  file: Optional[Union[Path, BinaryIO, bytes]] = None,
83
99
  file_name: Optional[str] = None,
84
100
  data_id: Optional[str] = None,
101
+ namespace: Optional[str] = None,
102
+ document_id: Optional[str] = None,
85
103
  parsing_params: Optional[ParsingParams] = None,
86
104
  webhook: Optional[WebhookConfig] = None,
87
105
  poll_interval: float = DEFAULT_POLL_INTERVAL,
@@ -105,6 +123,8 @@ class Knowhere(SyncAPIClient):
105
123
  source_type="url",
106
124
  source_url=url,
107
125
  data_id=data_id,
126
+ namespace=namespace,
127
+ document_id=document_id,
108
128
  parsing_params=parsing_params,
109
129
  webhook=webhook,
110
130
  )
@@ -116,6 +136,8 @@ class Knowhere(SyncAPIClient):
116
136
  source_type="file",
117
137
  file_name=resolved_name,
118
138
  data_id=data_id,
139
+ namespace=namespace,
140
+ document_id=document_id,
119
141
  parsing_params=parsing_params,
120
142
  webhook=webhook,
121
143
  )
@@ -149,12 +171,24 @@ class AsyncKnowhere(AsyncAPIClient):
149
171
  """Access the async jobs resource namespace."""
150
172
  return AsyncJobs(self)
151
173
 
174
+ @cached_property
175
+ def retrieval(self) -> AsyncRetrieval:
176
+ """Access the async retrieval resource namespace."""
177
+ return AsyncRetrieval(self)
178
+
179
+ @cached_property
180
+ def documents(self) -> AsyncDocuments:
181
+ """Access the async documents resource namespace."""
182
+ return AsyncDocuments(self)
183
+
152
184
  @overload
153
185
  async def parse(
154
186
  self,
155
187
  *,
156
188
  url: str,
157
189
  data_id: Optional[str] = ...,
190
+ namespace: Optional[str] = ...,
191
+ document_id: Optional[str] = ...,
158
192
  parsing_params: Optional[ParsingParams] = ...,
159
193
  webhook: Optional[WebhookConfig] = ...,
160
194
  poll_interval: float = ...,
@@ -171,6 +205,8 @@ class AsyncKnowhere(AsyncAPIClient):
171
205
  file: Union[Path, BinaryIO, bytes],
172
206
  file_name: Optional[str] = ...,
173
207
  data_id: Optional[str] = ...,
208
+ namespace: Optional[str] = ...,
209
+ document_id: Optional[str] = ...,
174
210
  parsing_params: Optional[ParsingParams] = ...,
175
211
  webhook: Optional[WebhookConfig] = ...,
176
212
  poll_interval: float = ...,
@@ -187,6 +223,8 @@ class AsyncKnowhere(AsyncAPIClient):
187
223
  file: Optional[Union[Path, BinaryIO, bytes]] = None,
188
224
  file_name: Optional[str] = None,
189
225
  data_id: Optional[str] = None,
226
+ namespace: Optional[str] = None,
227
+ document_id: Optional[str] = None,
190
228
  parsing_params: Optional[ParsingParams] = None,
191
229
  webhook: Optional[WebhookConfig] = None,
192
230
  poll_interval: float = DEFAULT_POLL_INTERVAL,
@@ -206,6 +244,8 @@ class AsyncKnowhere(AsyncAPIClient):
206
244
  source_type="url",
207
245
  source_url=url,
208
246
  data_id=data_id,
247
+ namespace=namespace,
248
+ document_id=document_id,
209
249
  parsing_params=parsing_params,
210
250
  webhook=webhook,
211
251
  )
@@ -217,6 +257,8 @@ class AsyncKnowhere(AsyncAPIClient):
217
257
  source_type="file",
218
258
  file_name=resolved_name,
219
259
  data_id=data_id,
260
+ namespace=namespace,
261
+ document_id=document_id,
220
262
  parsing_params=parsing_params,
221
263
  webhook=webhook,
222
264
  )
@@ -232,4 +274,4 @@ class AsyncKnowhere(AsyncAPIClient):
232
274
 
233
275
  return await self.jobs.load(
234
276
  job_result, verify_checksum=verify_checksum
235
- )
277
+ )
@@ -0,0 +1 @@
1
+ __version__ = "0.3.0" # x-release-please-version
@@ -0,0 +1,16 @@
1
+ """Resource namespace re-exports."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from knowhere.resources.documents import AsyncDocuments, Documents
6
+ from knowhere.resources.jobs import AsyncJobs, Jobs
7
+ from knowhere.resources.retrieval import AsyncRetrieval, Retrieval
8
+
9
+ __all__: list[str] = [
10
+ "AsyncDocuments",
11
+ "AsyncJobs",
12
+ "AsyncRetrieval",
13
+ "Documents",
14
+ "Jobs",
15
+ "Retrieval",
16
+ ]
@@ -0,0 +1,74 @@
1
+ """Documents resource for canonical document lifecycle operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
8
+ from knowhere.types.document import Document, DocumentListResponse
9
+
10
+
11
def _document_path(document_id: str, suffix: str = "") -> str:
    """Build the request path for one document.

    The ID is percent-encoded as a single path segment so that characters
    such as ``/``, ``?`` or ``#`` inside a caller-supplied ID cannot change
    which route (or query string) the request is sent to.

    Args:
        document_id: Identifier of the document to address.
        suffix: Optional trailing path (for example ``"/archive"``), appended
            verbatim after the encoded ID.

    Returns:
        The relative request path, e.g. ``"v1/documents/doc_123/archive"``.

    Raises:
        ValueError: If ``document_id`` is empty. An empty ID would otherwise
            collapse the path to the collection route ``v1/documents/``.
    """
    # Imported locally so this block does not depend on a file-level import.
    from urllib.parse import quote

    if not document_id:
        raise ValueError(
            f"Expected a non-empty value for `document_id` but received {document_id!r}"
        )
    # safe="" makes quote() escape "/" as well, keeping the ID in one segment.
    return f"v1/documents/{quote(str(document_id), safe='')}{suffix}"


class Documents(SyncAPIResource):
    """Synchronous interface for ``/v1/documents`` endpoints."""

    def list(self, *, namespace: Optional[str] = None) -> DocumentListResponse:
        """List canonical documents.

        Args:
            namespace: Namespace to list. When ``None``, no ``namespace``
                query parameter is sent with the request.

        Returns:
            The response body cast to ``DocumentListResponse``.
        """
        params: Optional[Dict[str, Any]] = None
        if namespace is not None:
            params = {"namespace": namespace}

        return self._request(
            "GET",
            "v1/documents",
            params=params,
            cast_to=DocumentListResponse,
        )

    def get(self, document_id: str) -> Document:
        """Get one canonical document by ID.

        Args:
            document_id: Identifier of the document to fetch.

        Returns:
            The response body cast to ``Document``.

        Raises:
            ValueError: If ``document_id`` is empty.
        """
        return self._request(
            "GET",
            _document_path(document_id),
            cast_to=Document,
        )

    def archive(self, document_id: str) -> Document:
        """Archive one canonical document by ID.

        Args:
            document_id: Identifier of the document to archive.

        Returns:
            The response body cast to ``Document``.

        Raises:
            ValueError: If ``document_id`` is empty.
        """
        return self._request(
            "POST",
            _document_path(document_id, "/archive"),
            cast_to=Document,
        )


class AsyncDocuments(AsyncAPIResource):
    """Asynchronous interface for ``/v1/documents`` endpoints."""

    async def list(self, *, namespace: Optional[str] = None) -> DocumentListResponse:
        """List canonical documents.

        Args:
            namespace: Namespace to list. When ``None``, no ``namespace``
                query parameter is sent with the request.

        Returns:
            The response body cast to ``DocumentListResponse``.
        """
        params: Optional[Dict[str, Any]] = None
        if namespace is not None:
            params = {"namespace": namespace}

        return await self._request(
            "GET",
            "v1/documents",
            params=params,
            cast_to=DocumentListResponse,
        )

    async def get(self, document_id: str) -> Document:
        """Get one canonical document by ID.

        Args:
            document_id: Identifier of the document to fetch.

        Returns:
            The response body cast to ``Document``.

        Raises:
            ValueError: If ``document_id`` is empty.
        """
        return await self._request(
            "GET",
            _document_path(document_id),
            cast_to=Document,
        )

    async def archive(self, document_id: str) -> Document:
        """Archive one canonical document by ID.

        Args:
            document_id: Identifier of the document to archive.

        Returns:
            The response body cast to ``Document``.

        Raises:
            ValueError: If ``document_id`` is empty.
        """
        return await self._request(
            "POST",
            _document_path(document_id, "/archive"),
            cast_to=Document,
        )
@@ -34,6 +34,8 @@ class Jobs(SyncAPIResource):
34
34
  source_type: str,
35
35
  source_url: Optional[str] = None,
36
36
  file_name: Optional[str] = None,
37
+ namespace: Optional[str] = None,
38
+ document_id: Optional[str] = None,
37
39
  data_id: Optional[str] = None,
38
40
  parsing_params: Optional[ParsingParams] = None,
39
41
  webhook: Optional[WebhookConfig] = None,
@@ -44,6 +46,8 @@ class Jobs(SyncAPIResource):
44
46
  source_type: ``"url"`` or ``"file"``.
45
47
  source_url: URL to parse (required when ``source_type="url"``).
46
48
  file_name: Original filename (used when ``source_type="file"``).
49
+ namespace: Retrieval namespace. The server uses ``default`` when omitted.
50
+ document_id: Existing document ID when creating an update job.
47
51
  data_id: Optional idempotency / correlation identifier.
48
52
  parsing_params: Optional parsing configuration.
49
53
  webhook: Optional webhook configuration.
@@ -56,6 +60,10 @@ class Jobs(SyncAPIResource):
56
60
  body["source_url"] = source_url
57
61
  if file_name is not None:
58
62
  body["file_name"] = file_name
63
+ if namespace is not None:
64
+ body["namespace"] = namespace
65
+ if document_id is not None:
66
+ body["document_id"] = document_id
59
67
  if data_id is not None:
60
68
  body["data_id"] = data_id
61
69
  if parsing_params is not None:
@@ -158,6 +166,8 @@ class AsyncJobs(AsyncAPIResource):
158
166
  source_type: str,
159
167
  source_url: Optional[str] = None,
160
168
  file_name: Optional[str] = None,
169
+ namespace: Optional[str] = None,
170
+ document_id: Optional[str] = None,
161
171
  data_id: Optional[str] = None,
162
172
  parsing_params: Optional[ParsingParams] = None,
163
173
  webhook: Optional[WebhookConfig] = None,
@@ -168,6 +178,10 @@ class AsyncJobs(AsyncAPIResource):
168
178
  body["source_url"] = source_url
169
179
  if file_name is not None:
170
180
  body["file_name"] = file_name
181
+ if namespace is not None:
182
+ body["namespace"] = namespace
183
+ if document_id is not None:
184
+ body["document_id"] = document_id
171
185
  if data_id is not None:
172
186
  body["data_id"] = data_id
173
187
  if parsing_params is not None: