knowhere-python-sdk 0.3.2__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. knowhere_python_sdk-0.4.0/.release-please-manifest.json +3 -0
  2. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/CHANGELOG.md +7 -0
  3. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/PKG-INFO +16 -1
  4. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/README.md +15 -0
  5. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/docs/usage.md +16 -0
  6. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/pyproject.toml +1 -1
  7. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/__init__.py +14 -1
  8. knowhere_python_sdk-0.4.0/src/knowhere/_version.py +1 -0
  9. knowhere_python_sdk-0.4.0/src/knowhere/resources/documents.py +191 -0
  10. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/types/__init__.py +14 -1
  11. knowhere_python_sdk-0.4.0/src/knowhere/types/document.py +78 -0
  12. knowhere_python_sdk-0.4.0/tests/test_documents.py +219 -0
  13. knowhere_python_sdk-0.3.2/.release-please-manifest.json +0 -3
  14. knowhere_python_sdk-0.3.2/src/knowhere/_version.py +0 -1
  15. knowhere_python_sdk-0.3.2/src/knowhere/resources/documents.py +0 -74
  16. knowhere_python_sdk-0.3.2/src/knowhere/types/document.py +0 -28
  17. knowhere_python_sdk-0.3.2/tests/test_documents.py +0 -106
  18. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/.github/ISSUE_TEMPLATE/bug-report.yml +0 -0
  19. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  20. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/.github/ISSUE_TEMPLATE/feature-request.yml +0 -0
  21. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/.github/pull_request_template.md +0 -0
  22. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/.github/workflows/ci.yml +0 -0
  23. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/.github/workflows/publish-pypi.yml +0 -0
  24. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/.github/workflows/publish.yml +0 -0
  25. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/.gitignore +0 -0
  26. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/CODE_OF_CONDUCT.md +0 -0
  27. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/CONTRIBUTING.md +0 -0
  28. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/LICENSE +0 -0
  29. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/SECURITY.md +0 -0
  30. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/examples/async_usage.py +0 -0
  31. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/examples/error_handling.py +0 -0
  32. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/examples/parse_file.py +0 -0
  33. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/examples/parse_url.py +0 -0
  34. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/examples/step_by_step.py +0 -0
  35. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/release-please-config.json +0 -0
  36. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/_base_client.py +0 -0
  37. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/_client.py +0 -0
  38. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/_constants.py +0 -0
  39. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/_exceptions.py +0 -0
  40. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/_logging.py +0 -0
  41. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/_response.py +0 -0
  42. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/_types.py +0 -0
  43. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/lib/__init__.py +0 -0
  44. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/lib/polling.py +0 -0
  45. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/lib/result_parser.py +0 -0
  46. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/lib/upload.py +0 -0
  47. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/py.typed +0 -0
  48. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/resources/__init__.py +0 -0
  49. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/resources/_base.py +0 -0
  50. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/resources/jobs.py +0 -0
  51. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/resources/retrieval.py +0 -0
  52. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/types/job.py +0 -0
  53. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/types/params.py +0 -0
  54. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/types/result.py +0 -0
  55. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/types/retrieval.py +0 -0
  56. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/src/knowhere/types/shared.py +0 -0
  57. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/__init__.py +0 -0
  58. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/conftest.py +0 -0
  59. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/fixtures/real_result.zip +0 -0
  60. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/test_client.py +0 -0
  61. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/test_exceptions.py +0 -0
  62. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/test_jobs.py +0 -0
  63. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/test_logging.py +0 -0
  64. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/test_models.py +0 -0
  65. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/test_parse.py +0 -0
  66. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/test_polling.py +0 -0
  67. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/test_result_parser.py +0 -0
  68. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/test_retrieval.py +0 -0
  69. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/test_retry.py +0 -0
  70. {knowhere_python_sdk-0.3.2 → knowhere_python_sdk-0.4.0}/tests/test_upload.py +0 -0
@@ -0,0 +1,3 @@
1
+ {
2
+ ".": "0.4.0"
3
+ }
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.0](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.3.2...v0.4.0) (2026-04-27)
4
+
5
+
6
+ ### Features
7
+
8
+ * add document chunks resource methods ([73094d4](https://github.com/Ontos-AI/knowhere-python-sdk/commit/73094d4f95ef693785fa3965f6f2a223dfd2a350))
9
+
3
10
  ## [0.3.2](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.3.1...v0.3.2) (2026-04-23)
4
11
 
5
12
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: knowhere-python-sdk
3
- Version: 0.3.2
3
+ Version: 0.4.0
4
4
  Summary: Official Python SDK for the Knowhere document parsing API
5
5
  Project-URL: Homepage, https://knowhereto.ai
6
6
  Project-URL: Documentation, https://docs.knowhereto.ai
@@ -118,6 +118,21 @@ update_job = client.jobs.create(
118
118
  document = client.documents.get(document_id)
119
119
  print(document.status)
120
120
 
121
+ chunks = client.documents.list_chunks(
122
+ document_id,
123
+ page=1,
124
+ page_size=50,
125
+ chunk_type="text",
126
+ )
127
+ print(chunks.pagination.total)
128
+ if chunks.chunks:
129
+ chunk = client.documents.get_chunk(
130
+ document_id,
131
+ chunks.chunks[0].id,
132
+ include_asset_urls=True,
133
+ )
134
+ print(chunk.chunk.content)
135
+
121
136
  client.documents.archive(document_id)
122
137
  ```
123
138
 
@@ -85,6 +85,21 @@ update_job = client.jobs.create(
85
85
  document = client.documents.get(document_id)
86
86
  print(document.status)
87
87
 
88
+ chunks = client.documents.list_chunks(
89
+ document_id,
90
+ page=1,
91
+ page_size=50,
92
+ chunk_type="text",
93
+ )
94
+ print(chunks.pagination.total)
95
+ if chunks.chunks:
96
+ chunk = client.documents.get_chunk(
97
+ document_id,
98
+ chunks.chunks[0].id,
99
+ include_asset_urls=True,
100
+ )
101
+ print(chunk.chunk.content)
102
+
88
103
  client.documents.archive(document_id)
89
104
  ```
90
105
 
@@ -521,6 +521,22 @@ for document in document_list.documents:
521
521
  document = client.documents.get("doc_123")
522
522
  print(document.current_job_result_id)
523
523
 
524
+ chunks = client.documents.list_chunks(
525
+ "doc_123",
526
+ page=1,
527
+ page_size=50,
528
+ chunk_type="text",
529
+ )
530
+ for chunk in chunks.chunks:
531
+ print(chunk.id, chunk.content)
532
+
533
+ image_chunk = client.documents.get_chunk(
534
+ "doc_123",
535
+ "dchk_123",
536
+ include_asset_urls=True,
537
+ )
538
+ print(image_chunk.chunk.asset_url)
539
+
524
540
  archived = client.documents.archive("doc_123")
525
541
  print(archived.status) # "archived"
526
542
  ```
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "knowhere-python-sdk"
7
- version = "0.3.2"
7
+ version = "0.4.0"
8
8
  description = "Official Python SDK for the Knowhere document parsing API"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -35,7 +35,15 @@ from knowhere._exceptions import (
35
35
  )
36
36
  from knowhere._types import PollProgressCallback, UploadProgressCallback
37
37
  from knowhere._version import __version__
38
- from knowhere.types.document import Document, DocumentListResponse
38
+ from knowhere.types.document import (
39
+ Document,
40
+ DocumentChunk,
41
+ DocumentChunkListResponse,
42
+ DocumentChunkPagination,
43
+ DocumentChunkResponse,
44
+ DocumentChunkType,
45
+ DocumentListResponse,
46
+ )
39
47
  from knowhere.types.job import Job, JobError, JobProgress, JobResult
40
48
  from knowhere.types.params import ParsingParams, WebhookConfig
41
49
  from knowhere.types.retrieval import (
@@ -98,6 +106,11 @@ __all__: list[str] = [
98
106
  "JobResult",
99
107
  # Document types
100
108
  "Document",
109
+ "DocumentChunk",
110
+ "DocumentChunkListResponse",
111
+ "DocumentChunkPagination",
112
+ "DocumentChunkResponse",
113
+ "DocumentChunkType",
101
114
  "DocumentListResponse",
102
115
  # Retrieval types
103
116
  "RetrievalChannel",
@@ -0,0 +1 @@
1
+ __version__ = "0.4.0" # x-release-please-version
@@ -0,0 +1,191 @@
1
+ """Documents resource for canonical document lifecycle operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
8
+ from knowhere.types.document import (
9
+ Document,
10
+ DocumentChunkListResponse,
11
+ DocumentChunkResponse,
12
+ DocumentChunkType,
13
+ DocumentListResponse,
14
+ )
15
+
16
+
17
class Documents(SyncAPIResource):
    """Blocking accessors for the ``/v1/documents`` endpoints."""

    def list(self, *, namespace: Optional[str] = None) -> DocumentListResponse:
        """Return the canonical documents of a namespace.

        Args:
            namespace: Sent as the ``namespace`` query parameter. When it is
                ``None`` no query parameters are sent at all.

        Returns:
            The response body parsed as ``DocumentListResponse``.
        """
        query: Optional[Dict[str, Any]] = None
        if namespace is not None:
            query = {"namespace": namespace}

        return self._request(
            "GET",
            "v1/documents",
            params=query,
            cast_to=DocumentListResponse,
        )

    def get(self, document_id: str) -> Document:
        """Fetch a single canonical document by its ID."""
        endpoint = f"v1/documents/{document_id}"
        return self._request("GET", endpoint, cast_to=Document)

    def list_chunks(
        self,
        document_id: str,
        *,
        page: int = 1,
        page_size: int = 50,
        chunk_type: Optional[DocumentChunkType] = None,
        include_asset_urls: bool = False,
    ) -> DocumentChunkListResponse:
        """Return one page of current-revision chunks for a canonical document.

        Query parameters are assembled by ``_build_chunk_list_params``; when
        that helper yields an empty mapping, ``None`` is passed to the request
        instead so that no query string is produced.

        Args:
            document_id: ID interpolated into the request path.
            page: Page number to request.
            page_size: Number of chunks per page.
            chunk_type: Optional chunk type filter.
            include_asset_urls: Whether to ask the API to include asset URLs.

        Returns:
            The response body parsed as ``DocumentChunkListResponse``.
        """
        query: Dict[str, Any] = _build_chunk_list_params(
            page=page,
            page_size=page_size,
            chunk_type=chunk_type,
            include_asset_urls=include_asset_urls,
        )
        endpoint = f"v1/documents/{document_id}/chunks"

        return self._request(
            "GET",
            endpoint,
            params=query if query else None,
            cast_to=DocumentChunkListResponse,
        )

    def get_chunk(
        self,
        document_id: str,
        document_chunk_id: str,
        *,
        include_asset_urls: bool = False,
    ) -> DocumentChunkResponse:
        """Fetch a single current-revision chunk of a canonical document.

        Args:
            document_id: ID of the owning document, interpolated into the path.
            document_chunk_id: ID of the chunk, interpolated into the path.
            include_asset_urls: Whether to ask the API to include asset URLs.

        Returns:
            The response body parsed as ``DocumentChunkResponse``.
        """
        query: Dict[str, Any] = _build_chunk_get_params(
            include_asset_urls=include_asset_urls,
        )
        endpoint = f"v1/documents/{document_id}/chunks/{document_chunk_id}"

        return self._request(
            "GET",
            endpoint,
            params=query if query else None,
            cast_to=DocumentChunkResponse,
        )

    def archive(self, document_id: str) -> Document:
        """Archive a canonical document by ID and return its resulting state."""
        endpoint = f"v1/documents/{document_id}/archive"
        return self._request("POST", endpoint, cast_to=Document)
91
+
92
+
93
class AsyncDocuments(AsyncAPIResource):
    """Awaitable accessors for the ``/v1/documents`` endpoints."""

    async def list(self, *, namespace: Optional[str] = None) -> DocumentListResponse:
        """Return the canonical documents of a namespace.

        Args:
            namespace: Sent as the ``namespace`` query parameter. When it is
                ``None`` no query parameters are sent at all.

        Returns:
            The response body parsed as ``DocumentListResponse``.
        """
        query: Optional[Dict[str, Any]] = None
        if namespace is not None:
            query = {"namespace": namespace}

        return await self._request(
            "GET",
            "v1/documents",
            params=query,
            cast_to=DocumentListResponse,
        )

    async def get(self, document_id: str) -> Document:
        """Fetch a single canonical document by its ID."""
        endpoint = f"v1/documents/{document_id}"
        return await self._request("GET", endpoint, cast_to=Document)

    async def list_chunks(
        self,
        document_id: str,
        *,
        page: int = 1,
        page_size: int = 50,
        chunk_type: Optional[DocumentChunkType] = None,
        include_asset_urls: bool = False,
    ) -> DocumentChunkListResponse:
        """Return one page of current-revision chunks for a canonical document.

        Query parameters are assembled by ``_build_chunk_list_params``; when
        that helper yields an empty mapping, ``None`` is passed to the request
        instead so that no query string is produced.

        Args:
            document_id: ID interpolated into the request path.
            page: Page number to request.
            page_size: Number of chunks per page.
            chunk_type: Optional chunk type filter.
            include_asset_urls: Whether to ask the API to include asset URLs.

        Returns:
            The response body parsed as ``DocumentChunkListResponse``.
        """
        query: Dict[str, Any] = _build_chunk_list_params(
            page=page,
            page_size=page_size,
            chunk_type=chunk_type,
            include_asset_urls=include_asset_urls,
        )
        endpoint = f"v1/documents/{document_id}/chunks"

        return await self._request(
            "GET",
            endpoint,
            params=query if query else None,
            cast_to=DocumentChunkListResponse,
        )

    async def get_chunk(
        self,
        document_id: str,
        document_chunk_id: str,
        *,
        include_asset_urls: bool = False,
    ) -> DocumentChunkResponse:
        """Fetch a single current-revision chunk of a canonical document.

        Args:
            document_id: ID of the owning document, interpolated into the path.
            document_chunk_id: ID of the chunk, interpolated into the path.
            include_asset_urls: Whether to ask the API to include asset URLs.

        Returns:
            The response body parsed as ``DocumentChunkResponse``.
        """
        query: Dict[str, Any] = _build_chunk_get_params(
            include_asset_urls=include_asset_urls,
        )
        endpoint = f"v1/documents/{document_id}/chunks/{document_chunk_id}"

        return await self._request(
            "GET",
            endpoint,
            params=query if query else None,
            cast_to=DocumentChunkResponse,
        )

    async def archive(self, document_id: str) -> Document:
        """Archive a canonical document by ID and return its resulting state."""
        endpoint = f"v1/documents/{document_id}/archive"
        return await self._request("POST", endpoint, cast_to=Document)
167
+
168
+
169
+ def _build_chunk_list_params(
170
+ *,
171
+ page: int,
172
+ page_size: int,
173
+ chunk_type: Optional[DocumentChunkType],
174
+ include_asset_urls: bool,
175
+ ) -> Dict[str, Any]:
176
+ params: Dict[str, Any] = {}
177
+ if page != 1:
178
+ params["page"] = page
179
+ if page_size != 50:
180
+ params["page_size"] = page_size
181
+ if chunk_type is not None:
182
+ params["chunk_type"] = chunk_type
183
+ if include_asset_urls:
184
+ params["include_asset_urls"] = True
185
+ return params
186
+
187
+
188
+ def _build_chunk_get_params(*, include_asset_urls: bool) -> Dict[str, Any]:
189
+ if not include_asset_urls:
190
+ return {}
191
+ return {"include_asset_urls": True}
@@ -2,7 +2,15 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from knowhere.types.document import Document, DocumentListResponse
5
+ from knowhere.types.document import (
6
+ Document,
7
+ DocumentChunk,
8
+ DocumentChunkListResponse,
9
+ DocumentChunkPagination,
10
+ DocumentChunkResponse,
11
+ DocumentChunkType,
12
+ DocumentListResponse,
13
+ )
6
14
  from knowhere.types.job import Job, JobError, JobResult
7
15
  from knowhere.types.params import ParsingParams, WebhookConfig
8
16
  from knowhere.types.retrieval import (
@@ -39,6 +47,11 @@ __all__: list[str] = [
39
47
  "JobResult",
40
48
  # document
41
49
  "Document",
50
+ "DocumentChunk",
51
+ "DocumentChunkListResponse",
52
+ "DocumentChunkPagination",
53
+ "DocumentChunkResponse",
54
+ "DocumentChunkType",
42
55
  "DocumentListResponse",
43
56
  # retrieval
44
57
  "RetrievalChannel",
@@ -0,0 +1,78 @@
1
+ """Pydantic models for canonical document lifecycle responses."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime
6
+ from typing import Any, Dict, Literal, Optional
7
+
8
+ from pydantic import BaseModel
9
+
10
+
11
class Document(BaseModel):
    """State of one canonical document as returned by ``/v1/documents`` endpoints."""

    # Required fields: validation fails when any of these is absent.
    document_id: str
    namespace: str
    # Lifecycle state string, e.g. "active" or "archived".
    status: str

    # Optional fields: each falls back to ``None`` when not provided.
    current_job_result_id: Optional[str] = None
    source_file_name: Optional[str] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    # NOTE(review): presumably only populated once the document is archived —
    # confirm against the API documentation.
    archived_at: Optional[datetime] = None
22
+
23
+
24
class DocumentListResponse(BaseModel):
    """Parsed body of ``GET /v1/documents``."""

    # Namespace reported by the API for this listing.
    namespace: str
    # Documents in the listing; may be an empty list.
    documents: list[Document]
29
+
30
+
31
+ DocumentChunkType = Literal["text", "image", "table"]
32
+
33
+
34
class DocumentChunkPagination(BaseModel):
    """Paging metadata attached to document chunk list responses."""

    # All four counters are required integers.
    page: int
    page_size: int
    # NOTE(review): presumably the number of chunks across all pages — confirm.
    total: int
    total_pages: int
41
+
42
+
43
class DocumentChunk(BaseModel):
    """A single current-revision chunk of a document."""

    # Identifier used to address the chunk through the chunk endpoints.
    id: str
    # NOTE(review): presumably the identifier assigned by the parser, distinct
    # from ``id`` — confirm against the API documentation.
    chunk_id: str
    # Restricted to the literals allowed by ``DocumentChunkType``.
    chunk_type: DocumentChunkType
    content: Optional[str] = None
    section_id: Optional[str] = None
    section_path: Optional[str] = None
    source_chunk_path: Optional[str] = None
    file_path: Optional[str] = None
    # Required: no default is declared for ``sort_order`` or ``metadata``.
    sort_order: int
    metadata: Dict[str, Any]
    # NOTE(review): presumably only populated when asset URLs were requested —
    # confirm against the API documentation.
    asset_url: Optional[str] = None
    created_at: Optional[datetime] = None
58
+
59
+
60
class DocumentChunkListResponse(BaseModel):
    """Parsed body of ``GET /v1/documents/{document_id}/chunks``."""

    document_id: str
    namespace: str
    # Both job references are optional and default to ``None``.
    job_result_id: Optional[str] = None
    job_id: Optional[str] = None
    # The chunks on the requested page; may be an empty list.
    chunks: list[DocumentChunk]
    pagination: DocumentChunkPagination
69
+
70
+
71
class DocumentChunkResponse(BaseModel):
    """Parsed body of ``GET /v1/documents/{document_id}/chunks/{chunk_id}``."""

    document_id: str
    namespace: str
    # Both job references are optional and default to ``None``.
    job_result_id: Optional[str] = None
    job_id: Optional[str] = None
    # The requested chunk.
    chunk: DocumentChunk
@@ -0,0 +1,219 @@
1
+ """Tests for the documents resource."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict
6
+
7
+ import httpx
8
+ import pytest
9
+ import respx
10
+
11
+ from tests.conftest import BASE_URL
12
+
13
+
14
+ DOCUMENTS_URL: str = f"{BASE_URL}/v1/documents"
15
+
16
+
17
+ def _make_document(status: str = "active") -> Dict[str, Any]:
18
+ return {
19
+ "document_id": "doc_123",
20
+ "namespace": "support-center",
21
+ "status": status,
22
+ "current_job_result_id": "result_123",
23
+ "source_file_name": "refund-policy.md",
24
+ "created_at": "2026-04-21T08:00:00Z",
25
+ "updated_at": "2026-04-21T08:30:00Z",
26
+ "archived_at": "2026-04-21T09:00:00Z" if status == "archived" else None,
27
+ }
28
+
29
+
30
+ def _make_document_chunk(chunk_type: str = "text") -> Dict[str, Any]:
31
+ return {
32
+ "id": "dchk_123",
33
+ "chunk_id": "parser-chunk-1",
34
+ "chunk_type": chunk_type,
35
+ "content": "Chunk content",
36
+ "section_id": "sec_123",
37
+ "section_path": "Chapter 1",
38
+ "source_chunk_path": "Chapter 1/Intro",
39
+ "file_path": "images/figure-1.png" if chunk_type == "image" else None,
40
+ "sort_order": 0,
41
+ "metadata": {"summary": "Intro", "page_nums": [1]},
42
+ "asset_url": "https://assets.example/figure-1.png" if chunk_type == "image" else None,
43
+ "created_at": "2026-04-27T04:00:00Z",
44
+ }
45
+
46
+
47
class TestDocumentsResource:
    """Verify document lifecycle and chunk calls against mocked HTTP routes.

    Each test registers a respx route, invokes one resource method through
    the ``sync_client`` / ``async_client`` fixtures, and then checks both the
    outgoing request (query parameters) and the parsed response model.
    """

    @respx.mock
    def test_list_documents_sends_namespace_query(self, sync_client: Any) -> None:
        route = respx.get(DOCUMENTS_URL).mock(
            return_value=httpx.Response(
                200,
                json={
                    "namespace": "support-center",
                    "documents": [_make_document()],
                },
            )
        )

        response = sync_client.documents.list(namespace="support-center")

        assert route.called
        assert route.calls[0].request.url.params["namespace"] == "support-center"
        assert response.namespace == "support-center"
        assert response.documents[0].document_id == "doc_123"

    @respx.mock
    def test_list_documents_omits_namespace_when_defaulted(self, sync_client: Any) -> None:
        route = respx.get(DOCUMENTS_URL).mock(
            return_value=httpx.Response(
                200,
                json={"namespace": "default", "documents": []},
            )
        )

        response = sync_client.documents.list()

        assert route.called
        assert dict(route.calls[0].request.url.params) == {}
        assert response.namespace == "default"
        assert response.documents == []

    @respx.mock
    def test_get_document_returns_document_state(self, sync_client: Any) -> None:
        route = respx.get(f"{DOCUMENTS_URL}/doc_123").mock(
            return_value=httpx.Response(200, json=_make_document())
        )

        document = sync_client.documents.get("doc_123")

        assert route.called
        assert document.document_id == "doc_123"
        assert document.status == "active"

    @respx.mock
    def test_list_chunks_sends_optional_query_params(self, sync_client: Any) -> None:
        route = respx.get(f"{DOCUMENTS_URL}/doc_123/chunks").mock(
            return_value=httpx.Response(
                200,
                json={
                    "document_id": "doc_123",
                    "namespace": "support-center",
                    "job_result_id": "result_123",
                    "job_id": "job_123",
                    "chunks": [_make_document_chunk(chunk_type="table")],
                    "pagination": {
                        "page": 2,
                        "page_size": 10,
                        "total": 11,
                        "total_pages": 2,
                    },
                },
            )
        )

        response = sync_client.documents.list_chunks(
            "doc_123",
            page=2,
            page_size=10,
            chunk_type="table",
            include_asset_urls=True,
        )

        assert route.called
        assert route.calls[0].request.url.params["page"] == "2"
        assert route.calls[0].request.url.params["page_size"] == "10"
        assert route.calls[0].request.url.params["chunk_type"] == "table"
        assert route.calls[0].request.url.params["include_asset_urls"] == "true"
        assert response.document_id == "doc_123"
        assert response.chunks[0].id == "dchk_123"
        assert response.pagination.total_pages == 2

    @respx.mock
    def test_list_chunks_omits_default_query_params(self, sync_client: Any) -> None:
        route = respx.get(f"{DOCUMENTS_URL}/doc_123/chunks").mock(
            return_value=httpx.Response(
                200,
                json={
                    "document_id": "doc_123",
                    "namespace": "support-center",
                    "job_result_id": None,
                    "job_id": None,
                    "chunks": [],
                    "pagination": {
                        "page": 1,
                        "page_size": 50,
                        "total": 0,
                        "total_pages": 0,
                    },
                },
            )
        )

        response = sync_client.documents.list_chunks("doc_123")

        assert route.called
        assert dict(route.calls[0].request.url.params) == {}
        assert response.chunks == []
        assert response.pagination.total == 0

    @respx.mock
    def test_get_chunk_omits_asset_urls_by_default(self, sync_client: Any) -> None:
        # Covers the synchronous get_chunk path and the "flag not set" half of
        # the behavior: without include_asset_urls no query string is sent.
        route = respx.get(f"{DOCUMENTS_URL}/doc_123/chunks/dchk_123").mock(
            return_value=httpx.Response(
                200,
                json={
                    "document_id": "doc_123",
                    "namespace": "support-center",
                    "job_result_id": "result_123",
                    "job_id": "job_123",
                    "chunk": _make_document_chunk(),
                },
            )
        )

        response = sync_client.documents.get_chunk("doc_123", "dchk_123")

        assert route.called
        assert dict(route.calls[0].request.url.params) == {}
        assert response.chunk.id == "dchk_123"
        assert response.chunk.asset_url is None

    @respx.mock
    @pytest.mark.asyncio
    async def test_async_get_chunk_requests_asset_urls_only_when_needed(
        self,
        async_client: Any,
    ) -> None:
        route = respx.get(f"{DOCUMENTS_URL}/doc_123/chunks/dchk_123").mock(
            return_value=httpx.Response(
                200,
                json={
                    "document_id": "doc_123",
                    "namespace": "support-center",
                    "job_result_id": "result_123",
                    "job_id": "job_123",
                    "chunk": _make_document_chunk(chunk_type="image"),
                },
            )
        )

        response = await async_client.documents.get_chunk(
            "doc_123",
            "dchk_123",
            include_asset_urls=True,
        )

        assert route.called
        assert route.calls[0].request.url.params["include_asset_urls"] == "true"
        assert response.chunk.id == "dchk_123"
        assert response.chunk.asset_url == "https://assets.example/figure-1.png"

    @respx.mock
    def test_archive_document_returns_archived_state(self, sync_client: Any) -> None:
        route = respx.post(f"{DOCUMENTS_URL}/doc_123/archive").mock(
            return_value=httpx.Response(200, json=_make_document(status="archived"))
        )

        document = sync_client.documents.archive("doc_123")

        assert route.called
        assert document.document_id == "doc_123"
        assert document.status == "archived"
        assert document.archived_at is not None

    @respx.mock
    @pytest.mark.asyncio
    async def test_async_archive_document_returns_archived_state(
        self,
        async_client: Any,
    ) -> None:
        route = respx.post(f"{DOCUMENTS_URL}/doc_123/archive").mock(
            return_value=httpx.Response(200, json=_make_document(status="archived"))
        )

        document = await async_client.documents.archive("doc_123")

        assert route.called
        assert document.status == "archived"
@@ -1,3 +0,0 @@
1
- {
2
- ".": "0.3.2"
3
- }
@@ -1 +0,0 @@
1
- __version__ = "0.3.2" # x-release-please-version
@@ -1,74 +0,0 @@
1
- """Documents resource for canonical document lifecycle operations."""
2
-
3
- from __future__ import annotations
4
-
5
- from typing import Any, Dict, Optional
6
-
7
- from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
8
- from knowhere.types.document import Document, DocumentListResponse
9
-
10
-
11
- class Documents(SyncAPIResource):
12
- """Synchronous interface for ``/v1/documents`` endpoints."""
13
-
14
- def list(self, *, namespace: Optional[str] = None) -> DocumentListResponse:
15
- """List canonical documents in a namespace."""
16
- params: Dict[str, Any] = {}
17
- if namespace is not None:
18
- params["namespace"] = namespace
19
-
20
- return self._request(
21
- "GET",
22
- "v1/documents",
23
- params=params or None,
24
- cast_to=DocumentListResponse,
25
- )
26
-
27
- def get(self, document_id: str) -> Document:
28
- """Get one canonical document by ID."""
29
- return self._request(
30
- "GET",
31
- f"v1/documents/{document_id}",
32
- cast_to=Document,
33
- )
34
-
35
- def archive(self, document_id: str) -> Document:
36
- """Archive one canonical document by ID."""
37
- return self._request(
38
- "POST",
39
- f"v1/documents/{document_id}/archive",
40
- cast_to=Document,
41
- )
42
-
43
-
44
- class AsyncDocuments(AsyncAPIResource):
45
- """Asynchronous interface for ``/v1/documents`` endpoints."""
46
-
47
- async def list(self, *, namespace: Optional[str] = None) -> DocumentListResponse:
48
- """List canonical documents in a namespace."""
49
- params: Dict[str, Any] = {}
50
- if namespace is not None:
51
- params["namespace"] = namespace
52
-
53
- return await self._request(
54
- "GET",
55
- "v1/documents",
56
- params=params or None,
57
- cast_to=DocumentListResponse,
58
- )
59
-
60
- async def get(self, document_id: str) -> Document:
61
- """Get one canonical document by ID."""
62
- return await self._request(
63
- "GET",
64
- f"v1/documents/{document_id}",
65
- cast_to=Document,
66
- )
67
-
68
- async def archive(self, document_id: str) -> Document:
69
- """Archive one canonical document by ID."""
70
- return await self._request(
71
- "POST",
72
- f"v1/documents/{document_id}/archive",
73
- cast_to=Document,
74
- )
@@ -1,28 +0,0 @@
1
- """Pydantic models for canonical document lifecycle responses."""
2
-
3
- from __future__ import annotations
4
-
5
- from datetime import datetime
6
- from typing import Optional
7
-
8
- from pydantic import BaseModel
9
-
10
-
11
- class Document(BaseModel):
12
- """Canonical document state returned by ``/v1/documents`` endpoints."""
13
-
14
- document_id: str
15
- namespace: str
16
- status: str
17
- current_job_result_id: Optional[str] = None
18
- source_file_name: Optional[str] = None
19
- created_at: Optional[datetime] = None
20
- updated_at: Optional[datetime] = None
21
- archived_at: Optional[datetime] = None
22
-
23
-
24
- class DocumentListResponse(BaseModel):
25
- """Response from ``GET /v1/documents``."""
26
-
27
- namespace: str
28
- documents: list[Document]
@@ -1,106 +0,0 @@
1
- """Tests for the documents resource."""
2
-
3
- from __future__ import annotations
4
-
5
- from typing import Any, Dict
6
-
7
- import httpx
8
- import pytest
9
- import respx
10
-
11
- from tests.conftest import BASE_URL
12
-
13
-
14
- DOCUMENTS_URL: str = f"{BASE_URL}/v1/documents"
15
-
16
-
17
- def _make_document(status: str = "active") -> Dict[str, Any]:
18
- return {
19
- "document_id": "doc_123",
20
- "namespace": "support-center",
21
- "status": status,
22
- "current_job_result_id": "result_123",
23
- "source_file_name": "refund-policy.md",
24
- "created_at": "2026-04-21T08:00:00Z",
25
- "updated_at": "2026-04-21T08:30:00Z",
26
- "archived_at": "2026-04-21T09:00:00Z" if status == "archived" else None,
27
- }
28
-
29
-
30
- class TestDocumentsResource:
31
- """Verify document lifecycle calls."""
32
-
33
- @respx.mock
34
- def test_list_documents_sends_namespace_query(self, sync_client: Any) -> None:
35
- route = respx.get(DOCUMENTS_URL).mock(
36
- return_value=httpx.Response(
37
- 200,
38
- json={
39
- "namespace": "support-center",
40
- "documents": [_make_document()],
41
- },
42
- )
43
- )
44
-
45
- response = sync_client.documents.list(namespace="support-center")
46
-
47
- assert route.called
48
- assert route.calls[0].request.url.params["namespace"] == "support-center"
49
- assert response.namespace == "support-center"
50
- assert response.documents[0].document_id == "doc_123"
51
-
52
- @respx.mock
53
- def test_list_documents_omits_namespace_when_defaulted(self, sync_client: Any) -> None:
54
- route = respx.get(DOCUMENTS_URL).mock(
55
- return_value=httpx.Response(
56
- 200,
57
- json={"namespace": "default", "documents": []},
58
- )
59
- )
60
-
61
- response = sync_client.documents.list()
62
-
63
- assert route.called
64
- assert dict(route.calls[0].request.url.params) == {}
65
- assert response.namespace == "default"
66
- assert response.documents == []
67
-
68
- @respx.mock
69
- def test_get_document_returns_document_state(self, sync_client: Any) -> None:
70
- route = respx.get(f"{DOCUMENTS_URL}/doc_123").mock(
71
- return_value=httpx.Response(200, json=_make_document())
72
- )
73
-
74
- document = sync_client.documents.get("doc_123")
75
-
76
- assert route.called
77
- assert document.document_id == "doc_123"
78
- assert document.status == "active"
79
-
80
- @respx.mock
81
- def test_archive_document_returns_archived_state(self, sync_client: Any) -> None:
82
- route = respx.post(f"{DOCUMENTS_URL}/doc_123/archive").mock(
83
- return_value=httpx.Response(200, json=_make_document(status="archived"))
84
- )
85
-
86
- document = sync_client.documents.archive("doc_123")
87
-
88
- assert route.called
89
- assert document.document_id == "doc_123"
90
- assert document.status == "archived"
91
- assert document.archived_at is not None
92
-
93
- @respx.mock
94
- @pytest.mark.asyncio
95
- async def test_async_archive_document_returns_archived_state(
96
- self,
97
- async_client: Any,
98
- ) -> None:
99
- route = respx.post(f"{DOCUMENTS_URL}/doc_123/archive").mock(
100
- return_value=httpx.Response(200, json=_make_document(status="archived"))
101
- )
102
-
103
- document = await async_client.documents.archive("doc_123")
104
-
105
- assert route.called
106
- assert document.status == "archived"