knowhere-python-sdk 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
knowhere/__init__.py CHANGED
@@ -35,7 +35,15 @@ from knowhere._exceptions import (
35
35
  )
36
36
  from knowhere._types import PollProgressCallback, UploadProgressCallback
37
37
  from knowhere._version import __version__
38
- from knowhere.types.document import Document, DocumentListResponse
38
+ from knowhere.types.document import (
39
+ Document,
40
+ DocumentChunk,
41
+ DocumentChunkListResponse,
42
+ DocumentChunkPagination,
43
+ DocumentChunkResponse,
44
+ DocumentChunkType,
45
+ DocumentListResponse,
46
+ )
39
47
  from knowhere.types.job import Job, JobError, JobProgress, JobResult
40
48
  from knowhere.types.params import ParsingParams, WebhookConfig
41
49
  from knowhere.types.retrieval import (
@@ -98,6 +106,11 @@ __all__: list[str] = [
98
106
  "JobResult",
99
107
  # Document types
100
108
  "Document",
109
+ "DocumentChunk",
110
+ "DocumentChunkListResponse",
111
+ "DocumentChunkPagination",
112
+ "DocumentChunkResponse",
113
+ "DocumentChunkType",
101
114
  "DocumentListResponse",
102
115
  # Retrieval types
103
116
  "RetrievalChannel",
knowhere/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.1" # x-release-please-version
1
+ __version__ = "0.4.0" # x-release-please-version
@@ -5,7 +5,13 @@ from __future__ import annotations
5
5
  from typing import Any, Dict, Optional
6
6
 
7
7
  from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
8
- from knowhere.types.document import Document, DocumentListResponse
8
+ from knowhere.types.document import (
9
+ Document,
10
+ DocumentChunkListResponse,
11
+ DocumentChunkResponse,
12
+ DocumentChunkType,
13
+ DocumentListResponse,
14
+ )
9
15
 
10
16
 
11
17
  class Documents(SyncAPIResource):
@@ -32,6 +38,49 @@ class Documents(SyncAPIResource):
32
38
  cast_to=Document,
33
39
  )
34
40
 
41
+ def list_chunks(
42
+ self,
43
+ document_id: str,
44
+ *,
45
+ page: int = 1,
46
+ page_size: int = 50,
47
+ chunk_type: Optional[DocumentChunkType] = None,
48
+ include_asset_urls: bool = False,
49
+ ) -> DocumentChunkListResponse:
50
+ """List current-revision chunks for one canonical document."""
51
+ params: Dict[str, Any] = _build_chunk_list_params(
52
+ page=page,
53
+ page_size=page_size,
54
+ chunk_type=chunk_type,
55
+ include_asset_urls=include_asset_urls,
56
+ )
57
+
58
+ return self._request(
59
+ "GET",
60
+ f"v1/documents/{document_id}/chunks",
61
+ params=params or None,
62
+ cast_to=DocumentChunkListResponse,
63
+ )
64
+
65
+ def get_chunk(
66
+ self,
67
+ document_id: str,
68
+ document_chunk_id: str,
69
+ *,
70
+ include_asset_urls: bool = False,
71
+ ) -> DocumentChunkResponse:
72
+ """Get one current-revision chunk for one canonical document."""
73
+ params: Dict[str, Any] = _build_chunk_get_params(
74
+ include_asset_urls=include_asset_urls,
75
+ )
76
+
77
+ return self._request(
78
+ "GET",
79
+ f"v1/documents/{document_id}/chunks/{document_chunk_id}",
80
+ params=params or None,
81
+ cast_to=DocumentChunkResponse,
82
+ )
83
+
35
84
  def archive(self, document_id: str) -> Document:
36
85
  """Archive one canonical document by ID."""
37
86
  return self._request(
@@ -65,6 +114,49 @@ class AsyncDocuments(AsyncAPIResource):
65
114
  cast_to=Document,
66
115
  )
67
116
 
117
+ async def list_chunks(
118
+ self,
119
+ document_id: str,
120
+ *,
121
+ page: int = 1,
122
+ page_size: int = 50,
123
+ chunk_type: Optional[DocumentChunkType] = None,
124
+ include_asset_urls: bool = False,
125
+ ) -> DocumentChunkListResponse:
126
+ """List current-revision chunks for one canonical document."""
127
+ params: Dict[str, Any] = _build_chunk_list_params(
128
+ page=page,
129
+ page_size=page_size,
130
+ chunk_type=chunk_type,
131
+ include_asset_urls=include_asset_urls,
132
+ )
133
+
134
+ return await self._request(
135
+ "GET",
136
+ f"v1/documents/{document_id}/chunks",
137
+ params=params or None,
138
+ cast_to=DocumentChunkListResponse,
139
+ )
140
+
141
+ async def get_chunk(
142
+ self,
143
+ document_id: str,
144
+ document_chunk_id: str,
145
+ *,
146
+ include_asset_urls: bool = False,
147
+ ) -> DocumentChunkResponse:
148
+ """Get one current-revision chunk for one canonical document."""
149
+ params: Dict[str, Any] = _build_chunk_get_params(
150
+ include_asset_urls=include_asset_urls,
151
+ )
152
+
153
+ return await self._request(
154
+ "GET",
155
+ f"v1/documents/{document_id}/chunks/{document_chunk_id}",
156
+ params=params or None,
157
+ cast_to=DocumentChunkResponse,
158
+ )
159
+
68
160
  async def archive(self, document_id: str) -> Document:
69
161
  """Archive one canonical document by ID."""
70
162
  return await self._request(
@@ -72,3 +164,28 @@ class AsyncDocuments(AsyncAPIResource):
72
164
  f"v1/documents/{document_id}/archive",
73
165
  cast_to=Document,
74
166
  )
167
+
168
+
169
+ def _build_chunk_list_params(
170
+ *,
171
+ page: int,
172
+ page_size: int,
173
+ chunk_type: Optional[DocumentChunkType],
174
+ include_asset_urls: bool,
175
+ ) -> Dict[str, Any]:
176
+ params: Dict[str, Any] = {}
177
+ if page != 1:
178
+ params["page"] = page
179
+ if page_size != 50:
180
+ params["page_size"] = page_size
181
+ if chunk_type is not None:
182
+ params["chunk_type"] = chunk_type
183
+ if include_asset_urls:
184
+ params["include_asset_urls"] = True
185
+ return params
186
+
187
+
188
+ def _build_chunk_get_params(*, include_asset_urls: bool) -> Dict[str, Any]:
189
+ if not include_asset_urls:
190
+ return {}
191
+ return {"include_asset_urls": True}
@@ -2,7 +2,15 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from knowhere.types.document import Document, DocumentListResponse
5
+ from knowhere.types.document import (
6
+ Document,
7
+ DocumentChunk,
8
+ DocumentChunkListResponse,
9
+ DocumentChunkPagination,
10
+ DocumentChunkResponse,
11
+ DocumentChunkType,
12
+ DocumentListResponse,
13
+ )
6
14
  from knowhere.types.job import Job, JobError, JobResult
7
15
  from knowhere.types.params import ParsingParams, WebhookConfig
8
16
  from knowhere.types.retrieval import (
@@ -39,6 +47,11 @@ __all__: list[str] = [
39
47
  "JobResult",
40
48
  # document
41
49
  "Document",
50
+ "DocumentChunk",
51
+ "DocumentChunkListResponse",
52
+ "DocumentChunkPagination",
53
+ "DocumentChunkResponse",
54
+ "DocumentChunkType",
42
55
  "DocumentListResponse",
43
56
  # retrieval
44
57
  "RetrievalChannel",
@@ -3,7 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from datetime import datetime
6
- from typing import Optional
6
+ from typing import Any, Dict, Literal, Optional
7
7
 
8
8
  from pydantic import BaseModel
9
9
 
@@ -26,3 +26,53 @@ class DocumentListResponse(BaseModel):
26
26
 
27
27
  namespace: str
28
28
  documents: list[Document]
29
+
30
+
31
+ DocumentChunkType = Literal["text", "image", "table"]
32
+
33
+
34
+ class DocumentChunkPagination(BaseModel):
35
+ """Pagination metadata returned by document chunk list endpoints."""
36
+
37
+ page: int
38
+ page_size: int
39
+ total: int
40
+ total_pages: int
41
+
42
+
43
+ class DocumentChunk(BaseModel):
44
+ """One current-revision document chunk."""
45
+
46
+ id: str
47
+ chunk_id: str
48
+ chunk_type: DocumentChunkType
49
+ content: Optional[str] = None
50
+ section_id: Optional[str] = None
51
+ section_path: Optional[str] = None
52
+ source_chunk_path: Optional[str] = None
53
+ file_path: Optional[str] = None
54
+ sort_order: int
55
+ metadata: Dict[str, Any]
56
+ asset_url: Optional[str] = None
57
+ created_at: Optional[datetime] = None
58
+
59
+
60
+ class DocumentChunkListResponse(BaseModel):
61
+ """Response from ``GET /v1/documents/{document_id}/chunks``."""
62
+
63
+ document_id: str
64
+ namespace: str
65
+ job_result_id: Optional[str] = None
66
+ job_id: Optional[str] = None
67
+ chunks: list[DocumentChunk]
68
+ pagination: DocumentChunkPagination
69
+
70
+
71
+ class DocumentChunkResponse(BaseModel):
72
+ """Response from ``GET /v1/documents/{document_id}/chunks/{chunk_id}``."""
73
+
74
+ document_id: str
75
+ namespace: str
76
+ job_result_id: Optional[str] = None
77
+ job_id: Optional[str] = None
78
+ chunk: DocumentChunk
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: knowhere-python-sdk
3
- Version: 0.3.1
3
+ Version: 0.4.0
4
4
  Summary: Official Python SDK for the Knowhere document parsing API
5
5
  Project-URL: Homepage, https://knowhereto.ai
6
6
  Project-URL: Documentation, https://docs.knowhereto.ai
7
7
  Project-URL: Repository, https://github.com/Ontos-AI/knowhere-python-sdk
8
8
  Author-email: Knowhere Team <team@knowhereto.ai>
9
9
  License-Expression: MIT
10
+ License-File: LICENSE
10
11
  Classifier: Development Status :: 3 - Alpha
11
12
  Classifier: Intended Audience :: Developers
12
13
  Classifier: License :: OSI Approved :: MIT License
@@ -117,6 +118,21 @@ update_job = client.jobs.create(
117
118
  document = client.documents.get(document_id)
118
119
  print(document.status)
119
120
 
121
+ chunks = client.documents.list_chunks(
122
+ document_id,
123
+ page=1,
124
+ page_size=50,
125
+ chunk_type="text",
126
+ )
127
+ print(chunks.pagination.total)
128
+ if chunks.chunks:
129
+ chunk = client.documents.get_chunk(
130
+ document_id,
131
+ chunks.chunks[0].id,
132
+ include_asset_urls=True,
133
+ )
134
+ print(chunk.chunk.content)
135
+
120
136
  client.documents.archive(document_id)
121
137
  ```
122
138
 
@@ -305,6 +321,12 @@ We publish stable releases to [PyPI](https://pypi.org/project/knowhere-python-sd
305
321
  - [pydantic](https://docs.pydantic.dev/) `>=2.0.0,<3.0`
306
322
  - [typing-extensions](https://pypi.org/project/typing-extensions/) `>=4.7.0`
307
323
 
324
+ ## Community
325
+
326
+ - Contributing guide: [CONTRIBUTING.md](./CONTRIBUTING.md)
327
+ - Security policy: [SECURITY.md](./SECURITY.md)
328
+ - Code of conduct: [CODE_OF_CONDUCT.md](./CODE_OF_CONDUCT.md)
329
+
308
330
  ## License
309
331
 
310
332
  MIT
@@ -1,4 +1,4 @@
1
- knowhere/__init__.py,sha256=wicVid8SW7a3AqabHmHI6iIxpY5Tm732eMyQgBQ7zDM,3016
1
+ knowhere/__init__.py,sha256=pucs7krCP306K1iW7_3X-6kY81qJs9FT9H_jly3ZaSA,3297
2
2
  knowhere/_base_client.py,sha256=ddeRR1lWLhes5ipvYX6-TMEecjjiEBGfQdPw_vnSNqA,17978
3
3
  knowhere/_client.py,sha256=WYb-Fhi3x3nQYNfQG9eCgOpLc_wVyAawfPZWdZhFESg,9586
4
4
  knowhere/_constants.py,sha256=ZNCFQC00NpUZIyc_XZ0uemjJE-E8uKAbv3BDa3po9cg,885
@@ -6,7 +6,7 @@ knowhere/_exceptions.py,sha256=NflH7phh_bNFOJmQ758V4mZCAFQskpGXACMz2JIfFNU,11896
6
6
  knowhere/_logging.py,sha256=tNqEA1dLv-adTT6qRq5RBeO35FoWrnS3gwt7gKChLTA,1376
7
7
  knowhere/_response.py,sha256=EsrM794qxCykvl82UkszeqjJzm9_OSq7nsyzaSCnx0I,1415
8
8
  knowhere/_types.py,sha256=8-JFaRcxgBJbw2mV9BwnmCktFVph41a1mduwtXlYidI,1775
9
- knowhere/_version.py,sha256=ma0Xv9k49qOL337sii6xfWylMGz1MNXbCb4rszXekbo,50
9
+ knowhere/_version.py,sha256=KvboaHx7z0Uk_PqU7DuHMqxQA2PwFfFqlSA5cCqIgII,50
10
10
  knowhere/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  knowhere/lib/__init__.py,sha256=e953V5ny3VmDtCw7y_4uPwdTkwwNpe_Y6o4AEgz3ujw,50
12
12
  knowhere/lib/polling.py,sha256=s0EPHozAvNhXLqr5uwU8YXkkwAdF0ji_nIN0QfR6avY,4500
@@ -14,16 +14,17 @@ knowhere/lib/result_parser.py,sha256=dR3knoMq-AFMAe0M3l0YgOM-OrtSmofSLaKZO0tgYao
14
14
  knowhere/lib/upload.py,sha256=eT-O9_wB2WkWUAsUd7VzaKY6DVfNeA6WMHRdwm0HM0o,7849
15
15
  knowhere/resources/__init__.py,sha256=ClsR-yn_0E4KOopD_Yq13wbPHHjl9s15XpydN-d2Rzo,393
16
16
  knowhere/resources/_base.py,sha256=tgKphNTsgMhktWp6_rhyVOZyee4CYlDmD5O1_jWVvYo,1829
17
- knowhere/resources/documents.py,sha256=u_gmrElvpMOABaHkEuTyaYvh4D_CG4pHZt23r8tivaY,2314
17
+ knowhere/resources/documents.py,sha256=itBkO3oud-ilo2tDOIeSB517OPVDVyfwSnPfHYYu23I,5695
18
18
  knowhere/resources/jobs.py,sha256=xYhgYP3Vz7SgGEckmXOvZocNru_4nsS4BoqquojncNw,9727
19
19
  knowhere/resources/retrieval.py,sha256=t_jFY-7wYfYVSH6e3WYgn0IaoaPcABXaeZoqcs-pUIo,4543
20
- knowhere/types/__init__.py,sha256=-T1Rx90y1W3kSW63v6QbXDgTO9aE097vx98xvRaYejU,1452
21
- knowhere/types/document.py,sha256=LbFleglvm538vSDDho82j7fVxvgMXdIVm9wrWemLShY,711
20
+ knowhere/types/__init__.py,sha256=qsfiUolOzimMMen6DkhqW9htAYBunWBwv0r1O3_Hatg,1733
21
+ knowhere/types/document.py,sha256=iWK528fjGNyW36GhNAz0rq3164JzaPpkA0_UiQwbESE,1997
22
22
  knowhere/types/job.py,sha256=VsLUFuELZo8rRemuekTbliTIwaD6CR_dAjgdSriPmw4,2472
23
23
  knowhere/types/params.py,sha256=7DyBd4xMxtLPch-A1130-gI0ajKOv2G5tbSMkE8n6-E,543
24
24
  knowhere/types/result.py,sha256=uSpvOadmKOF5-n_uBTkmWAho2eDsOAUZoK_W96X2jeU,13143
25
25
  knowhere/types/retrieval.py,sha256=EopqmAx2DeO9AmEbd50emdu2mTbTxrhGoJ6DwvvoUCI,1090
26
26
  knowhere/types/shared.py,sha256=K5ezX212othxgCviiE2WnwWFY2MS08pXKJ8Km1ZWmjc,104
27
- knowhere_python_sdk-0.3.1.dist-info/METADATA,sha256=VSSYe-vr9NLen7NEf-BZc4lT3OJF4bGtL3ivqYbItDk,8429
28
- knowhere_python_sdk-0.3.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
29
- knowhere_python_sdk-0.3.1.dist-info/RECORD,,
27
+ knowhere_python_sdk-0.4.0.dist-info/METADATA,sha256=tK9TAtuEueXJUracyIoeZdUzQXL8qSLzTbTmc4uBlQQ,8956
28
+ knowhere_python_sdk-0.4.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
29
+ knowhere_python_sdk-0.4.0.dist-info/licenses/LICENSE,sha256=jrRlxQDHyd_fTtIkQ_LlJV5AdlM_k_RFVPiJ3bTO6FQ,1070
30
+ knowhere_python_sdk-0.4.0.dist-info/RECORD,,
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Knowhere Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.