knowhere-python-sdk 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
knowhere/__init__.py CHANGED
@@ -35,7 +35,15 @@ from knowhere._exceptions import (
35
35
  )
36
36
  from knowhere._types import PollProgressCallback, UploadProgressCallback
37
37
  from knowhere._version import __version__
38
- from knowhere.types.document import Document, DocumentListResponse
38
+ from knowhere.types.document import (
39
+ Document,
40
+ DocumentChunk,
41
+ DocumentChunkListResponse,
42
+ DocumentChunkPagination,
43
+ DocumentChunkResponse,
44
+ DocumentChunkType,
45
+ DocumentListResponse,
46
+ )
39
47
  from knowhere.types.job import Job, JobError, JobProgress, JobResult
40
48
  from knowhere.types.params import ParsingParams, WebhookConfig
41
49
  from knowhere.types.retrieval import (
@@ -98,6 +106,11 @@ __all__: list[str] = [
98
106
  "JobResult",
99
107
  # Document types
100
108
  "Document",
109
+ "DocumentChunk",
110
+ "DocumentChunkListResponse",
111
+ "DocumentChunkPagination",
112
+ "DocumentChunkResponse",
113
+ "DocumentChunkType",
101
114
  "DocumentListResponse",
102
115
  # Retrieval types
103
116
  "RetrievalChannel",
knowhere/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.2" # x-release-please-version
1
+ __version__ = "0.4.0" # x-release-please-version
@@ -5,7 +5,13 @@ from __future__ import annotations
5
5
  from typing import Any, Dict, Optional
6
6
 
7
7
  from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
8
- from knowhere.types.document import Document, DocumentListResponse
8
+ from knowhere.types.document import (
9
+ Document,
10
+ DocumentChunkListResponse,
11
+ DocumentChunkResponse,
12
+ DocumentChunkType,
13
+ DocumentListResponse,
14
+ )
9
15
 
10
16
 
11
17
  class Documents(SyncAPIResource):
@@ -32,6 +38,49 @@ class Documents(SyncAPIResource):
32
38
  cast_to=Document,
33
39
  )
34
40
 
41
+ def list_chunks(
42
+ self,
43
+ document_id: str,
44
+ *,
45
+ page: int = 1,
46
+ page_size: int = 50,
47
+ chunk_type: Optional[DocumentChunkType] = None,
48
+ include_asset_urls: bool = False,
49
+ ) -> DocumentChunkListResponse:
50
+ """List current-revision chunks for one canonical document."""
51
+ params: Dict[str, Any] = _build_chunk_list_params(
52
+ page=page,
53
+ page_size=page_size,
54
+ chunk_type=chunk_type,
55
+ include_asset_urls=include_asset_urls,
56
+ )
57
+
58
+ return self._request(
59
+ "GET",
60
+ f"v1/documents/{document_id}/chunks",
61
+ params=params or None,
62
+ cast_to=DocumentChunkListResponse,
63
+ )
64
+
65
+ def get_chunk(
66
+ self,
67
+ document_id: str,
68
+ document_chunk_id: str,
69
+ *,
70
+ include_asset_urls: bool = False,
71
+ ) -> DocumentChunkResponse:
72
+ """Get one current-revision chunk for one canonical document."""
73
+ params: Dict[str, Any] = _build_chunk_get_params(
74
+ include_asset_urls=include_asset_urls,
75
+ )
76
+
77
+ return self._request(
78
+ "GET",
79
+ f"v1/documents/{document_id}/chunks/{document_chunk_id}",
80
+ params=params or None,
81
+ cast_to=DocumentChunkResponse,
82
+ )
83
+
35
84
  def archive(self, document_id: str) -> Document:
36
85
  """Archive one canonical document by ID."""
37
86
  return self._request(
@@ -65,6 +114,49 @@ class AsyncDocuments(AsyncAPIResource):
65
114
  cast_to=Document,
66
115
  )
67
116
 
117
+ async def list_chunks(
118
+ self,
119
+ document_id: str,
120
+ *,
121
+ page: int = 1,
122
+ page_size: int = 50,
123
+ chunk_type: Optional[DocumentChunkType] = None,
124
+ include_asset_urls: bool = False,
125
+ ) -> DocumentChunkListResponse:
126
+ """List current-revision chunks for one canonical document."""
127
+ params: Dict[str, Any] = _build_chunk_list_params(
128
+ page=page,
129
+ page_size=page_size,
130
+ chunk_type=chunk_type,
131
+ include_asset_urls=include_asset_urls,
132
+ )
133
+
134
+ return await self._request(
135
+ "GET",
136
+ f"v1/documents/{document_id}/chunks",
137
+ params=params or None,
138
+ cast_to=DocumentChunkListResponse,
139
+ )
140
+
141
+ async def get_chunk(
142
+ self,
143
+ document_id: str,
144
+ document_chunk_id: str,
145
+ *,
146
+ include_asset_urls: bool = False,
147
+ ) -> DocumentChunkResponse:
148
+ """Get one current-revision chunk for one canonical document."""
149
+ params: Dict[str, Any] = _build_chunk_get_params(
150
+ include_asset_urls=include_asset_urls,
151
+ )
152
+
153
+ return await self._request(
154
+ "GET",
155
+ f"v1/documents/{document_id}/chunks/{document_chunk_id}",
156
+ params=params or None,
157
+ cast_to=DocumentChunkResponse,
158
+ )
159
+
68
160
  async def archive(self, document_id: str) -> Document:
69
161
  """Archive one canonical document by ID."""
70
162
  return await self._request(
@@ -72,3 +164,28 @@ class AsyncDocuments(AsyncAPIResource):
72
164
  f"v1/documents/{document_id}/archive",
73
165
  cast_to=Document,
74
166
  )
167
+
168
+
169
+ def _build_chunk_list_params(
170
+ *,
171
+ page: int,
172
+ page_size: int,
173
+ chunk_type: Optional[DocumentChunkType],
174
+ include_asset_urls: bool,
175
+ ) -> Dict[str, Any]:
176
+ params: Dict[str, Any] = {}
177
+ if page != 1:
178
+ params["page"] = page
179
+ if page_size != 50:
180
+ params["page_size"] = page_size
181
+ if chunk_type is not None:
182
+ params["chunk_type"] = chunk_type
183
+ if include_asset_urls:
184
+ params["include_asset_urls"] = True
185
+ return params
186
+
187
+
188
+ def _build_chunk_get_params(*, include_asset_urls: bool) -> Dict[str, Any]:
189
+ if not include_asset_urls:
190
+ return {}
191
+ return {"include_asset_urls": True}
@@ -2,7 +2,15 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from knowhere.types.document import Document, DocumentListResponse
5
+ from knowhere.types.document import (
6
+ Document,
7
+ DocumentChunk,
8
+ DocumentChunkListResponse,
9
+ DocumentChunkPagination,
10
+ DocumentChunkResponse,
11
+ DocumentChunkType,
12
+ DocumentListResponse,
13
+ )
6
14
  from knowhere.types.job import Job, JobError, JobResult
7
15
  from knowhere.types.params import ParsingParams, WebhookConfig
8
16
  from knowhere.types.retrieval import (
@@ -39,6 +47,11 @@ __all__: list[str] = [
39
47
  "JobResult",
40
48
  # document
41
49
  "Document",
50
+ "DocumentChunk",
51
+ "DocumentChunkListResponse",
52
+ "DocumentChunkPagination",
53
+ "DocumentChunkResponse",
54
+ "DocumentChunkType",
42
55
  "DocumentListResponse",
43
56
  # retrieval
44
57
  "RetrievalChannel",
@@ -3,7 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from datetime import datetime
6
- from typing import Optional
6
+ from typing import Any, Dict, Literal, Optional
7
7
 
8
8
  from pydantic import BaseModel
9
9
 
@@ -26,3 +26,53 @@ class DocumentListResponse(BaseModel):
26
26
 
27
27
  namespace: str
28
28
  documents: list[Document]
29
+
30
+
31
+ DocumentChunkType = Literal["text", "image", "table"]
32
+
33
+
34
+ class DocumentChunkPagination(BaseModel):
35
+ """Pagination metadata returned by document chunk list endpoints."""
36
+
37
+ page: int
38
+ page_size: int
39
+ total: int
40
+ total_pages: int
41
+
42
+
43
+ class DocumentChunk(BaseModel):
44
+ """One current-revision document chunk."""
45
+
46
+ id: str
47
+ chunk_id: str
48
+ chunk_type: DocumentChunkType
49
+ content: Optional[str] = None
50
+ section_id: Optional[str] = None
51
+ section_path: Optional[str] = None
52
+ source_chunk_path: Optional[str] = None
53
+ file_path: Optional[str] = None
54
+ sort_order: int
55
+ metadata: Dict[str, Any]
56
+ asset_url: Optional[str] = None
57
+ created_at: Optional[datetime] = None
58
+
59
+
60
+ class DocumentChunkListResponse(BaseModel):
61
+ """Response from ``GET /v1/documents/{document_id}/chunks``."""
62
+
63
+ document_id: str
64
+ namespace: str
65
+ job_result_id: Optional[str] = None
66
+ job_id: Optional[str] = None
67
+ chunks: list[DocumentChunk]
68
+ pagination: DocumentChunkPagination
69
+
70
+
71
+ class DocumentChunkResponse(BaseModel):
72
+ """Response from ``GET /v1/documents/{document_id}/chunks/{chunk_id}``."""
73
+
74
+ document_id: str
75
+ namespace: str
76
+ job_result_id: Optional[str] = None
77
+ job_id: Optional[str] = None
78
+ chunk: DocumentChunk
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: knowhere-python-sdk
3
- Version: 0.3.2
3
+ Version: 0.4.0
4
4
  Summary: Official Python SDK for the Knowhere document parsing API
5
5
  Project-URL: Homepage, https://knowhereto.ai
6
6
  Project-URL: Documentation, https://docs.knowhereto.ai
@@ -118,6 +118,21 @@ update_job = client.jobs.create(
118
118
  document = client.documents.get(document_id)
119
119
  print(document.status)
120
120
 
121
+ chunks = client.documents.list_chunks(
122
+ document_id,
123
+ page=1,
124
+ page_size=50,
125
+ chunk_type="text",
126
+ )
127
+ print(chunks.pagination.total)
128
+ if chunks.chunks:
129
+ chunk = client.documents.get_chunk(
130
+ document_id,
131
+ chunks.chunks[0].id,
132
+ include_asset_urls=True,
133
+ )
134
+ print(chunk.chunk.content)
135
+
121
136
  client.documents.archive(document_id)
122
137
  ```
123
138
 
@@ -1,4 +1,4 @@
1
- knowhere/__init__.py,sha256=wicVid8SW7a3AqabHmHI6iIxpY5Tm732eMyQgBQ7zDM,3016
1
+ knowhere/__init__.py,sha256=pucs7krCP306K1iW7_3X-6kY81qJs9FT9H_jly3ZaSA,3297
2
2
  knowhere/_base_client.py,sha256=ddeRR1lWLhes5ipvYX6-TMEecjjiEBGfQdPw_vnSNqA,17978
3
3
  knowhere/_client.py,sha256=WYb-Fhi3x3nQYNfQG9eCgOpLc_wVyAawfPZWdZhFESg,9586
4
4
  knowhere/_constants.py,sha256=ZNCFQC00NpUZIyc_XZ0uemjJE-E8uKAbv3BDa3po9cg,885
@@ -6,7 +6,7 @@ knowhere/_exceptions.py,sha256=NflH7phh_bNFOJmQ758V4mZCAFQskpGXACMz2JIfFNU,11896
6
6
  knowhere/_logging.py,sha256=tNqEA1dLv-adTT6qRq5RBeO35FoWrnS3gwt7gKChLTA,1376
7
7
  knowhere/_response.py,sha256=EsrM794qxCykvl82UkszeqjJzm9_OSq7nsyzaSCnx0I,1415
8
8
  knowhere/_types.py,sha256=8-JFaRcxgBJbw2mV9BwnmCktFVph41a1mduwtXlYidI,1775
9
- knowhere/_version.py,sha256=eN28KXRy0VvgUkjSTUYstuIdAhhpG6cgufP7uWuf12w,50
9
+ knowhere/_version.py,sha256=KvboaHx7z0Uk_PqU7DuHMqxQA2PwFfFqlSA5cCqIgII,50
10
10
  knowhere/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  knowhere/lib/__init__.py,sha256=e953V5ny3VmDtCw7y_4uPwdTkwwNpe_Y6o4AEgz3ujw,50
12
12
  knowhere/lib/polling.py,sha256=s0EPHozAvNhXLqr5uwU8YXkkwAdF0ji_nIN0QfR6avY,4500
@@ -14,17 +14,17 @@ knowhere/lib/result_parser.py,sha256=dR3knoMq-AFMAe0M3l0YgOM-OrtSmofSLaKZO0tgYao
14
14
  knowhere/lib/upload.py,sha256=eT-O9_wB2WkWUAsUd7VzaKY6DVfNeA6WMHRdwm0HM0o,7849
15
15
  knowhere/resources/__init__.py,sha256=ClsR-yn_0E4KOopD_Yq13wbPHHjl9s15XpydN-d2Rzo,393
16
16
  knowhere/resources/_base.py,sha256=tgKphNTsgMhktWp6_rhyVOZyee4CYlDmD5O1_jWVvYo,1829
17
- knowhere/resources/documents.py,sha256=u_gmrElvpMOABaHkEuTyaYvh4D_CG4pHZt23r8tivaY,2314
17
+ knowhere/resources/documents.py,sha256=itBkO3oud-ilo2tDOIeSB517OPVDVyfwSnPfHYYu23I,5695
18
18
  knowhere/resources/jobs.py,sha256=xYhgYP3Vz7SgGEckmXOvZocNru_4nsS4BoqquojncNw,9727
19
19
  knowhere/resources/retrieval.py,sha256=t_jFY-7wYfYVSH6e3WYgn0IaoaPcABXaeZoqcs-pUIo,4543
20
- knowhere/types/__init__.py,sha256=-T1Rx90y1W3kSW63v6QbXDgTO9aE097vx98xvRaYejU,1452
21
- knowhere/types/document.py,sha256=LbFleglvm538vSDDho82j7fVxvgMXdIVm9wrWemLShY,711
20
+ knowhere/types/__init__.py,sha256=qsfiUolOzimMMen6DkhqW9htAYBunWBwv0r1O3_Hatg,1733
21
+ knowhere/types/document.py,sha256=iWK528fjGNyW36GhNAz0rq3164JzaPpkA0_UiQwbESE,1997
22
22
  knowhere/types/job.py,sha256=VsLUFuELZo8rRemuekTbliTIwaD6CR_dAjgdSriPmw4,2472
23
23
  knowhere/types/params.py,sha256=7DyBd4xMxtLPch-A1130-gI0ajKOv2G5tbSMkE8n6-E,543
24
24
  knowhere/types/result.py,sha256=uSpvOadmKOF5-n_uBTkmWAho2eDsOAUZoK_W96X2jeU,13143
25
25
  knowhere/types/retrieval.py,sha256=EopqmAx2DeO9AmEbd50emdu2mTbTxrhGoJ6DwvvoUCI,1090
26
26
  knowhere/types/shared.py,sha256=K5ezX212othxgCviiE2WnwWFY2MS08pXKJ8Km1ZWmjc,104
27
- knowhere_python_sdk-0.3.2.dist-info/METADATA,sha256=Z12Y7vX6r6HeKg1DRw-J1isGwCo3Dt_PmSeJ5BRXU8g,8635
28
- knowhere_python_sdk-0.3.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
29
- knowhere_python_sdk-0.3.2.dist-info/licenses/LICENSE,sha256=jrRlxQDHyd_fTtIkQ_LlJV5AdlM_k_RFVPiJ3bTO6FQ,1070
30
- knowhere_python_sdk-0.3.2.dist-info/RECORD,,
27
+ knowhere_python_sdk-0.4.0.dist-info/METADATA,sha256=tK9TAtuEueXJUracyIoeZdUzQXL8qSLzTbTmc4uBlQQ,8956
28
+ knowhere_python_sdk-0.4.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
29
+ knowhere_python_sdk-0.4.0.dist-info/licenses/LICENSE,sha256=jrRlxQDHyd_fTtIkQ_LlJV5AdlM_k_RFVPiJ3bTO6FQ,1070
30
+ knowhere_python_sdk-0.4.0.dist-info/RECORD,,