knowhere-python-sdk 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
knowhere/__init__.py CHANGED
@@ -22,6 +22,7 @@ from knowhere._exceptions import (
22
22
  ConflictError,
23
23
  GatewayTimeoutError,
24
24
  InternalServerError,
25
+ InvalidStateError,
25
26
  JobFailedError,
26
27
  KnowhereError,
27
28
  NotFoundError,
@@ -30,6 +31,7 @@ from knowhere._exceptions import (
30
31
  PollingTimeoutError,
31
32
  RateLimitError,
32
33
  ServiceUnavailableError,
34
+ ValidationError,
33
35
  )
34
36
  from knowhere._types import PollProgressCallback, UploadProgressCallback
35
37
  from knowhere._version import __version__
@@ -44,6 +46,10 @@ from knowhere.types.result import (
44
46
  ImageFileInfo,
45
47
  Manifest,
46
48
  ParseResult,
49
+ ProcessingCost,
50
+ ProcessingMetadata,
51
+ ProcessingTiming,
52
+ SlimChunk,
47
53
  Statistics,
48
54
  TableChunk,
49
55
  TableFileInfo,
@@ -58,6 +64,8 @@ __all__: list[str] = [
58
64
  "__version__",
59
65
  # Exceptions
60
66
  "KnowhereError",
67
+ "ValidationError",
68
+ "InvalidStateError",
61
69
  "APIConnectionError",
62
70
  "APITimeoutError",
63
71
  "APIStatusError",
@@ -87,6 +95,10 @@ __all__: list[str] = [
87
95
  "FileIndex",
88
96
  "ImageFileInfo",
89
97
  "TableFileInfo",
98
+ "ProcessingCost",
99
+ "ProcessingMetadata",
100
+ "ProcessingTiming",
101
+ "SlimChunk",
90
102
  "BaseChunk",
91
103
  "TextChunk",
92
104
  "ImageChunk",
knowhere/_base_client.py CHANGED
@@ -25,6 +25,7 @@ from knowhere._constants import (
25
25
  from knowhere._exceptions import (
26
26
  APIConnectionError,
27
27
  APITimeoutError,
28
+ ValidationError,
28
29
  makeStatusError,
29
30
  )
30
31
  from knowhere._logging import getLogger, redactSensitiveHeaders
@@ -35,17 +36,23 @@ T = TypeVar("T")
35
36
 
36
37
  _logger = getLogger()
37
38
 
38
- # Error codes that are safe to retry
39
- _RETRYABLE_ERROR_CODES: frozenset[str] = frozenset({
40
- "rate_limit_exceeded",
41
- "service_unavailable",
42
- "gateway_timeout",
43
- "internal_server_error",
44
- "timeout",
39
+ # Error codes that are always safe to retry (matches server ALWAYS_RETRYABLE_ERROR_CODES)
40
+ _ALWAYS_RETRYABLE_ERROR_CODES: frozenset[str] = frozenset({
41
+ "ABORTED", # 409 - Concurrency conflict
42
+ "UNAVAILABLE", # 503 - Service temporarily down
43
+ "DEADLINE_EXCEEDED", # 504 - Timeout
45
44
  })
46
45
 
47
- # Status codes that are safe to retry
48
- _RETRYABLE_STATUS_CODES: frozenset[int] = frozenset({408, 429, 500, 502, 503, 504})
46
+ # RESOURCE_EXHAUSTED (429) is conditionally retryable:
47
+ # - Rate limit: details.retry_after present → RETRY
48
+ # - Quota exceeded: no retry_after → DO NOT RETRY
49
+ _CONDITIONALLY_RETRYABLE_ERROR_CODE: str = "RESOURCE_EXHAUSTED"
50
+
51
+ # HTTP status codes that are always safe to retry
52
+ _ALWAYS_RETRYABLE_STATUS_CODES: frozenset[int] = frozenset({409, 502, 503, 504})
53
+
54
+ # HTTP status code that is conditionally retryable (only with retry_after)
55
+ _CONDITIONALLY_RETRYABLE_STATUS_CODE: int = 429
49
56
 
50
57
 
51
58
  class BaseClient:
@@ -71,7 +78,7 @@ class BaseClient:
71
78
  # Resolve: arg > env > default
72
79
  resolved_key: Optional[str] = api_key or os.environ.get(ENV_API_KEY)
73
80
  if not resolved_key:
74
- raise ValueError(
81
+ raise ValidationError(
75
82
  "An API key must be provided via the 'api_key' argument "
76
83
  f"or the {ENV_API_KEY} environment variable."
77
84
  )
@@ -122,12 +129,68 @@ class BaseClient:
122
129
  self,
123
130
  status_code: int,
124
131
  error_code: Optional[str] = None,
125
- details: Optional[Any] = None,
132
+ details: Optional[Dict[str, Any]] = None,
126
133
  ) -> bool:
127
- """Decide whether a request should be retried."""
128
- if error_code and error_code in _RETRYABLE_ERROR_CODES:
134
+ """Decide whether a request should be retried.
135
+
136
+ Follows server-side retry semantics:
137
+ - ABORTED, UNAVAILABLE, DEADLINE_EXCEEDED → always retry
138
+ - RESOURCE_EXHAUSTED (429) → retry only if details.retry_after present
139
+ - All other errors → never retry
140
+ """
141
+ if error_code:
142
+ if error_code in _ALWAYS_RETRYABLE_ERROR_CODES:
143
+ return True
144
+ if error_code == _CONDITIONALLY_RETRYABLE_ERROR_CODE:
145
+ return self._hasRetryAfter(details)
146
+ return False
147
+
148
+ # Fallback to status code when error_code is unavailable
149
+ if status_code in _ALWAYS_RETRYABLE_STATUS_CODES:
129
150
  return True
130
- return status_code in _RETRYABLE_STATUS_CODES
151
+ if status_code == _CONDITIONALLY_RETRYABLE_STATUS_CODE:
152
+ return self._hasRetryAfter(details)
153
+ return False
154
+
155
+ @staticmethod
156
+ def _hasRetryAfter(details: Optional[Dict[str, Any]]) -> bool:
157
+ """Check if details contains a retry_after hint."""
158
+ if not isinstance(details, dict):
159
+ return False
160
+ retry_after: Any = details.get("retry_after")
161
+ return retry_after is not None
162
+
163
+ @staticmethod
164
+ def _extractRetryAfter(
165
+ error_body: Optional[Dict[str, Any]],
166
+ response: httpx.Response,
167
+ ) -> Optional[float]:
168
+ """Extract retry_after from the response body or Retry-After header.
169
+
170
+ The server puts retry_after in ``error.details.retry_after``.
171
+ Falls back to the HTTP ``Retry-After`` header.
172
+ """
173
+ # Prefer body: error.details.retry_after
174
+ if isinstance(error_body, dict):
175
+ err_obj: Any = error_body.get("error", error_body)
176
+ if isinstance(err_obj, dict):
177
+ details: Any = err_obj.get("details")
178
+ if isinstance(details, dict):
179
+ raw: Any = details.get("retry_after")
180
+ if raw is not None:
181
+ try:
182
+ return float(raw)
183
+ except (ValueError, TypeError):
184
+ pass
185
+
186
+ # Fallback: HTTP Retry-After header
187
+ header_raw: Optional[str] = response.headers.get("retry-after")
188
+ if header_raw is not None:
189
+ try:
190
+ return float(header_raw)
191
+ except (ValueError, TypeError):
192
+ pass
193
+ return None
131
194
 
132
195
  def _calculateRetryDelay(
133
196
  self,
@@ -257,24 +320,24 @@ class SyncAPIClient(BaseClient):
257
320
  response
258
321
  )
259
322
  error_code: Optional[str] = None
323
+ error_details: Optional[Dict[str, Any]] = None
260
324
  if isinstance(error_body, dict):
261
325
  err_obj: Any = error_body.get("error", error_body)
262
326
  if isinstance(err_obj, dict):
263
327
  error_code = err_obj.get("code")
328
+ raw_details: Any = err_obj.get("details")
329
+ if isinstance(raw_details, dict):
330
+ error_details = raw_details
264
331
 
265
332
  if (
266
333
  attempt < self.max_retries
267
- and self._shouldRetry(response.status_code, error_code)
334
+ and self._shouldRetry(
335
+ response.status_code, error_code, error_details
336
+ )
268
337
  ):
269
- retry_after_raw: Optional[str] = response.headers.get(
270
- "retry-after"
338
+ retry_after_val: Optional[float] = self._extractRetryAfter(
339
+ error_body, response
271
340
  )
272
- retry_after_val: Optional[float] = None
273
- if retry_after_raw:
274
- try:
275
- retry_after_val = float(retry_after_raw)
276
- except (ValueError, TypeError):
277
- pass
278
341
  delay = self._calculateRetryDelay(attempt, retry_after_val)
279
342
  _logger.warning(
280
343
  "Retryable error %d on attempt %d/%d, retrying in %.1fs",
@@ -404,22 +467,24 @@ class AsyncAPIClient(BaseClient):
404
467
 
405
468
  error_body: Optional[Dict[str, Any]] = self._parseErrorResponse(response)
406
469
  error_code: Optional[str] = None
470
+ error_details: Optional[Dict[str, Any]] = None
407
471
  if isinstance(error_body, dict):
408
472
  err_obj: Any = error_body.get("error", error_body)
409
473
  if isinstance(err_obj, dict):
410
474
  error_code = err_obj.get("code")
475
+ raw_details: Any = err_obj.get("details")
476
+ if isinstance(raw_details, dict):
477
+ error_details = raw_details
411
478
 
412
479
  if (
413
480
  attempt < self.max_retries
414
- and self._shouldRetry(response.status_code, error_code)
481
+ and self._shouldRetry(
482
+ response.status_code, error_code, error_details
483
+ )
415
484
  ):
416
- retry_after_raw: Optional[str] = response.headers.get("retry-after")
417
- retry_after_val: Optional[float] = None
418
- if retry_after_raw:
419
- try:
420
- retry_after_val = float(retry_after_raw)
421
- except (ValueError, TypeError):
422
- pass
485
+ retry_after_val: Optional[float] = self._extractRetryAfter(
486
+ error_body, response
487
+ )
423
488
  delay = self._calculateRetryDelay(attempt, retry_after_val)
424
489
  _logger.warning(
425
490
  "Retryable error %d on attempt %d/%d, retrying in %.1fs",
knowhere/_client.py CHANGED
@@ -13,6 +13,7 @@ from typing import BinaryIO, Optional, Union, overload
13
13
 
14
14
  from knowhere._base_client import AsyncAPIClient, SyncAPIClient
15
15
  from knowhere._constants import DEFAULT_POLL_INTERVAL, DEFAULT_POLL_TIMEOUT
16
+ from knowhere._exceptions import ValidationError
16
17
  from knowhere._logging import getLogger
17
18
  from knowhere._types import (
18
19
  PollProgressCallback,
@@ -94,9 +95,9 @@ class Knowhere(SyncAPIClient):
94
95
  Provide exactly one of *url* or *file*.
95
96
  """
96
97
  if url and file:
97
- raise ValueError("Provide either 'url' or 'file', not both.")
98
+ raise ValidationError("Provide either 'url' or 'file', not both.")
98
99
  if not url and file is None:
99
- raise ValueError("Provide either 'url' or 'file'.")
100
+ raise ValidationError("Provide either 'url' or 'file'.")
100
101
 
101
102
  # Determine source type and create job
102
103
  if url:
@@ -196,9 +197,9 @@ class AsyncKnowhere(AsyncAPIClient):
196
197
  ) -> ParseResult:
197
198
  """Parse a document end-to-end (async version)."""
198
199
  if url and file:
199
- raise ValueError("Provide either 'url' or 'file', not both.")
200
+ raise ValidationError("Provide either 'url' or 'file', not both.")
200
201
  if not url and file is None:
201
- raise ValueError("Provide either 'url' or 'file'.")
202
+ raise ValidationError("Provide either 'url' or 'file'.")
202
203
 
203
204
  if url:
204
205
  job: Job = await self.jobs.create(
knowhere/_constants.py CHANGED
@@ -18,6 +18,7 @@ DEFAULT_POLL_INTERVAL: float = 10.0
18
18
 
19
19
  # Retry configuration
20
20
  DEFAULT_MAX_RETRIES: int = 5
21
+ DEFAULT_UPLOAD_MAX_RETRIES: int = 2
21
22
 
22
23
  # Polling configuration
23
24
  MAX_POLL_INTERVAL: float = 30.0
knowhere/_exceptions.py CHANGED
@@ -41,6 +41,19 @@ class APITimeoutError(APIConnectionError):
41
41
  super().__init__(message)
42
42
 
43
43
 
44
+ # ---------------------------------------------------------------------------
45
+ # Validation / state
46
+ # ---------------------------------------------------------------------------
47
+
48
+
49
+ class ValidationError(KnowhereError):
50
+ """Raised when the caller provides invalid arguments."""
51
+
52
+
53
+ class InvalidStateError(KnowhereError):
54
+ """Raised when an object is in an unexpected state for the operation."""
55
+
56
+
44
57
  # ---------------------------------------------------------------------------
45
58
  # Polling / job errors
46
59
  # ---------------------------------------------------------------------------
@@ -161,9 +174,17 @@ class ConflictError(APIStatusError):
161
174
 
162
175
 
163
176
  class RateLimitError(APIStatusError):
164
- """HTTP 429 — includes optional ``retry_after`` hint."""
177
+ """HTTP 429 — includes optional rate limit hints from the server.
178
+
179
+ Attributes:
180
+ retry_after: Seconds to wait before retrying (``None`` for quota exceeded).
181
+ limit: Maximum allowed requests in the rate window.
182
+ period: Rate window unit (``"second"``, ``"minute"``, ``"hour"``, ``"day"``).
183
+ """
165
184
 
166
185
  retry_after: Optional[float]
186
+ limit: Optional[int]
187
+ period: Optional[str]
167
188
 
168
189
  def __init__(
169
190
  self,
@@ -176,6 +197,8 @@ class RateLimitError(APIStatusError):
176
197
  body: Optional[Any] = None,
177
198
  response: httpx.Response,
178
199
  retry_after: Optional[float] = None,
200
+ limit: Optional[int] = None,
201
+ period: Optional[str] = None,
179
202
  ) -> None:
180
203
  super().__init__(
181
204
  status_code,
@@ -187,6 +210,8 @@ class RateLimitError(APIStatusError):
187
210
  response=response,
188
211
  )
189
212
  self.retry_after = retry_after
213
+ self.limit = limit
214
+ self.period = period
190
215
 
191
216
 
192
217
  class InternalServerError(APIStatusError):
@@ -194,9 +219,17 @@ class InternalServerError(APIStatusError):
194
219
 
195
220
 
196
221
  class ServiceUnavailableError(APIStatusError):
197
- """HTTP 502 / 503 — includes optional ``retry_after`` hint."""
222
+ """HTTP 502 / 503 — includes optional retry and rate limit hints from the server.
223
+
224
+ Attributes:
225
+ retry_after: Seconds to wait before retrying.
226
+ limit: Maximum allowed requests in the rate window (optional).
227
+ period: Rate window unit (optional).
228
+ """
198
229
 
199
230
  retry_after: Optional[float]
231
+ limit: Optional[int]
232
+ period: Optional[str]
200
233
 
201
234
  def __init__(
202
235
  self,
@@ -209,6 +242,8 @@ class ServiceUnavailableError(APIStatusError):
209
242
  body: Optional[Any] = None,
210
243
  response: httpx.Response,
211
244
  retry_after: Optional[float] = None,
245
+ limit: Optional[int] = None,
246
+ period: Optional[str] = None,
212
247
  ) -> None:
213
248
  super().__init__(
214
249
  status_code,
@@ -220,12 +255,22 @@ class ServiceUnavailableError(APIStatusError):
220
255
  response=response,
221
256
  )
222
257
  self.retry_after = retry_after
258
+ self.limit = limit
259
+ self.period = period
223
260
 
224
261
 
225
262
  class GatewayTimeoutError(APIStatusError):
226
- """HTTP 504 — includes optional ``retry_after`` hint."""
263
+ """HTTP 504 — includes optional retry and rate limit hints from the server.
264
+
265
+ Attributes:
266
+ retry_after: Seconds to wait before retrying.
267
+ limit: Maximum allowed requests in the rate window (optional).
268
+ period: Rate window unit (optional).
269
+ """
227
270
 
228
271
  retry_after: Optional[float]
272
+ limit: Optional[int]
273
+ period: Optional[str]
229
274
 
230
275
  def __init__(
231
276
  self,
@@ -238,6 +283,8 @@ class GatewayTimeoutError(APIStatusError):
238
283
  body: Optional[Any] = None,
239
284
  response: httpx.Response,
240
285
  retry_after: Optional[float] = None,
286
+ limit: Optional[int] = None,
287
+ period: Optional[str] = None,
241
288
  ) -> None:
242
289
  super().__init__(
243
290
  status_code,
@@ -249,6 +296,8 @@ class GatewayTimeoutError(APIStatusError):
249
296
  response=response,
250
297
  )
251
298
  self.retry_after = retry_after
299
+ self.limit = limit
300
+ self.period = period
252
301
 
253
302
 
254
303
  # ---------------------------------------------------------------------------
@@ -298,14 +347,36 @@ def makeStatusError(
298
347
  status_code, APIStatusError
299
348
  )
300
349
 
301
- # Extract retry_after for classes that support it
350
+ # Extract retry hints for classes that support them
351
+ # Prefer body: error.details.retry_after, fallback to HTTP header
302
352
  retry_after: Optional[float] = None
303
- raw_retry: Optional[str] = response.headers.get("retry-after")
304
- if raw_retry is not None:
305
- try:
306
- retry_after = float(raw_retry)
307
- except (ValueError, TypeError):
308
- retry_after = None
353
+ limit: Optional[int] = None
354
+ period: Optional[str] = None
355
+
356
+ if isinstance(details, dict):
357
+ raw_body_retry: Any = details.get("retry_after")
358
+ if raw_body_retry is not None:
359
+ try:
360
+ retry_after = float(raw_body_retry)
361
+ except (ValueError, TypeError):
362
+ pass
363
+ raw_limit: Any = details.get("limit")
364
+ if raw_limit is not None:
365
+ try:
366
+ limit = int(raw_limit)
367
+ except (ValueError, TypeError):
368
+ pass
369
+ raw_period: Any = details.get("period")
370
+ if isinstance(raw_period, str):
371
+ period = raw_period
372
+
373
+ if retry_after is None:
374
+ raw_header_retry: Optional[str] = response.headers.get("retry-after")
375
+ if raw_header_retry is not None:
376
+ try:
377
+ retry_after = float(raw_header_retry)
378
+ except (ValueError, TypeError):
379
+ pass
309
380
 
310
381
  common_kwargs: Dict[str, Any] = dict(
311
382
  code=code,
@@ -316,9 +387,31 @@ def makeStatusError(
316
387
  response=response,
317
388
  )
318
389
 
319
- if exception_class in (RateLimitError, ServiceUnavailableError, GatewayTimeoutError):
320
- return exception_class(
321
- status_code, **common_kwargs, retry_after=retry_after # type: ignore[call-arg]
390
+ if exception_class is RateLimitError:
391
+ return RateLimitError(
392
+ status_code,
393
+ **common_kwargs,
394
+ retry_after=retry_after,
395
+ limit=limit,
396
+ period=period,
397
+ )
398
+
399
+ if exception_class is ServiceUnavailableError:
400
+ return ServiceUnavailableError(
401
+ status_code,
402
+ **common_kwargs,
403
+ retry_after=retry_after,
404
+ limit=limit,
405
+ period=period,
406
+ )
407
+
408
+ if exception_class is GatewayTimeoutError:
409
+ return GatewayTimeoutError(
410
+ status_code,
411
+ **common_kwargs,
412
+ retry_after=retry_after,
413
+ limit=limit,
414
+ period=period,
322
415
  )
323
416
 
324
417
  return exception_class(status_code, **common_kwargs)
knowhere/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.0" # x-release-please-version
1
+ __version__ = "0.2.1" # x-release-please-version
@@ -16,8 +16,10 @@ from knowhere.types.result import (
16
16
  ImageChunk,
17
17
  Manifest,
18
18
  ParseResult,
19
+ SlimChunk,
19
20
  TableChunk,
20
21
  TextChunk,
22
+ TextChunkTokens,
21
23
  )
22
24
 
23
25
  _logger = getLogger()
@@ -79,6 +81,38 @@ def _extractFilePath(raw: Dict[str, Any]) -> Optional[str]:
79
81
  return fallback
80
82
 
81
83
 
84
+ def _normalizeTokenList(raw_tokens: List[Any]) -> List[str]:
85
+ """Return a string-only token list with empty values removed."""
86
+ normalized_tokens: List[str] = []
87
+ for raw_token in raw_tokens:
88
+ token_text: str = str(raw_token).strip()
89
+ if token_text:
90
+ normalized_tokens.append(token_text)
91
+ return normalized_tokens
92
+
93
+
94
+ def _parseTextChunkTokens(
95
+ raw_tokens: Any,
96
+ *,
97
+ chunk_id: str,
98
+ ) -> Optional[TextChunkTokens]:
99
+ """Normalize text chunk tokens from the current backend payload."""
100
+ if raw_tokens is None:
101
+ return None
102
+ if isinstance(raw_tokens, bool):
103
+ raise KnowhereError(
104
+ f"Invalid tokens payload for text chunk '{chunk_id}': expected list[str], got bool."
105
+ )
106
+ if isinstance(raw_tokens, list):
107
+ return _normalizeTokenList(raw_tokens)
108
+
109
+ raise KnowhereError(
110
+ "Invalid tokens payload for text chunk "
111
+ f"'{chunk_id}': expected list[str], "
112
+ f"got {type(raw_tokens).__name__}."
113
+ )
114
+
115
+
82
116
  def _buildChunks(
83
117
  raw_chunks: List[Dict[str, Any]],
84
118
  zf: zipfile.ZipFile,
@@ -101,6 +135,7 @@ def _buildChunks(
101
135
  type="image",
102
136
  content=raw.get("content", ""),
103
137
  path=raw.get("path"),
138
+ page_nums=metadata.get("page_nums", raw.get("page_nums")),
104
139
  length=metadata.get("length", raw.get("length", 0)),
105
140
  file_path=file_path,
106
141
  original_name=metadata.get("original_name", raw.get("original_name")),
@@ -118,6 +153,7 @@ def _buildChunks(
118
153
  type="table",
119
154
  content=raw.get("content", ""),
120
155
  path=raw.get("path"),
156
+ page_nums=metadata.get("page_nums", raw.get("page_nums")),
121
157
  length=metadata.get("length", raw.get("length", 0)),
122
158
  file_path=file_path,
123
159
  original_name=metadata.get("original_name", raw.get("original_name")),
@@ -127,15 +163,19 @@ def _buildChunks(
127
163
  )
128
164
  else:
129
165
  metadata = raw.get("metadata", {})
166
+ chunk_id: str = raw.get("chunk_id", "")
167
+ raw_tokens: Any = metadata.get("tokens", raw.get("tokens"))
130
168
  chunk = TextChunk(
131
- chunk_id=raw.get("chunk_id", ""),
169
+ chunk_id=chunk_id,
132
170
  type="text",
133
171
  content=raw.get("content", ""),
134
172
  path=raw.get("path"),
173
+ page_nums=metadata.get("page_nums", raw.get("page_nums")),
135
174
  length=metadata.get("length", raw.get("length", 0)),
136
- tokens=metadata.get("tokens", raw.get("tokens")),
175
+ tokens=_parseTextChunkTokens(raw_tokens, chunk_id=chunk_id),
137
176
  keywords=metadata.get("keywords", raw.get("keywords")),
138
177
  summary=metadata.get("summary", raw.get("summary")),
178
+ connect_to=metadata.get("connect_to", raw.get("connect_to")),
139
179
  relationships=metadata.get("relationships", raw.get("relationships")),
140
180
  )
141
181
 
@@ -195,12 +235,39 @@ def parseResultZip(
195
235
  json.loads(hierarchy_text) if hierarchy_text else None
196
236
  )
197
237
 
238
+ # -- Optimized sidecar files --
239
+ chunks_slim_text: Optional[str] = _readZipText(zf, "chunks_slim.json")
240
+ parsed_chunks_slim: Any = json.loads(chunks_slim_text) if chunks_slim_text else None
241
+ if isinstance(parsed_chunks_slim, dict) and "chunks" in parsed_chunks_slim:
242
+ raw_chunks_slim: List[Dict[str, Any]] = parsed_chunks_slim["chunks"]
243
+ elif isinstance(parsed_chunks_slim, list):
244
+ raw_chunks_slim = parsed_chunks_slim
245
+ else:
246
+ raw_chunks_slim = []
247
+ chunks_slim: Optional[List[SlimChunk]] = (
248
+ [SlimChunk.model_validate(chunk) for chunk in raw_chunks_slim]
249
+ if chunks_slim_text is not None
250
+ else None
251
+ )
252
+
253
+ toc_hierarchies_text: Optional[str] = _readZipText(zf, "toc_hierarchies.json")
254
+ toc_hierarchies: Optional[Any] = (
255
+ json.loads(toc_hierarchies_text) if toc_hierarchies_text else None
256
+ )
257
+
258
+ kb_csv: Optional[str] = _readZipText(zf, "kb.csv")
259
+ hierarchy_view_html: Optional[str] = _readZipText(zf, "hierarchy_view.html")
260
+
198
261
  zf.close()
199
262
 
200
263
  return ParseResult(
201
264
  manifest=manifest,
202
265
  chunks=chunks,
266
+ chunks_slim=chunks_slim,
203
267
  full_markdown=full_markdown,
204
268
  hierarchy=hierarchy,
269
+ toc_hierarchies=toc_hierarchies,
270
+ kb_csv=kb_csv,
271
+ hierarchy_view_html=hierarchy_view_html,
205
272
  raw_zip=zip_bytes,
206
273
  )