cohere-compass-sdk 2.2.0__tar.gz → 2.2.2__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (26)
  1. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/PKG-INFO +1 -1
  2. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/clients/compass.py +28 -11
  3. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/clients/compass_async.py +30 -9
  4. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/clients/parser.py +25 -7
  5. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/clients/parser_async.py +25 -3
  6. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/models/config.py +16 -3
  7. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/models/documents.py +1 -1
  8. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/pyproject.toml +2 -2
  9. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/LICENSE +0 -0
  10. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/README.md +0 -0
  11. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/__init__.py +0 -0
  12. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/clients/__init__.py +0 -0
  13. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/clients/access_control.py +0 -0
  14. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/constants.py +0 -0
  15. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/exceptions.py +0 -0
  16. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/models/__init__.py +0 -0
  17. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/models/access_control.py +0 -0
  18. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/models/datasources.py +0 -0
  19. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/models/indexes.py +0 -0
  20. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/models/search.py +0 -0
  21. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/py.typed +0 -0
  22. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/utils/asyn.py +0 -0
  23. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/utils/documents.py +0 -0
  24. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/utils/fs.py +0 -0
  25. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/utils/iter.py +0 -0
  26. {cohere_compass_sdk-2.2.0 → cohere_compass_sdk-2.2.2}/cohere_compass/utils/retry.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cohere-compass-sdk
3
- Version: 2.2.0
3
+ Version: 2.2.2
4
4
  Summary: Cohere Compass SDK
5
5
  Requires-Python: >=3.11,<4.0
6
6
  Classifier: Programming Language :: Python :: 3
@@ -18,6 +18,7 @@ from collections.abc import Iterable
18
18
  from dataclasses import dataclass
19
19
  from datetime import timedelta
20
20
  from statistics import mean
21
+ from types import TracebackType
21
22
  from typing import Any, Literal
22
23
 
23
24
  # 3rd party imports
@@ -267,9 +268,9 @@ class CompassClient:
267
268
  if httpx_client.timeout.read
268
269
  else DEFAULT_COMPASS_CLIENT_TIMEOUT
269
270
  )
270
- self.httpx_client = httpx_client or httpx.Client(
271
- timeout=self.timeout.total_seconds()
272
- )
271
+ self.httpx = httpx_client or httpx.Client(timeout=self.timeout.total_seconds())
272
+ self._own_httpx_client = httpx_client is None
273
+ self._closed = False
273
274
 
274
275
  self.bearer_token = bearer_token
275
276
 
@@ -281,8 +282,23 @@ class CompassClient:
281
282
  self.retry_wait = retry_wait
282
283
 
283
284
  def close(self):
284
- """Close the HTTP client connection."""
285
- self.httpx_client.close()
285
+ """Close the httpx client if it was created by this instance."""
286
+ if self._own_httpx_client and not self._closed:
287
+ self.httpx.close()
288
+ self._closed = True
289
+
290
+ def __enter__(self):
291
+ """For use by "with" statements."""
292
+ return self
293
+
294
+ def __exit__(
295
+ self,
296
+ exc_type: type[BaseException] | None,
297
+ exc_value: BaseException | None,
298
+ traceback: TracebackType | None,
299
+ ) -> None:
300
+ """For use by "with" statements."""
301
+ self.close()
286
302
 
287
303
  def get_models(
288
304
  self,
@@ -615,10 +631,10 @@ class CompassClient:
615
631
  index_name: str,
616
632
  filename: str,
617
633
  filebytes: bytes,
618
- content_type: ContentTypeEnum,
619
634
  document_id: str,
620
635
  attributes: DocumentAttributes = DocumentAttributes(),
621
636
  config: ParseableDocumentConfig = ParseableDocumentConfig(),
637
+ content_type: ContentTypeEnum | None = None,
622
638
  max_retries: int | None = None,
623
639
  retry_wait: timedelta | None = None,
624
640
  timeout: timedelta | None = None,
@@ -629,7 +645,8 @@ class CompassClient:
629
645
  :param index_name: The name of the index.
630
646
  :param filename: The filename of the document.
631
647
  :param filebytes: The raw bytes of the document.
632
- :param content_type: The content type of the document.
648
+ :param content_type: optional content type of the document.
649
+ Recommended to pass it otherwise auto-detected.
633
650
  :param document_id: The ID to assign to the document.
634
651
  :param attributes: Additional attributes to add to the document.
635
652
  :param config: Configuration for the document parsing.
@@ -1392,27 +1409,27 @@ class CompassClient:
1392
1409
  headers = {"Authorization": f"Bearer {self.bearer_token}"}
1393
1410
 
1394
1411
  if http_method == "GET":
1395
- response = self.httpx_client.get(
1412
+ response = self.httpx.get(
1396
1413
  target_path,
1397
1414
  headers=headers,
1398
1415
  timeout=timeout.total_seconds(),
1399
1416
  )
1400
1417
  elif http_method == "POST":
1401
- response = self.httpx_client.post(
1418
+ response = self.httpx.post(
1402
1419
  target_path,
1403
1420
  json=data_dict,
1404
1421
  headers=headers,
1405
1422
  timeout=timeout.total_seconds(),
1406
1423
  )
1407
1424
  elif http_method == "PUT":
1408
- response = self.httpx_client.put(
1425
+ response = self.httpx.put(
1409
1426
  target_path,
1410
1427
  json=data_dict,
1411
1428
  headers=headers,
1412
1429
  timeout=timeout.total_seconds(),
1413
1430
  )
1414
1431
  elif http_method == "DELETE":
1415
- response = self.httpx_client.delete(
1432
+ response = self.httpx.delete(
1416
1433
  target_path,
1417
1434
  headers=headers,
1418
1435
  timeout=timeout.total_seconds(),
@@ -14,6 +14,7 @@ from collections import deque
14
14
  from collections.abc import AsyncIterable, Iterable
15
15
  from datetime import timedelta
16
16
  from statistics import mean
17
+ from types import TracebackType
17
18
  from typing import Any, Literal
18
19
 
19
20
  # 3rd party imports
@@ -133,9 +134,11 @@ class CompassAsyncClient:
133
134
  if httpx_client.timeout.read
134
135
  else DEFAULT_COMPASS_CLIENT_TIMEOUT
135
136
  )
136
- self.httpx_client = httpx_client or httpx.AsyncClient(
137
+ self.httpx = httpx_client or httpx.AsyncClient(
137
138
  timeout=self.timeout.total_seconds()
138
139
  )
140
+ self._own_httpx_client = httpx_client is None
141
+ self._closed = False
139
142
 
140
143
  self.bearer_token = bearer_token
141
144
 
@@ -149,8 +152,25 @@ class CompassAsyncClient:
149
152
  self.retry_wait = retry_wait
150
153
 
151
154
  async def aclose(self):
152
- """Close the HTTP client."""
153
- await self.httpx_client.aclose()
155
+ """Close the httpx client if it was created by the CompassAsyncClient."""
156
+ if self._own_httpx_client and not self._closed:
157
+ await self.httpx.aclose()
158
+ self._closed = True
159
+
160
+ close = aclose # Alias for consistency with sync client
161
+
162
+ async def __aenter__(self):
163
+ """For use by "async with" statements."""
164
+ return self
165
+
166
+ async def __aexit__(
167
+ self,
168
+ exc_type: type[BaseException] | None,
169
+ exc_value: BaseException | None,
170
+ traceback: TracebackType | None,
171
+ ) -> None:
172
+ """For use by "async with" statements."""
173
+ await self.aclose()
154
174
 
155
175
  async def get_models(
156
176
  self,
@@ -470,10 +490,10 @@ class CompassAsyncClient:
470
490
  index_name: str,
471
491
  filename: str,
472
492
  filebytes: bytes,
473
- content_type: ContentTypeEnum,
474
493
  document_id: str,
475
494
  attributes: DocumentAttributes = DocumentAttributes(),
476
495
  config: ParseableDocumentConfig = ParseableDocumentConfig(),
496
+ content_type: ContentTypeEnum | None = None,
477
497
  max_retries: int | None = None,
478
498
  retry_wait: timedelta | None = None,
479
499
  timeout: timedelta | None = None,
@@ -484,7 +504,8 @@ class CompassAsyncClient:
484
504
  :param index_name: the name of the index
485
505
  :param filename: the filename of the document
486
506
  :param filebytes: the bytes of the document
487
- :param content_type: the content type of the document
507
+ :param content_type: optional content type of the document.
508
+ Recommended to pass it otherwise auto-detected.
488
509
  :param document_id: the id of the document (optional)
489
510
  :param attributes: Additional attributes to add to the document.
490
511
  :param config: Configuration for the document parsing.
@@ -1265,27 +1286,27 @@ class CompassAsyncClient:
1265
1286
  headers = {"Authorization": f"Bearer {self.bearer_token}"}
1266
1287
 
1267
1288
  if http_method == "GET":
1268
- response = await self.httpx_client.get(
1289
+ response = await self.httpx.get(
1269
1290
  target_path,
1270
1291
  headers=headers,
1271
1292
  timeout=timeout.total_seconds(),
1272
1293
  )
1273
1294
  elif http_method == "POST":
1274
- response = await self.httpx_client.post(
1295
+ response = await self.httpx.post(
1275
1296
  target_path,
1276
1297
  json=data_dict,
1277
1298
  headers=headers,
1278
1299
  timeout=timeout.total_seconds(),
1279
1300
  )
1280
1301
  elif http_method == "PUT":
1281
- response = await self.httpx_client.put(
1302
+ response = await self.httpx.put(
1282
1303
  target_path,
1283
1304
  json=data_dict,
1284
1305
  headers=headers,
1285
1306
  timeout=timeout.total_seconds(),
1286
1307
  )
1287
1308
  elif http_method == "DELETE":
1288
- response = await self.httpx_client.delete(
1309
+ response = await self.httpx.delete(
1289
1310
  target_path,
1290
1311
  headers=headers,
1291
1312
  timeout=timeout.total_seconds(),
@@ -12,6 +12,7 @@ import logging
12
12
  from collections.abc import Callable, Iterable
13
13
  from concurrent.futures import ThreadPoolExecutor
14
14
  from datetime import timedelta
15
+ from types import TracebackType
15
16
  from typing import Any
16
17
 
17
18
  # 3rd party imports
@@ -42,9 +43,7 @@ from cohere_compass.models import (
42
43
  )
43
44
  from cohere_compass.utils.fs import open_document, scan_folder
44
45
  from cohere_compass.utils.iter import imap_parallel
45
- from cohere_compass.utils.retry import (
46
- is_retryable_compass_exception,
47
- )
46
+ from cohere_compass.utils.retry import is_retryable_compass_exception
48
47
 
49
48
  Fn_or_Dict = dict[str, Any] | Callable[[CompassDocument], dict[str, Any]]
50
49
 
@@ -123,15 +122,34 @@ class CompassParserClient:
123
122
  if httpx_client.timeout.read
124
123
  else DEFAULT_COMPASS_PARSER_CLIENT_TIMEOUT
125
124
  )
126
- self.httpx_client = httpx_client or httpx.Client(
127
- timeout=self.timeout.total_seconds()
128
- )
125
+ self.httpx = httpx_client or httpx.Client(timeout=self.timeout.total_seconds())
126
+ self._own_httpx_client = httpx_client is None
127
+ self._closed = False
129
128
 
130
129
  self.metadata_config = metadata_config
131
130
  logger.info(
132
131
  f"CompassParserClient initialized with parser_url: {self.parser_url}"
133
132
  )
134
133
 
134
+ def close(self):
135
+ """Close the httpx client if it was created by this instance."""
136
+ if self._own_httpx_client and not self._closed:
137
+ self.httpx.close()
138
+ self._closed = True
139
+
140
+ def __enter__(self):
141
+ """For use by "with" statements."""
142
+ return self
143
+
144
+ def __exit__(
145
+ self,
146
+ exc_type: type[BaseException] | None,
147
+ exc_value: BaseException | None,
148
+ traceback: TracebackType | None,
149
+ ) -> None:
150
+ """For use by "with" statements."""
151
+ self.close()
152
+
135
153
  def process_folder(
136
154
  self,
137
155
  *,
@@ -390,7 +408,7 @@ class CompassParserClient:
390
408
  headers = {"Authorization": f"Bearer {self.bearer_token}"}
391
409
 
392
410
  with handle_httpx_exceptions():
393
- res = self.httpx_client.post(
411
+ res = self.httpx.post(
394
412
  url=f"{self.parser_url}/v1/process_file",
395
413
  data={"data": json.dumps(params.model_dump())},
396
414
  files={"file": (filename, file_bytes)},
@@ -11,6 +11,7 @@ import logging
11
11
  from collections.abc import Callable
12
12
  from concurrent.futures import ThreadPoolExecutor
13
13
  from datetime import timedelta
14
+ from types import TracebackType
14
15
  from typing import Any
15
16
 
16
17
  # 3rd party imports
@@ -39,9 +40,7 @@ from cohere_compass.models import (
39
40
  )
40
41
  from cohere_compass.utils.asyn import async_map
41
42
  from cohere_compass.utils.fs import open_document, scan_folder
42
- from cohere_compass.utils.retry import (
43
- is_retryable_compass_exception,
44
- )
43
+ from cohere_compass.utils.retry import is_retryable_compass_exception
45
44
 
46
45
  Fn_or_Dict = dict[str, Any] | Callable[[CompassDocument], dict[str, Any]]
47
46
 
@@ -123,12 +122,35 @@ class CompassParserAsyncClient:
123
122
  self.httpx = httpx_client or httpx.AsyncClient(
124
123
  timeout=self.timeout.total_seconds()
125
124
  )
125
+ self._own_httpx_client = httpx_client is None
126
+ self._closed = False
126
127
 
127
128
  self.metadata_config = metadata_config
128
129
  logger.info(
129
130
  f"CompassParserClient initialized with parser_url: {self.parser_url}"
130
131
  )
131
132
 
133
+ async def aclose(self):
134
+ """Close the httpx client if it was created by the CompassParserAsyncClient."""
135
+ if self._own_httpx_client and not self._closed:
136
+ await self.httpx.aclose()
137
+ self._closed = True
138
+
139
+ close = aclose # Alias for consistency with sync client
140
+
141
+ async def __aenter__(self):
142
+ """For use by "async with" statements."""
143
+ return self
144
+
145
+ async def __aexit__(
146
+ self,
147
+ exc_type: type[BaseException] | None,
148
+ exc_value: BaseException | None,
149
+ traceback: TracebackType | None,
150
+ ) -> None:
151
+ """For use by "async with" statements."""
152
+ await self.aclose()
153
+
132
154
  def process_folder(
133
155
  self,
134
156
  *,
@@ -113,6 +113,19 @@ class PresentationParsingStrategy(str, Enum):
113
113
  return cls.Unstructured
114
114
 
115
115
 
116
+ class DocxParsingStrategy(str, Enum):
117
+ """Enum for specifying the parsing strategy for DOCX files."""
118
+
119
+ # Uses https://github.com/microsoft/markitdown
120
+ MarkItDown = "MarkItDown"
121
+ # Converts the DOCX to PDF and uses the PDF parsing strategy
122
+ ConvertToPDF = "ConvertToPDF"
123
+
124
+ @classmethod
125
+ def _missing_(cls, value: Any):
126
+ return cls.MarkItDown
127
+
128
+
116
129
  class ParsingStrategy(str, Enum):
117
130
  """Enum for specifying the parsing strategy to use."""
118
131
 
@@ -193,12 +206,12 @@ class ParserConfig(BaseModel):
193
206
  vertical_table_crop_margin: int = 100
194
207
  horizontal_table_crop_margin: int = 100
195
208
 
209
+ pdf_parsing_config: PDFParsingConfig = PDFParsingConfig()
196
210
  pdf_parsing_strategy: PDFParsingStrategy = PDFParsingStrategy.QuickText
197
211
  tabular_parsing_strategy: TabularParsingStrategy = TabularParsingStrategy.Granular
198
-
199
- pdf_parsing_config: PDFParsingConfig = PDFParsingConfig()
200
-
201
212
  presentation_parsing_strategy: PresentationParsingStrategy | None = None
213
+ docx_parsing_strategy: DocxParsingStrategy | None = None
214
+
202
215
  enable_assets_returned_as_base64: bool = True
203
216
 
204
217
 
@@ -300,7 +300,7 @@ class ParseableDocument(BaseModel):
300
300
  filename: Annotated[
301
301
  str, StringConstraints(min_length=1)
302
302
  ] # Ensures the filename is a non-empty string
303
- content_type: str
303
+ content_type: str | None = None
304
304
  content_length_bytes: PositiveInt # File size must be a non-negative integer
305
305
  content_encoded_bytes: str # Base64-encoded file contents
306
306
  attributes: DocumentAttributes
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cohere-compass-sdk"
3
- version = "2.2.0"
3
+ version = "2.2.2"
4
4
  authors = []
5
5
  description = "Cohere Compass SDK"
6
6
  readme = "README.md"
@@ -93,4 +93,4 @@ omit = [
93
93
  ]
94
94
 
95
95
  [tool.coverage.html]
96
- directory = "coverage_html"
96
+ directory = "coverage_html"