nucliadb-utils 5.0.0.post806__py3-none-any.whl → 5.0.0.post821__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,10 +25,17 @@ from collections.abc import AsyncIterable, Iterable
25
25
  from itertools import islice
26
26
  from typing import Any, AsyncGenerator, Optional
27
27
 
28
+ import backoff
28
29
  import httpx
29
30
 
30
31
  from nucliadb_telemetry.metrics import Observer
32
+ from nucliadb_utils.aiopynecone.exceptions import (
33
+ PineconeAPIError,
34
+ PineconeRateLimitError,
35
+ raise_for_status,
36
+ )
31
37
  from nucliadb_utils.aiopynecone.models import (
38
+ CreateIndexRequest,
32
39
  CreateIndexResponse,
33
40
  ListResponse,
34
41
  QueryResponse,
@@ -38,9 +45,13 @@ from nucliadb_utils.aiopynecone.models import (
38
45
 
39
46
  logger = logging.getLogger(__name__)
40
47
 
48
+
41
49
  pinecone_observer = Observer(
42
50
  "pinecone_client",
43
51
  labels={"type": ""},
52
+ error_mappings={
53
+ "rate_limit": PineconeRateLimitError,
54
+ },
44
55
  )
45
56
 
46
57
  DEFAULT_TIMEOUT = 30
@@ -55,25 +66,18 @@ MAX_UPSERT_PAYLOAD_SIZE = 2 * MEGA_BYTE
55
66
  MAX_DELETE_BATCH_SIZE = 1000
56
67
 
57
68
 
58
- class PineconeAPIError(Exception):
59
- def __init__(
60
- self,
61
- http_status_code: int,
62
- code: Optional[str] = None,
63
- message: Optional[str] = None,
64
- details: Optional[Any] = None,
65
- ):
66
- self.http_status_code = http_status_code
67
- self.code = code or ""
68
- self.message = message or ""
69
- self.details = details or {}
70
- exc_message = '[{http_status_code}] message="{message}" code={code} details={details}'.format(
71
- http_status_code=http_status_code,
72
- message=message,
73
- code=code,
74
- details=details,
75
- )
76
- super().__init__(exc_message)
69
# Exceptions that trigger a retry: Pinecone rate-limit responses and
# httpx connection/network failures.
RETRIABLE_EXCEPTIONS = (
    PineconeRateLimitError,
    httpx.ConnectError,
    httpx.NetworkError,
)

# Decorator applied to the data plane operations: retries on any of the
# RETRIABLE_EXCEPTIONS using exponential wait times plus random jitter,
# and stops after 4 tries.
backoff_handler = backoff.on_exception(
    backoff.expo,
    RETRIABLE_EXCEPTIONS,
    jitter=backoff.random_jitter,
    max_tries=4,
)
77
81
 
78
82
 
79
83
  class ControlPlane:
@@ -97,15 +101,17 @@ class ControlPlane:
97
101
  Returns:
98
102
  - The index host to be used for data plane operations.
99
103
  """
100
- payload = {
101
- "name": name,
102
- "dimension": dimension,
103
- "metric": metric,
104
- "spec": {"serverless": {"cloud": "aws", "region": "us-east-1"}},
105
- }
104
+ payload = CreateIndexRequest(
105
+ name=name,
106
+ dimension=dimension,
107
+ metric=metric,
108
+ spec={"serverless": {"cloud": "aws", "region": "us-east-1"}},
109
+ )
106
110
  headers = {"Api-Key": self.api_key}
107
- http_response = await self.http_session.post("/indexes", json=payload, headers=headers)
108
- raise_for_status(http_response)
111
+ http_response = await self.http_session.post(
112
+ "/indexes", json=payload.model_dump(), headers=headers
113
+ )
114
+ raise_for_status("create_index", http_response)
109
115
  response = CreateIndexResponse.model_validate(http_response.json())
110
116
  return response.host
111
117
 
@@ -121,7 +127,7 @@ class ControlPlane:
121
127
  if response.status_code == 404: # pragma: no cover
122
128
  logger.warning("Pinecone index not found.", extra={"index_name": name})
123
129
  return
124
- raise_for_status(response)
130
+ raise_for_status("delete_index", response)
125
131
 
126
132
 
127
133
  class DataPlane:
@@ -147,6 +153,7 @@ class DataPlane:
147
153
  def _get_request_timeout(self, timeout: Optional[float] = None) -> Optional[float]:
148
154
  return timeout or self.client_timeout
149
155
 
156
+ @backoff_handler
150
157
  @pinecone_observer.wrap({"type": "upsert"})
151
158
  async def upsert(self, vectors: list[Vector], timeout: Optional[float] = None) -> None:
152
159
  """
@@ -165,7 +172,7 @@ class DataPlane:
165
172
  if request_timeout is not None:
166
173
  post_kwargs["timeout"] = timeout
167
174
  response = await self.http_session.post("/vectors/upsert", **post_kwargs)
168
- raise_for_status(response)
175
+ raise_for_status("upsert", response)
169
176
 
170
177
  def _estimate_upsert_batch_size(self, vectors: list[Vector]) -> int:
171
178
  """
@@ -220,6 +227,7 @@ class DataPlane:
220
227
 
221
228
  await asyncio.gather(*tasks)
222
229
 
230
+ @backoff_handler
223
231
  @pinecone_observer.wrap({"type": "delete"})
224
232
  async def delete(self, ids: list[str], timeout: Optional[float] = None) -> None:
225
233
  """
@@ -242,8 +250,9 @@ class DataPlane:
242
250
  if request_timeout is not None:
243
251
  post_kwargs["timeout"] = timeout
244
252
  response = await self.http_session.post("/vectors/delete", **post_kwargs)
245
- raise_for_status(response)
253
+ raise_for_status("delete", response)
246
254
 
255
+ @backoff_handler
247
256
  @pinecone_observer.wrap({"type": "list_page"})
248
257
  async def list_page(
249
258
  self,
@@ -279,7 +288,7 @@ class DataPlane:
279
288
  "/vectors/list",
280
289
  **post_kwargs,
281
290
  )
282
- raise_for_status(response)
291
+ raise_for_status("list_page", response)
283
292
  return ListResponse.model_validate(response.json())
284
293
 
285
294
  async def list_all(
@@ -306,6 +315,7 @@ class DataPlane:
306
315
  break
307
316
  pagination_token = response.pagination.next
308
317
 
318
+ @backoff_handler
309
319
  @pinecone_observer.wrap({"type": "delete_all"})
310
320
  async def delete_all(self, timeout: Optional[float] = None):
311
321
  """
@@ -324,7 +334,7 @@ class DataPlane:
324
334
  post_kwargs["timeout"] = timeout
325
335
  response = await self.http_session.post("/vectors/delete", **post_kwargs)
326
336
  try:
327
- raise_for_status(response)
337
+ raise_for_status("delete_all", response)
328
338
  except PineconeAPIError as err:
329
339
  if err.http_status_code == 404 and err.code == 5: # pragma: no cover
330
340
  # Namespace not found. No vectors to delete.
@@ -366,6 +376,7 @@ class DataPlane:
366
376
 
367
377
  await asyncio.gather(*tasks)
368
378
 
379
+ @backoff_handler
369
380
  @pinecone_observer.wrap({"type": "query"})
370
381
  async def query(
371
382
  self,
@@ -404,7 +415,7 @@ class DataPlane:
404
415
  if request_timeout is not None:
405
416
  post_kwargs["timeout"] = timeout
406
417
  response = await self.http_session.post("/query", **post_kwargs)
407
- raise_for_status(response)
418
+ raise_for_status("query", response)
408
419
  return QueryResponse.model_validate(response.json())
409
420
 
410
421
 
@@ -459,28 +470,6 @@ class PineconeSession:
459
470
  return DataPlane(api_key=api_key, index_host_session=index_host_session, timeout=timeout)
460
471
 
461
472
 
462
- def raise_for_status(response: httpx.Response):
463
- try:
464
- response.raise_for_status()
465
- except httpx.HTTPStatusError:
466
- code = None
467
- message = None
468
- details = None
469
- try:
470
- resp_json = response.json()
471
- code = resp_json.get("code")
472
- message = resp_json.get("message")
473
- details = resp_json.get("details")
474
- except Exception:
475
- message = response.text
476
- raise PineconeAPIError(
477
- http_status_code=response.status_code,
478
- code=code,
479
- message=message,
480
- details=details,
481
- )
482
-
483
-
484
473
  def batchify(iterable: Iterable, batch_size: int):
485
474
  """
486
475
  Split an iterable into batches of batch_size
@@ -0,0 +1,91 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ from typing import Any, Optional
22
+
23
+ import httpx
24
+
25
+ from nucliadb_telemetry.metrics import Counter
26
+
27
+ pinecone_errors_counter = Counter("pinecone_errors", labels={"type": ""})
28
+
29
+
30
class PineconeAPIError(Exception):
    """
    Generic Pinecone API error.

    Attributes:
        http_status_code: HTTP status code of the failed response.
        code: Error code reported by the API, or "" when none was provided.
        message: Error message reported by the API, or "" when none was provided.
        details: Extra error details reported by the API, or {} when none were provided.
    """

    def __init__(
        self,
        http_status_code: int,
        code: Optional[str] = None,
        message: Optional[str] = None,
        details: Optional[Any] = None,
    ):
        self.http_status_code = http_status_code
        self.code = code or ""
        self.message = message or ""
        self.details = details or {}
        # Build the exception text from the normalised attributes so that
        # missing fields are not rendered as the literal string "None".
        super().__init__(
            f'[{self.http_status_code}] message="{self.message}" '
            f"code={self.code} details={self.details}"
        )
53
+
54
+
55
class PineconeRateLimitError(PineconeAPIError):
    """
    Pinecone API error signalling that the client exceeded the rate limit.

    It is a dedicated subclass so that callers can tell it apart from other
    API errors and back off and retry.
    """
61
+
62
+
63
def raise_for_status(operation: str, response: httpx.Response):
    """
    Raise a Pinecone-specific exception if the response has an error status.

    Args:
        operation: Name of the operation that issued the request; used as the
            "type" label of the errors counter.
        response: The HTTP response to check.

    Raises:
        PineconeRateLimitError: If the response status code is 429.
        PineconeAPIError: For any other error status code.
    """
    try:
        response.raise_for_status()
    except httpx.HTTPStatusError as http_error:
        pinecone_errors_counter.inc(labels={"type": operation})
        code = None
        message = None
        details = None
        try:
            resp_json = response.json()
            # Error fields may be nested under an "error" object; when that key
            # is missing, fall back to reading them from the top level of the
            # body so the code/message/details are not silently dropped.
            error = resp_json.get("error") or resp_json
            code = error.get("code")
            message = error.get("message")
            details = error.get("details")
        except Exception:  # pragma: no cover
            # Body is not JSON (or not shaped as expected): keep the raw text.
            message = response.text
        # 429 gets its own exception type so that it can be retried with backoff.
        error_class = PineconeRateLimitError if response.status_code == 429 else PineconeAPIError
        raise error_class(
            http_status_code=response.status_code,
            code=code,
            message=message,
            details=details,
        ) from http_error
@@ -20,14 +20,47 @@
20
20
  import json
21
21
  from typing import Any, Optional
22
22
 
23
+ import pydantic
23
24
  from pydantic import BaseModel, Field, field_validator
25
+ from typing_extensions import Annotated
24
26
 
25
27
  KILO_BYTE = 1024
26
28
  MAX_METADATA_SIZE = 40 * KILO_BYTE
29
+ MAX_INDEX_NAME_LENGTH = 45
27
30
 
28
31
 
29
32
  # Requests
30
33
 
34
# Regular expression for index names: one or more lowercase letters, digits or dashes.
IndexNamePattern = r"^[a-z0-9-]+$"
35
+
36
+
37
def validate_index_name(value, handler, info):
    """
    Wrap validator that turns pattern mismatches into a friendlier error.

    Args:
        value: The raw index name being validated.
        handler: The inner validation callable; invoked with `value`.
        info: Validation info passed by pydantic (unused).

    Returns:
        Whatever `handler(value)` returns.

    Raises:
        ValueError: If validation failed because the value does not match the
            index name pattern.
        pydantic.ValidationError: Re-raised unchanged for any other failure.
    """
    try:
        return handler(value)
    except pydantic.ValidationError as e:
        if any(x["type"] == "string_pattern_mismatch" for x in e.errors()):
            raise ValueError(
                f"Invalid index name: '{value}'. Pinecone index names must be a string with only "
                "lowercase letters, numbers and dashes."
            ) from e
        # Not a pattern problem (e.g. a length constraint): keep the original error.
        raise
48
+
49
+
50
# String type for index names: must match IndexNamePattern and be between
# 1 and MAX_INDEX_NAME_LENGTH characters long. validate_index_name wraps the
# validation to replace pattern-mismatch errors with a more descriptive one.
IndexNameStr = Annotated[
    str,
    pydantic.StringConstraints(pattern=IndexNamePattern),
    pydantic.StringConstraints(min_length=1, max_length=MAX_INDEX_NAME_LENGTH),
    pydantic.WrapValidator(validate_index_name),
]
56
+
57
+
58
class CreateIndexRequest(BaseModel):
    """
    Payload for the create index request.

    `name` is validated as an IndexNameStr; `spec` defaults to an empty dict.
    """

    name: IndexNameStr
    dimension: int
    metric: str
    spec: dict[str, Any] = Field(default_factory=dict)
63
+
31
64
 
32
65
  class Vector(BaseModel):
33
66
  id: str = Field(max_length=512)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nucliadb_utils
3
- Version: 5.0.0.post806
3
+ Version: 5.0.0.post821
4
4
  Home-page: https://nuclia.com
5
5
  License: BSD
6
6
  Classifier: Development Status :: 4 - Beta
@@ -23,8 +23,8 @@ Requires-Dist: PyNaCl
23
23
  Requires-Dist: pyjwt >=2.4.0
24
24
  Requires-Dist: memorylru >=1.1.2
25
25
  Requires-Dist: mrflagly
26
- Requires-Dist: nucliadb-protos >=5.0.0.post806
27
- Requires-Dist: nucliadb-telemetry >=5.0.0.post806
26
+ Requires-Dist: nucliadb-protos >=5.0.0.post821
27
+ Requires-Dist: nucliadb-telemetry >=5.0.0.post821
28
28
  Provides-Extra: cache
29
29
  Requires-Dist: redis >=4.3.4 ; extra == 'cache'
30
30
  Requires-Dist: orjson >=3.6.7 ; extra == 'cache'
@@ -18,8 +18,9 @@ nucliadb_utils/store.py,sha256=kQ35HemE0v4_Qg6xVqNIJi8vSFAYQtwI3rDtMsNy62Y,890
18
18
  nucliadb_utils/transaction.py,sha256=mwcI3aIHAvU5KOGqd_Uz_d1XQzXhk_-NWY8NqU1lfb0,7307
19
19
  nucliadb_utils/utilities.py,sha256=oz3tEODG2g3todnyvA-nW1Ou6xXDveL_tMKTDGdWXM4,15287
20
20
  nucliadb_utils/aiopynecone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
21
- nucliadb_utils/aiopynecone/client.py,sha256=kvGLCzSbDaMIWx0LK9WBAL7QsyuPmUjC7cPv5djMdFw,19028
22
- nucliadb_utils/aiopynecone/models.py,sha256=DVlCVrinHAaDxuii3fzdciFn4dqS5HTTyBavnoDlR2U,2024
21
+ nucliadb_utils/aiopynecone/client.py,sha256=wUQIUZHKvhMhmLVfwrOF_nMBcf9l-4mXCvlSI0l0H24,18472
22
+ nucliadb_utils/aiopynecone/exceptions.py,sha256=hFhq-UEY4slqNWjObXr_LPnRf_AQ1vpcG4SF2XRFd1E,2873
23
+ nucliadb_utils/aiopynecone/models.py,sha256=sEmifzQ6rvqIB8nbkJbh8-hrCW4j8J9lJ_xLRaTAqro,2934
23
24
  nucliadb_utils/audit/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
24
25
  nucliadb_utils/audit/audit.py,sha256=dn5ZnCVQUlCcvdjzaORghbrjk9QgVGrtkfIftq30Bp8,2819
25
26
  nucliadb_utils/audit/basic.py,sha256=NViey6mKbCXqRTLDBX2xNTcCg9I-2e4oB2xkekuhDvM,3392
@@ -63,8 +64,8 @@ nucliadb_utils/tests/indexing.py,sha256=YW2QhkhO9Q_8A4kKWJaWSvXvyQ_AiAwY1VylcfVQ
63
64
  nucliadb_utils/tests/local.py,sha256=c3gZJJWmvOftruJkIQIwB3q_hh3uxEhqGIAVWim1Bbk,1343
64
65
  nucliadb_utils/tests/nats.py,sha256=Tosonm9A9cusImyji80G4pgdXEHNVPaCLT5TbFK_ra0,7543
65
66
  nucliadb_utils/tests/s3.py,sha256=YB8QqDaBXxyhHonEHmeBbRRDmvB7sTOaKBSi8KBGokg,2330
66
- nucliadb_utils-5.0.0.post806.dist-info/METADATA,sha256=HzorldEg8W_vDYIX6r7rQgVf_MaXkNeJQQNdrYgSblc,2073
67
- nucliadb_utils-5.0.0.post806.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
68
- nucliadb_utils-5.0.0.post806.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
69
- nucliadb_utils-5.0.0.post806.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
70
- nucliadb_utils-5.0.0.post806.dist-info/RECORD,,
67
+ nucliadb_utils-5.0.0.post821.dist-info/METADATA,sha256=54y8hOk8DjtRFOBXlvQnqvcvOOgLvdV3LKexpyS-iMM,2073
68
+ nucliadb_utils-5.0.0.post821.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
69
+ nucliadb_utils-5.0.0.post821.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
70
+ nucliadb_utils-5.0.0.post821.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
71
+ nucliadb_utils-5.0.0.post821.dist-info/RECORD,,