nucliadb-utils 5.0.0.post796__py3-none-any.whl → 5.0.0.post809__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,9 +25,15 @@ from collections.abc import AsyncIterable, Iterable
25
25
  from itertools import islice
26
26
  from typing import Any, AsyncGenerator, Optional
27
27
 
28
+ import backoff
28
29
  import httpx
29
30
 
30
31
  from nucliadb_telemetry.metrics import Observer
32
+ from nucliadb_utils.aiopynecone.exceptions import (
33
+ PineconeAPIError,
34
+ PineconeRateLimitError,
35
+ raise_for_status,
36
+ )
31
37
  from nucliadb_utils.aiopynecone.models import (
32
38
  CreateIndexResponse,
33
39
  ListResponse,
@@ -38,9 +44,13 @@ from nucliadb_utils.aiopynecone.models import (
38
44
 
39
45
  logger = logging.getLogger(__name__)
40
46
 
47
+
41
48
  pinecone_observer = Observer(
42
49
  "pinecone_client",
43
50
  labels={"type": ""},
51
+ error_mappings={
52
+ "rate_limit": PineconeRateLimitError,
53
+ },
44
54
  )
45
55
 
46
56
  DEFAULT_TIMEOUT = 30
@@ -55,25 +65,18 @@ MAX_UPSERT_PAYLOAD_SIZE = 2 * MEGA_BYTE
55
65
  MAX_DELETE_BATCH_SIZE = 1000
56
66
 
57
67
 
58
- class PineconeAPIError(Exception):
59
- def __init__(
60
- self,
61
- http_status_code: int,
62
- code: Optional[str] = None,
63
- message: Optional[str] = None,
64
- details: Optional[Any] = None,
65
- ):
66
- self.http_status_code = http_status_code
67
- self.code = code or ""
68
- self.message = message or ""
69
- self.details = details or {}
70
- exc_message = '[{http_status_code}] message="{message}" code={code} details={details}'.format(
71
- http_status_code=http_status_code,
72
- message=message,
73
- code=code,
74
- details=details,
75
- )
76
- super().__init__(exc_message)
68
# Exception types that trigger a retry when raised by a decorated call.
RETRIABLE_EXCEPTIONS = (
    PineconeRateLimitError,
    httpx.ConnectError,
    httpx.NetworkError,
)

# Shared retry decorator for the data plane methods: exponential wait with
# random jitter between attempts, at most 4 tries in total.
backoff_handler = backoff.on_exception(
    wait_gen=backoff.expo,
    exception=RETRIABLE_EXCEPTIONS,
    jitter=backoff.random_jitter,
    max_tries=4,
)
77
80
 
78
81
 
79
82
  class ControlPlane:
@@ -105,7 +108,7 @@ class ControlPlane:
105
108
  }
106
109
  headers = {"Api-Key": self.api_key}
107
110
  http_response = await self.http_session.post("/indexes", json=payload, headers=headers)
108
- raise_for_status(http_response)
111
+ raise_for_status("create_index", http_response)
109
112
  response = CreateIndexResponse.model_validate(http_response.json())
110
113
  return response.host
111
114
 
@@ -121,7 +124,7 @@ class ControlPlane:
121
124
  if response.status_code == 404: # pragma: no cover
122
125
  logger.warning("Pinecone index not found.", extra={"index_name": name})
123
126
  return
124
- raise_for_status(response)
127
+ raise_for_status("delete_index", response)
125
128
 
126
129
 
127
130
  class DataPlane:
@@ -147,6 +150,7 @@ class DataPlane:
147
150
  def _get_request_timeout(self, timeout: Optional[float] = None) -> Optional[float]:
148
151
  return timeout or self.client_timeout
149
152
 
153
+ @backoff_handler
150
154
  @pinecone_observer.wrap({"type": "upsert"})
151
155
  async def upsert(self, vectors: list[Vector], timeout: Optional[float] = None) -> None:
152
156
  """
@@ -165,7 +169,7 @@ class DataPlane:
165
169
  if request_timeout is not None:
166
170
  post_kwargs["timeout"] = timeout
167
171
  response = await self.http_session.post("/vectors/upsert", **post_kwargs)
168
- raise_for_status(response)
172
+ raise_for_status("upsert", response)
169
173
 
170
174
  def _estimate_upsert_batch_size(self, vectors: list[Vector]) -> int:
171
175
  """
@@ -220,6 +224,7 @@ class DataPlane:
220
224
 
221
225
  await asyncio.gather(*tasks)
222
226
 
227
+ @backoff_handler
223
228
  @pinecone_observer.wrap({"type": "delete"})
224
229
  async def delete(self, ids: list[str], timeout: Optional[float] = None) -> None:
225
230
  """
@@ -242,8 +247,9 @@ class DataPlane:
242
247
  if request_timeout is not None:
243
248
  post_kwargs["timeout"] = timeout
244
249
  response = await self.http_session.post("/vectors/delete", **post_kwargs)
245
- raise_for_status(response)
250
+ raise_for_status("delete", response)
246
251
 
252
+ @backoff_handler
247
253
  @pinecone_observer.wrap({"type": "list_page"})
248
254
  async def list_page(
249
255
  self,
@@ -279,7 +285,7 @@ class DataPlane:
279
285
  "/vectors/list",
280
286
  **post_kwargs,
281
287
  )
282
- raise_for_status(response)
288
+ raise_for_status("list_page", response)
283
289
  return ListResponse.model_validate(response.json())
284
290
 
285
291
  async def list_all(
@@ -306,6 +312,7 @@ class DataPlane:
306
312
  break
307
313
  pagination_token = response.pagination.next
308
314
 
315
+ @backoff_handler
309
316
  @pinecone_observer.wrap({"type": "delete_all"})
310
317
  async def delete_all(self, timeout: Optional[float] = None):
311
318
  """
@@ -324,7 +331,7 @@ class DataPlane:
324
331
  post_kwargs["timeout"] = timeout
325
332
  response = await self.http_session.post("/vectors/delete", **post_kwargs)
326
333
  try:
327
- raise_for_status(response)
334
+ raise_for_status("delete_all", response)
328
335
  except PineconeAPIError as err:
329
336
  if err.http_status_code == 404 and err.code == 5: # pragma: no cover
330
337
  # Namespace not found. No vectors to delete.
@@ -366,6 +373,7 @@ class DataPlane:
366
373
 
367
374
  await asyncio.gather(*tasks)
368
375
 
376
+ @backoff_handler
369
377
  @pinecone_observer.wrap({"type": "query"})
370
378
  async def query(
371
379
  self,
@@ -404,7 +412,7 @@ class DataPlane:
404
412
  if request_timeout is not None:
405
413
  post_kwargs["timeout"] = timeout
406
414
  response = await self.http_session.post("/query", **post_kwargs)
407
- raise_for_status(response)
415
+ raise_for_status("query", response)
408
416
  return QueryResponse.model_validate(response.json())
409
417
 
410
418
 
@@ -459,28 +467,6 @@ class PineconeSession:
459
467
  return DataPlane(api_key=api_key, index_host_session=index_host_session, timeout=timeout)
460
468
 
461
469
 
462
- def raise_for_status(response: httpx.Response):
463
- try:
464
- response.raise_for_status()
465
- except httpx.HTTPStatusError:
466
- code = None
467
- message = None
468
- details = None
469
- try:
470
- resp_json = response.json()
471
- code = resp_json.get("code")
472
- message = resp_json.get("message")
473
- details = resp_json.get("details")
474
- except Exception:
475
- message = response.text
476
- raise PineconeAPIError(
477
- http_status_code=response.status_code,
478
- code=code,
479
- message=message,
480
- details=details,
481
- )
482
-
483
-
484
470
  def batchify(iterable: Iterable, batch_size: int):
485
471
  """
486
472
  Split an iterable into batches of batch_size
@@ -0,0 +1,90 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ from typing import Any, Optional
22
+
23
+ import httpx
24
+
25
+ from nucliadb_telemetry.metrics import Counter
26
+
27
+ pinecone_errors_counter = Counter("pinecone_errors", labels={"type": ""})
28
+
29
+
30
+ class PineconeAPIError(Exception):
31
+ """
32
+ Generic Pinecone API error.
33
+ """
34
+
35
+ def __init__(
36
+ self,
37
+ http_status_code: int,
38
+ code: Optional[str] = None,
39
+ message: Optional[str] = None,
40
+ details: Optional[Any] = None,
41
+ ):
42
+ self.http_status_code = http_status_code
43
+ self.code = code or ""
44
+ self.message = message or ""
45
+ self.details = details or {}
46
+ exc_message = '[{http_status_code}] message="{message}" code={code} details={details}'.format(
47
+ http_status_code=http_status_code,
48
+ message=message,
49
+ code=code,
50
+ details=details,
51
+ )
52
+ super().__init__(exc_message)
53
+
54
+
55
+ class PineconeRateLimitError(PineconeAPIError):
56
+ """
57
+ Raised when the client has exceeded the rate limit to be able to backoff and retry.
58
+ """
59
+
60
+ pass
61
+
62
+
63
+ def raise_for_status(operation: str, response: httpx.Response):
64
+ try:
65
+ response.raise_for_status()
66
+ except httpx.HTTPStatusError:
67
+ pinecone_errors_counter.inc(labels={"type": operation})
68
+ code = None
69
+ message = None
70
+ details = None
71
+ try:
72
+ resp_json = response.json()
73
+ code = resp_json.get("code")
74
+ message = resp_json.get("message")
75
+ details = resp_json.get("details")
76
+ except Exception:
77
+ message = response.text
78
+ if response.status_code == 429:
79
+ raise PineconeRateLimitError(
80
+ http_status_code=response.status_code,
81
+ code=code,
82
+ message=message,
83
+ details=details,
84
+ )
85
+ raise PineconeAPIError(
86
+ http_status_code=response.status_code,
87
+ code=code,
88
+ message=message,
89
+ details=details,
90
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nucliadb_utils
3
- Version: 5.0.0.post796
3
+ Version: 5.0.0.post809
4
4
  Home-page: https://nuclia.com
5
5
  License: BSD
6
6
  Classifier: Development Status :: 4 - Beta
@@ -23,8 +23,8 @@ Requires-Dist: PyNaCl
23
23
  Requires-Dist: pyjwt >=2.4.0
24
24
  Requires-Dist: memorylru >=1.1.2
25
25
  Requires-Dist: mrflagly
26
- Requires-Dist: nucliadb-protos >=5.0.0.post796
27
- Requires-Dist: nucliadb-telemetry >=5.0.0.post796
26
+ Requires-Dist: nucliadb-protos >=5.0.0.post809
27
+ Requires-Dist: nucliadb-telemetry >=5.0.0.post809
28
28
  Provides-Extra: cache
29
29
  Requires-Dist: redis >=4.3.4 ; extra == 'cache'
30
30
  Requires-Dist: orjson >=3.6.7 ; extra == 'cache'
@@ -18,7 +18,8 @@ nucliadb_utils/store.py,sha256=kQ35HemE0v4_Qg6xVqNIJi8vSFAYQtwI3rDtMsNy62Y,890
18
18
  nucliadb_utils/transaction.py,sha256=mwcI3aIHAvU5KOGqd_Uz_d1XQzXhk_-NWY8NqU1lfb0,7307
19
19
  nucliadb_utils/utilities.py,sha256=oz3tEODG2g3todnyvA-nW1Ou6xXDveL_tMKTDGdWXM4,15287
20
20
  nucliadb_utils/aiopynecone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
21
- nucliadb_utils/aiopynecone/client.py,sha256=kvGLCzSbDaMIWx0LK9WBAL7QsyuPmUjC7cPv5djMdFw,19028
21
+ nucliadb_utils/aiopynecone/client.py,sha256=T0S6aoVefnjyKgJAGzBJGnrhBDlvSAj5DJBqdGRuzZw,18407
22
+ nucliadb_utils/aiopynecone/exceptions.py,sha256=JJ-Ui9TSoHRe8WfkHR04S-t7NsPCy1xxluPH5R7fDl8,2816
22
23
  nucliadb_utils/aiopynecone/models.py,sha256=DVlCVrinHAaDxuii3fzdciFn4dqS5HTTyBavnoDlR2U,2024
23
24
  nucliadb_utils/audit/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
24
25
  nucliadb_utils/audit/audit.py,sha256=dn5ZnCVQUlCcvdjzaORghbrjk9QgVGrtkfIftq30Bp8,2819
@@ -63,8 +64,8 @@ nucliadb_utils/tests/indexing.py,sha256=YW2QhkhO9Q_8A4kKWJaWSvXvyQ_AiAwY1VylcfVQ
63
64
  nucliadb_utils/tests/local.py,sha256=c3gZJJWmvOftruJkIQIwB3q_hh3uxEhqGIAVWim1Bbk,1343
64
65
  nucliadb_utils/tests/nats.py,sha256=Tosonm9A9cusImyji80G4pgdXEHNVPaCLT5TbFK_ra0,7543
65
66
  nucliadb_utils/tests/s3.py,sha256=YB8QqDaBXxyhHonEHmeBbRRDmvB7sTOaKBSi8KBGokg,2330
66
- nucliadb_utils-5.0.0.post796.dist-info/METADATA,sha256=mwdR7jpGf8oA1T8pa62MXf_BajMORpePKeK7KpiujIM,2073
67
- nucliadb_utils-5.0.0.post796.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
68
- nucliadb_utils-5.0.0.post796.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
69
- nucliadb_utils-5.0.0.post796.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
70
- nucliadb_utils-5.0.0.post796.dist-info/RECORD,,
67
+ nucliadb_utils-5.0.0.post809.dist-info/METADATA,sha256=j1oedwxyH4rR_jpUuyUgFWnAcNImYZtGXSGvaIiXtnE,2073
68
+ nucliadb_utils-5.0.0.post809.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
69
+ nucliadb_utils-5.0.0.post809.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
70
+ nucliadb_utils-5.0.0.post809.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
71
+ nucliadb_utils-5.0.0.post809.dist-info/RECORD,,