nucliadb-utils 5.0.0.post796__py3-none-any.whl → 5.0.0.post809__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb_utils/aiopynecone/client.py +34 -48
- nucliadb_utils/aiopynecone/exceptions.py +90 -0
- {nucliadb_utils-5.0.0.post796.dist-info → nucliadb_utils-5.0.0.post809.dist-info}/METADATA +3 -3
- {nucliadb_utils-5.0.0.post796.dist-info → nucliadb_utils-5.0.0.post809.dist-info}/RECORD +7 -6
- {nucliadb_utils-5.0.0.post796.dist-info → nucliadb_utils-5.0.0.post809.dist-info}/WHEEL +0 -0
- {nucliadb_utils-5.0.0.post796.dist-info → nucliadb_utils-5.0.0.post809.dist-info}/top_level.txt +0 -0
- {nucliadb_utils-5.0.0.post796.dist-info → nucliadb_utils-5.0.0.post809.dist-info}/zip-safe +0 -0
@@ -25,9 +25,15 @@ from collections.abc import AsyncIterable, Iterable
|
|
25
25
|
from itertools import islice
|
26
26
|
from typing import Any, AsyncGenerator, Optional
|
27
27
|
|
28
|
+
import backoff
|
28
29
|
import httpx
|
29
30
|
|
30
31
|
from nucliadb_telemetry.metrics import Observer
|
32
|
+
from nucliadb_utils.aiopynecone.exceptions import (
|
33
|
+
PineconeAPIError,
|
34
|
+
PineconeRateLimitError,
|
35
|
+
raise_for_status,
|
36
|
+
)
|
31
37
|
from nucliadb_utils.aiopynecone.models import (
|
32
38
|
CreateIndexResponse,
|
33
39
|
ListResponse,
|
@@ -38,9 +44,13 @@ from nucliadb_utils.aiopynecone.models import (
|
|
38
44
|
|
39
45
|
logger = logging.getLogger(__name__)
|
40
46
|
|
47
|
+
|
41
48
|
pinecone_observer = Observer(
|
42
49
|
"pinecone_client",
|
43
50
|
labels={"type": ""},
|
51
|
+
error_mappings={
|
52
|
+
"rate_limit": PineconeRateLimitError,
|
53
|
+
},
|
44
54
|
)
|
45
55
|
|
46
56
|
DEFAULT_TIMEOUT = 30
|
@@ -55,25 +65,18 @@ MAX_UPSERT_PAYLOAD_SIZE = 2 * MEGA_BYTE
|
|
55
65
|
MAX_DELETE_BATCH_SIZE = 1000
|
56
66
|
|
57
67
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
exc_message = '[{http_status_code}] message="{message}" code={code} details={details}'.format(
|
71
|
-
http_status_code=http_status_code,
|
72
|
-
message=message,
|
73
|
-
code=code,
|
74
|
-
details=details,
|
75
|
-
)
|
76
|
-
super().__init__(exc_message)
|
68
|
+
RETRIABLE_EXCEPTIONS = (
|
69
|
+
PineconeRateLimitError,
|
70
|
+
httpx.ConnectError,
|
71
|
+
httpx.NetworkError,
|
72
|
+
)
|
73
|
+
|
74
|
+
backoff_handler = backoff.on_exception(
|
75
|
+
backoff.expo,
|
76
|
+
RETRIABLE_EXCEPTIONS,
|
77
|
+
jitter=backoff.random_jitter,
|
78
|
+
max_tries=4,
|
79
|
+
)
|
77
80
|
|
78
81
|
|
79
82
|
class ControlPlane:
|
@@ -105,7 +108,7 @@ class ControlPlane:
|
|
105
108
|
}
|
106
109
|
headers = {"Api-Key": self.api_key}
|
107
110
|
http_response = await self.http_session.post("/indexes", json=payload, headers=headers)
|
108
|
-
raise_for_status(http_response)
|
111
|
+
raise_for_status("create_index", http_response)
|
109
112
|
response = CreateIndexResponse.model_validate(http_response.json())
|
110
113
|
return response.host
|
111
114
|
|
@@ -121,7 +124,7 @@ class ControlPlane:
|
|
121
124
|
if response.status_code == 404: # pragma: no cover
|
122
125
|
logger.warning("Pinecone index not found.", extra={"index_name": name})
|
123
126
|
return
|
124
|
-
raise_for_status(response)
|
127
|
+
raise_for_status("delete_index", response)
|
125
128
|
|
126
129
|
|
127
130
|
class DataPlane:
|
@@ -147,6 +150,7 @@ class DataPlane:
|
|
147
150
|
def _get_request_timeout(self, timeout: Optional[float] = None) -> Optional[float]:
|
148
151
|
return timeout or self.client_timeout
|
149
152
|
|
153
|
+
@backoff_handler
|
150
154
|
@pinecone_observer.wrap({"type": "upsert"})
|
151
155
|
async def upsert(self, vectors: list[Vector], timeout: Optional[float] = None) -> None:
|
152
156
|
"""
|
@@ -165,7 +169,7 @@ class DataPlane:
|
|
165
169
|
if request_timeout is not None:
|
166
170
|
post_kwargs["timeout"] = timeout
|
167
171
|
response = await self.http_session.post("/vectors/upsert", **post_kwargs)
|
168
|
-
raise_for_status(response)
|
172
|
+
raise_for_status("upsert", response)
|
169
173
|
|
170
174
|
def _estimate_upsert_batch_size(self, vectors: list[Vector]) -> int:
|
171
175
|
"""
|
@@ -220,6 +224,7 @@ class DataPlane:
|
|
220
224
|
|
221
225
|
await asyncio.gather(*tasks)
|
222
226
|
|
227
|
+
@backoff_handler
|
223
228
|
@pinecone_observer.wrap({"type": "delete"})
|
224
229
|
async def delete(self, ids: list[str], timeout: Optional[float] = None) -> None:
|
225
230
|
"""
|
@@ -242,8 +247,9 @@ class DataPlane:
|
|
242
247
|
if request_timeout is not None:
|
243
248
|
post_kwargs["timeout"] = timeout
|
244
249
|
response = await self.http_session.post("/vectors/delete", **post_kwargs)
|
245
|
-
raise_for_status(response)
|
250
|
+
raise_for_status("delete", response)
|
246
251
|
|
252
|
+
@backoff_handler
|
247
253
|
@pinecone_observer.wrap({"type": "list_page"})
|
248
254
|
async def list_page(
|
249
255
|
self,
|
@@ -279,7 +285,7 @@ class DataPlane:
|
|
279
285
|
"/vectors/list",
|
280
286
|
**post_kwargs,
|
281
287
|
)
|
282
|
-
raise_for_status(response)
|
288
|
+
raise_for_status("list_page", response)
|
283
289
|
return ListResponse.model_validate(response.json())
|
284
290
|
|
285
291
|
async def list_all(
|
@@ -306,6 +312,7 @@ class DataPlane:
|
|
306
312
|
break
|
307
313
|
pagination_token = response.pagination.next
|
308
314
|
|
315
|
+
@backoff_handler
|
309
316
|
@pinecone_observer.wrap({"type": "delete_all"})
|
310
317
|
async def delete_all(self, timeout: Optional[float] = None):
|
311
318
|
"""
|
@@ -324,7 +331,7 @@ class DataPlane:
|
|
324
331
|
post_kwargs["timeout"] = timeout
|
325
332
|
response = await self.http_session.post("/vectors/delete", **post_kwargs)
|
326
333
|
try:
|
327
|
-
raise_for_status(response)
|
334
|
+
raise_for_status("delete_all", response)
|
328
335
|
except PineconeAPIError as err:
|
329
336
|
if err.http_status_code == 404 and err.code == 5: # pragma: no cover
|
330
337
|
# Namespace not found. No vectors to delete.
|
@@ -366,6 +373,7 @@ class DataPlane:
|
|
366
373
|
|
367
374
|
await asyncio.gather(*tasks)
|
368
375
|
|
376
|
+
@backoff_handler
|
369
377
|
@pinecone_observer.wrap({"type": "query"})
|
370
378
|
async def query(
|
371
379
|
self,
|
@@ -404,7 +412,7 @@ class DataPlane:
|
|
404
412
|
if request_timeout is not None:
|
405
413
|
post_kwargs["timeout"] = timeout
|
406
414
|
response = await self.http_session.post("/query", **post_kwargs)
|
407
|
-
raise_for_status(response)
|
415
|
+
raise_for_status("query", response)
|
408
416
|
return QueryResponse.model_validate(response.json())
|
409
417
|
|
410
418
|
|
@@ -459,28 +467,6 @@ class PineconeSession:
|
|
459
467
|
return DataPlane(api_key=api_key, index_host_session=index_host_session, timeout=timeout)
|
460
468
|
|
461
469
|
|
462
|
-
def raise_for_status(response: httpx.Response):
|
463
|
-
try:
|
464
|
-
response.raise_for_status()
|
465
|
-
except httpx.HTTPStatusError:
|
466
|
-
code = None
|
467
|
-
message = None
|
468
|
-
details = None
|
469
|
-
try:
|
470
|
-
resp_json = response.json()
|
471
|
-
code = resp_json.get("code")
|
472
|
-
message = resp_json.get("message")
|
473
|
-
details = resp_json.get("details")
|
474
|
-
except Exception:
|
475
|
-
message = response.text
|
476
|
-
raise PineconeAPIError(
|
477
|
-
http_status_code=response.status_code,
|
478
|
-
code=code,
|
479
|
-
message=message,
|
480
|
-
details=details,
|
481
|
-
)
|
482
|
-
|
483
|
-
|
484
470
|
def batchify(iterable: Iterable, batch_size: int):
|
485
471
|
"""
|
486
472
|
Split an iterable into batches of batch_size
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
from typing import Any, Optional
|
22
|
+
|
23
|
+
import httpx
|
24
|
+
|
25
|
+
from nucliadb_telemetry.metrics import Counter
|
26
|
+
|
27
|
+
pinecone_errors_counter = Counter("pinecone_errors", labels={"type": ""})
|
28
|
+
|
29
|
+
|
30
|
+
class PineconeAPIError(Exception):
|
31
|
+
"""
|
32
|
+
Generic Pinecone API error.
|
33
|
+
"""
|
34
|
+
|
35
|
+
def __init__(
|
36
|
+
self,
|
37
|
+
http_status_code: int,
|
38
|
+
code: Optional[str] = None,
|
39
|
+
message: Optional[str] = None,
|
40
|
+
details: Optional[Any] = None,
|
41
|
+
):
|
42
|
+
self.http_status_code = http_status_code
|
43
|
+
self.code = code or ""
|
44
|
+
self.message = message or ""
|
45
|
+
self.details = details or {}
|
46
|
+
exc_message = '[{http_status_code}] message="{message}" code={code} details={details}'.format(
|
47
|
+
http_status_code=http_status_code,
|
48
|
+
message=message,
|
49
|
+
code=code,
|
50
|
+
details=details,
|
51
|
+
)
|
52
|
+
super().__init__(exc_message)
|
53
|
+
|
54
|
+
|
55
|
+
class PineconeRateLimitError(PineconeAPIError):
|
56
|
+
"""
|
57
|
+
Raised when the client has exceeded the rate limit to be able to backoff and retry.
|
58
|
+
"""
|
59
|
+
|
60
|
+
pass
|
61
|
+
|
62
|
+
|
63
|
+
def raise_for_status(operation: str, response: httpx.Response):
|
64
|
+
try:
|
65
|
+
response.raise_for_status()
|
66
|
+
except httpx.HTTPStatusError:
|
67
|
+
pinecone_errors_counter.inc(labels={"type": operation})
|
68
|
+
code = None
|
69
|
+
message = None
|
70
|
+
details = None
|
71
|
+
try:
|
72
|
+
resp_json = response.json()
|
73
|
+
code = resp_json.get("code")
|
74
|
+
message = resp_json.get("message")
|
75
|
+
details = resp_json.get("details")
|
76
|
+
except Exception:
|
77
|
+
message = response.text
|
78
|
+
if response.status_code == 429:
|
79
|
+
raise PineconeRateLimitError(
|
80
|
+
http_status_code=response.status_code,
|
81
|
+
code=code,
|
82
|
+
message=message,
|
83
|
+
details=details,
|
84
|
+
)
|
85
|
+
raise PineconeAPIError(
|
86
|
+
http_status_code=response.status_code,
|
87
|
+
code=code,
|
88
|
+
message=message,
|
89
|
+
details=details,
|
90
|
+
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nucliadb_utils
|
3
|
-
Version: 5.0.0.post796
|
3
|
+
Version: 5.0.0.post809
|
4
4
|
Home-page: https://nuclia.com
|
5
5
|
License: BSD
|
6
6
|
Classifier: Development Status :: 4 - Beta
|
@@ -23,8 +23,8 @@ Requires-Dist: PyNaCl
|
|
23
23
|
Requires-Dist: pyjwt >=2.4.0
|
24
24
|
Requires-Dist: memorylru >=1.1.2
|
25
25
|
Requires-Dist: mrflagly
|
26
|
-
Requires-Dist: nucliadb-protos >=5.0.0.post796
|
27
|
-
Requires-Dist: nucliadb-telemetry >=5.0.0.post796
|
26
|
+
Requires-Dist: nucliadb-protos >=5.0.0.post809
|
27
|
+
Requires-Dist: nucliadb-telemetry >=5.0.0.post809
|
28
28
|
Provides-Extra: cache
|
29
29
|
Requires-Dist: redis >=4.3.4 ; extra == 'cache'
|
30
30
|
Requires-Dist: orjson >=3.6.7 ; extra == 'cache'
|
@@ -18,7 +18,8 @@ nucliadb_utils/store.py,sha256=kQ35HemE0v4_Qg6xVqNIJi8vSFAYQtwI3rDtMsNy62Y,890
|
|
18
18
|
nucliadb_utils/transaction.py,sha256=mwcI3aIHAvU5KOGqd_Uz_d1XQzXhk_-NWY8NqU1lfb0,7307
|
19
19
|
nucliadb_utils/utilities.py,sha256=oz3tEODG2g3todnyvA-nW1Ou6xXDveL_tMKTDGdWXM4,15287
|
20
20
|
nucliadb_utils/aiopynecone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
21
|
-
nucliadb_utils/aiopynecone/client.py,sha256=
|
21
|
+
nucliadb_utils/aiopynecone/client.py,sha256=T0S6aoVefnjyKgJAGzBJGnrhBDlvSAj5DJBqdGRuzZw,18407
|
22
|
+
nucliadb_utils/aiopynecone/exceptions.py,sha256=JJ-Ui9TSoHRe8WfkHR04S-t7NsPCy1xxluPH5R7fDl8,2816
|
22
23
|
nucliadb_utils/aiopynecone/models.py,sha256=DVlCVrinHAaDxuii3fzdciFn4dqS5HTTyBavnoDlR2U,2024
|
23
24
|
nucliadb_utils/audit/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
24
25
|
nucliadb_utils/audit/audit.py,sha256=dn5ZnCVQUlCcvdjzaORghbrjk9QgVGrtkfIftq30Bp8,2819
|
@@ -63,8 +64,8 @@ nucliadb_utils/tests/indexing.py,sha256=YW2QhkhO9Q_8A4kKWJaWSvXvyQ_AiAwY1VylcfVQ
|
|
63
64
|
nucliadb_utils/tests/local.py,sha256=c3gZJJWmvOftruJkIQIwB3q_hh3uxEhqGIAVWim1Bbk,1343
|
64
65
|
nucliadb_utils/tests/nats.py,sha256=Tosonm9A9cusImyji80G4pgdXEHNVPaCLT5TbFK_ra0,7543
|
65
66
|
nucliadb_utils/tests/s3.py,sha256=YB8QqDaBXxyhHonEHmeBbRRDmvB7sTOaKBSi8KBGokg,2330
|
66
|
-
nucliadb_utils-5.0.0.
|
67
|
-
nucliadb_utils-5.0.0.
|
68
|
-
nucliadb_utils-5.0.0.
|
69
|
-
nucliadb_utils-5.0.0.
|
70
|
-
nucliadb_utils-5.0.0.
|
67
|
+
nucliadb_utils-5.0.0.post809.dist-info/METADATA,sha256=j1oedwxyH4rR_jpUuyUgFWnAcNImYZtGXSGvaIiXtnE,2073
|
68
|
+
nucliadb_utils-5.0.0.post809.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
|
69
|
+
nucliadb_utils-5.0.0.post809.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
|
70
|
+
nucliadb_utils-5.0.0.post809.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
71
|
+
nucliadb_utils-5.0.0.post809.dist-info/RECORD,,
|
File without changes
|
{nucliadb_utils-5.0.0.post796.dist-info → nucliadb_utils-5.0.0.post809.dist-info}/top_level.txt
RENAMED
File without changes
|
File without changes
|