nucliadb-utils 6.8.1.post4945__py3-none-any.whl → 6.9.5.post5434__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb_utils/const.py +1 -1
- nucliadb_utils/featureflagging.py +1 -4
- nucliadb_utils/settings.py +6 -0
- nucliadb_utils/storages/azure.py +47 -24
- nucliadb_utils/storages/exceptions.py +0 -6
- nucliadb_utils/storages/s3.py +1 -1
- nucliadb_utils/storages/settings.py +3 -0
- nucliadb_utils/storages/storage.py +1 -0
- nucliadb_utils/tests/azure.py +12 -3
- nucliadb_utils/tests/fixtures.py +4 -2
- nucliadb_utils/tests/nats.py +13 -2
- nucliadb_utils/utilities.py +8 -15
- {nucliadb_utils-6.8.1.post4945.dist-info → nucliadb_utils-6.9.5.post5434.dist-info}/METADATA +5 -5
- {nucliadb_utils-6.8.1.post4945.dist-info → nucliadb_utils-6.9.5.post5434.dist-info}/RECORD +16 -20
- nucliadb_utils/aiopynecone/__init__.py +0 -19
- nucliadb_utils/aiopynecone/client.py +0 -636
- nucliadb_utils/aiopynecone/exceptions.py +0 -131
- nucliadb_utils/aiopynecone/models.py +0 -139
- {nucliadb_utils-6.8.1.post4945.dist-info → nucliadb_utils-6.9.5.post5434.dist-info}/WHEEL +0 -0
- {nucliadb_utils-6.8.1.post4945.dist-info → nucliadb_utils-6.9.5.post5434.dist-info}/top_level.txt +0 -0
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
|
2
|
-
#
|
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
|
5
|
-
#
|
|
6
|
-
# AGPL:
|
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
|
10
|
-
# License, or (at your option) any later version.
|
|
11
|
-
#
|
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15
|
-
# GNU Affero General Public License for more details.
|
|
16
|
-
#
|
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
|
-
#
|
|
20
|
-
|
|
21
|
-
from typing import Any, Optional
|
|
22
|
-
|
|
23
|
-
import httpx
|
|
24
|
-
|
|
25
|
-
from nucliadb_telemetry.metrics import Counter
|
|
26
|
-
|
|
27
|
-
pinecone_errors_counter = Counter("pinecone_errors", labels={"type": "", "status_code": ""})
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class PineconeAPIError(Exception):
|
|
31
|
-
"""
|
|
32
|
-
Generic Pinecone API error.
|
|
33
|
-
"""
|
|
34
|
-
|
|
35
|
-
def __init__(
|
|
36
|
-
self,
|
|
37
|
-
http_status_code: int,
|
|
38
|
-
code: Optional[str] = None,
|
|
39
|
-
message: Optional[str] = None,
|
|
40
|
-
details: Optional[Any] = None,
|
|
41
|
-
):
|
|
42
|
-
self.http_status_code = http_status_code
|
|
43
|
-
self.code = code or ""
|
|
44
|
-
self.message = message or ""
|
|
45
|
-
self.details = details or {}
|
|
46
|
-
exc_message = '[{http_status_code}] message="{message}" code={code} details={details}'.format(
|
|
47
|
-
http_status_code=http_status_code,
|
|
48
|
-
message=message,
|
|
49
|
-
code=code,
|
|
50
|
-
details=details,
|
|
51
|
-
)
|
|
52
|
-
super().__init__(exc_message)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
class RetriablePineconeAPIError(PineconeAPIError):
|
|
56
|
-
"""
|
|
57
|
-
Raised when the client can retry the operation.
|
|
58
|
-
"""
|
|
59
|
-
|
|
60
|
-
pass
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
class PineconeRateLimitError(RetriablePineconeAPIError):
|
|
64
|
-
"""
|
|
65
|
-
Raised when the client has exceeded the rate limit to be able to backoff and retry.
|
|
66
|
-
"""
|
|
67
|
-
|
|
68
|
-
pass
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
class PineconeNeedsPlanUpgradeError(PineconeAPIError):
|
|
72
|
-
"""
|
|
73
|
-
Raised when the client needs to upgrade the plan to continue using the service.
|
|
74
|
-
"""
|
|
75
|
-
|
|
76
|
-
pass
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
class MetadataTooLargeError(ValueError):
|
|
80
|
-
"""
|
|
81
|
-
Raised when the metadata of a vector to be upserted is too large.
|
|
82
|
-
"""
|
|
83
|
-
|
|
84
|
-
pass
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def raise_for_status(operation: str, response: httpx.Response):
|
|
88
|
-
try:
|
|
89
|
-
response.raise_for_status()
|
|
90
|
-
except httpx.HTTPStatusError:
|
|
91
|
-
pinecone_errors_counter.inc(labels={"type": operation, "status_code": str(response.status_code)})
|
|
92
|
-
code = None
|
|
93
|
-
message = None
|
|
94
|
-
details = None
|
|
95
|
-
try:
|
|
96
|
-
resp_json = response.json()
|
|
97
|
-
error = resp_json.get("error") or {}
|
|
98
|
-
code = resp_json.get("code") or error.get("code")
|
|
99
|
-
message = resp_json.get("message") or error.get("message") or ""
|
|
100
|
-
details = resp_json.get("details") or error.get("details")
|
|
101
|
-
except Exception: # pragma: no cover
|
|
102
|
-
message = response.text
|
|
103
|
-
if response.status_code == 429:
|
|
104
|
-
if "month" in message:
|
|
105
|
-
raise PineconeNeedsPlanUpgradeError(
|
|
106
|
-
http_status_code=response.status_code,
|
|
107
|
-
code=code,
|
|
108
|
-
message=message,
|
|
109
|
-
details=details,
|
|
110
|
-
)
|
|
111
|
-
raise PineconeRateLimitError(
|
|
112
|
-
http_status_code=response.status_code,
|
|
113
|
-
code=code,
|
|
114
|
-
message=message,
|
|
115
|
-
details=details,
|
|
116
|
-
)
|
|
117
|
-
|
|
118
|
-
if str(response.status_code).startswith("5"):
|
|
119
|
-
raise RetriablePineconeAPIError(
|
|
120
|
-
http_status_code=response.status_code,
|
|
121
|
-
code=code,
|
|
122
|
-
message=message,
|
|
123
|
-
details=details,
|
|
124
|
-
)
|
|
125
|
-
|
|
126
|
-
raise PineconeAPIError(
|
|
127
|
-
http_status_code=response.status_code,
|
|
128
|
-
code=code,
|
|
129
|
-
message=message,
|
|
130
|
-
details=details,
|
|
131
|
-
)
|
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
|
2
|
-
#
|
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
|
5
|
-
#
|
|
6
|
-
# AGPL:
|
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
|
10
|
-
# License, or (at your option) any later version.
|
|
11
|
-
#
|
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15
|
-
# GNU Affero General Public License for more details.
|
|
16
|
-
#
|
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
|
-
#
|
|
20
|
-
import json
|
|
21
|
-
from typing import Any, Optional
|
|
22
|
-
|
|
23
|
-
import pydantic
|
|
24
|
-
from pydantic import BaseModel, Field, field_validator
|
|
25
|
-
from typing_extensions import Annotated
|
|
26
|
-
|
|
27
|
-
from nucliadb_utils.aiopynecone.exceptions import MetadataTooLargeError
|
|
28
|
-
|
|
29
|
-
KILO_BYTE = 1024
|
|
30
|
-
MAX_METADATA_SIZE = 40 * KILO_BYTE
|
|
31
|
-
MAX_INDEX_NAME_LENGTH = 45
|
|
32
|
-
MAX_VECTOR_ID_LENGTH = 512
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
# Requests
|
|
36
|
-
|
|
37
|
-
IndexNamePattern = r"^[a-z0-9-]+$"
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def validate_index_name(value, handler, info):
|
|
41
|
-
try:
|
|
42
|
-
return handler(value)
|
|
43
|
-
except pydantic.ValidationError as e:
|
|
44
|
-
if any(x["type"] == "string_pattern_mismatch" for x in e.errors()):
|
|
45
|
-
raise ValueError(
|
|
46
|
-
f"Invalid field_id: '{value}'. Pinecone index names must be a string with only "
|
|
47
|
-
"lowercase letters, numbers and dashes."
|
|
48
|
-
)
|
|
49
|
-
else:
|
|
50
|
-
raise e
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
IndexNameStr = Annotated[
|
|
54
|
-
str,
|
|
55
|
-
pydantic.StringConstraints(pattern=IndexNamePattern, min_length=1, max_length=MAX_INDEX_NAME_LENGTH),
|
|
56
|
-
pydantic.WrapValidator(validate_index_name),
|
|
57
|
-
]
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
class CreateIndexRequest(BaseModel):
|
|
61
|
-
name: IndexNameStr
|
|
62
|
-
dimension: int
|
|
63
|
-
metric: str
|
|
64
|
-
spec: dict[str, Any] = {}
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
class Vector(BaseModel):
|
|
68
|
-
id: str = Field(min_length=1, max_length=MAX_VECTOR_ID_LENGTH)
|
|
69
|
-
values: list[float]
|
|
70
|
-
metadata: dict[str, Any] = {}
|
|
71
|
-
|
|
72
|
-
@field_validator("metadata", mode="after")
|
|
73
|
-
@classmethod
|
|
74
|
-
def validate_metadata_size(cls, value):
|
|
75
|
-
json_value = json.dumps(value)
|
|
76
|
-
if len(json_value) > MAX_METADATA_SIZE:
|
|
77
|
-
raise MetadataTooLargeError(f"metadata size is too large: {len(json_value)} bytes")
|
|
78
|
-
return value
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
class UpsertRequest(BaseModel):
|
|
82
|
-
vectors: list[Vector]
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
# Responses
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
class CreateIndexResponse(BaseModel):
|
|
89
|
-
host: str
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
class VectorId(BaseModel):
|
|
93
|
-
id: str
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
class Pagination(BaseModel):
|
|
97
|
-
next: str
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
class ListResponse(BaseModel):
|
|
101
|
-
vectors: list[VectorId]
|
|
102
|
-
pagination: Optional[Pagination] = None
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
class VectorMatch(BaseModel):
|
|
106
|
-
id: str
|
|
107
|
-
score: float
|
|
108
|
-
# Only populated if `includeValues` is set to `True`
|
|
109
|
-
values: Optional[list[float]] = None
|
|
110
|
-
# Only populated if `includeMetadata` is set to `True`
|
|
111
|
-
metadata: Optional[dict[str, Any]] = None
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
class QueryResponse(BaseModel):
|
|
115
|
-
matches: list[VectorMatch]
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
class IndexNamespaceStats(BaseModel):
|
|
119
|
-
vectorCount: int
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
class IndexStats(BaseModel):
|
|
123
|
-
dimension: int
|
|
124
|
-
namespaces: dict[str, IndexNamespaceStats] = {}
|
|
125
|
-
totalVectorCount: int
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
class IndexStatus(BaseModel):
|
|
129
|
-
ready: bool
|
|
130
|
-
state: str
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
class IndexDescription(BaseModel):
|
|
134
|
-
dimension: int
|
|
135
|
-
host: str
|
|
136
|
-
metric: str
|
|
137
|
-
name: str
|
|
138
|
-
spec: dict[str, Any]
|
|
139
|
-
status: IndexStatus
|
|
File without changes
|
{nucliadb_utils-6.8.1.post4945.dist-info → nucliadb_utils-6.9.5.post5434.dist-info}/top_level.txt
RENAMED
|
File without changes
|