nucliadb 6.2.1.post3042__py3-none-any.whl → 6.2.1.post3059__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/learning_proxy.py +6 -4
- nucliadb/writer/api/v1/vectorsets.py +117 -19
- {nucliadb-6.2.1.post3042.dist-info → nucliadb-6.2.1.post3059.dist-info}/METADATA +5 -5
- {nucliadb-6.2.1.post3042.dist-info → nucliadb-6.2.1.post3059.dist-info}/RECORD +8 -9
- nucliadb/writer/vectorsets.py +0 -132
- {nucliadb-6.2.1.post3042.dist-info → nucliadb-6.2.1.post3059.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post3042.dist-info → nucliadb-6.2.1.post3059.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post3042.dist-info → nucliadb-6.2.1.post3059.dist-info}/top_level.txt +0 -0
- {nucliadb-6.2.1.post3042.dist-info → nucliadb-6.2.1.post3059.dist-info}/zip-safe +0 -0
nucliadb/learning_proxy.py
CHANGED
@@ -155,10 +155,9 @@ class LearningConfiguration(BaseModel):
|
|
155
155
|
|
156
156
|
|
157
157
|
class ProxiedLearningConfigError(Exception):
|
158
|
-
def __init__(self, status_code: int, content:
|
158
|
+
def __init__(self, status_code: int, content: Union[str, dict[str, Any]]):
|
159
159
|
self.status_code = status_code
|
160
160
|
self.content = content
|
161
|
-
self.content_type = content_type
|
162
161
|
|
163
162
|
|
164
163
|
def raise_for_status(response: httpx.Response) -> None:
|
@@ -166,10 +165,13 @@ def raise_for_status(response: httpx.Response) -> None:
|
|
166
165
|
response.raise_for_status()
|
167
166
|
except httpx.HTTPStatusError as err:
|
168
167
|
content_type = err.response.headers.get("Content-Type", "application/json")
|
168
|
+
if content_type == "application/json":
|
169
|
+
content = err.response.json()
|
170
|
+
else:
|
171
|
+
content = err.response.text
|
169
172
|
raise ProxiedLearningConfigError(
|
170
173
|
status_code=err.response.status_code,
|
171
|
-
content=
|
172
|
-
content_type=content_type,
|
174
|
+
content=content,
|
173
175
|
)
|
174
176
|
|
175
177
|
|
@@ -18,48 +18,116 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
from fastapi import Response
|
21
|
+
from fastapi import HTTPException, Response
|
22
22
|
from fastapi_versioning import version
|
23
23
|
from starlette.requests import Request
|
24
24
|
|
25
25
|
from nucliadb import learning_proxy
|
26
|
+
from nucliadb.common import datamanagers
|
26
27
|
from nucliadb.ingest.orm.exceptions import VectorSetConflict
|
27
|
-
from nucliadb.
|
28
|
-
from nucliadb.writer import
|
28
|
+
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
|
29
|
+
from nucliadb.writer import logger
|
29
30
|
from nucliadb.writer.api.v1.router import KB_PREFIX, api
|
30
31
|
from nucliadb_models.resource import (
|
31
32
|
NucliaDBRoles,
|
32
33
|
)
|
34
|
+
from nucliadb_models.vectorsets import CreatedVectorSet
|
35
|
+
from nucliadb_protos import knowledgebox_pb2
|
36
|
+
from nucliadb_telemetry import errors
|
33
37
|
from nucliadb_utils.authentication import requires_one
|
38
|
+
from nucliadb_utils.utilities import get_storage
|
34
39
|
|
35
40
|
|
36
41
|
@api.post(
|
37
42
|
f"/{KB_PREFIX}/{{kbid}}/vectorsets/{{vectorset_id}}",
|
38
|
-
status_code=
|
43
|
+
status_code=201,
|
39
44
|
summary="Add a vectorset to Knowledge Box",
|
40
|
-
tags=["
|
45
|
+
tags=["VectorSets"],
|
41
46
|
# TODO: remove when the feature is mature
|
42
47
|
include_in_schema=False,
|
43
48
|
)
|
44
49
|
@requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
|
45
50
|
@version(1)
|
46
|
-
async def add_vectorset(request: Request, kbid: str, vectorset_id: str) ->
|
51
|
+
async def add_vectorset(request: Request, kbid: str, vectorset_id: str) -> CreatedVectorSet:
|
47
52
|
try:
|
48
|
-
await
|
53
|
+
await _add_vectorset(kbid, vectorset_id)
|
54
|
+
|
49
55
|
except learning_proxy.ProxiedLearningConfigError as err:
|
50
|
-
|
56
|
+
raise HTTPException(
|
51
57
|
status_code=err.status_code,
|
52
|
-
|
53
|
-
|
58
|
+
detail=err.content,
|
59
|
+
)
|
60
|
+
|
61
|
+
except VectorSetConflict:
|
62
|
+
raise HTTPException(
|
63
|
+
status_code=409,
|
64
|
+
detail="A vectorset with this embedding model already exists in your KB",
|
65
|
+
)
|
66
|
+
|
67
|
+
return CreatedVectorSet(id=vectorset_id)
|
68
|
+
|
69
|
+
|
70
|
+
async def _add_vectorset(kbid: str, vectorset_id: str) -> None:
|
71
|
+
# First off, add the vectorset to the learning configuration if it's not already there
|
72
|
+
lconfig = await learning_proxy.get_configuration(kbid)
|
73
|
+
assert lconfig is not None
|
74
|
+
semantic_models = lconfig.model_dump()["semantic_models"]
|
75
|
+
if vectorset_id not in semantic_models:
|
76
|
+
semantic_models.append(vectorset_id)
|
77
|
+
await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
|
78
|
+
lconfig = await learning_proxy.get_configuration(kbid)
|
79
|
+
assert lconfig is not None
|
80
|
+
|
81
|
+
# Then, add the vectorset to the index if it's not already there
|
82
|
+
storage = await get_storage()
|
83
|
+
vectorset_config = get_vectorset_config(lconfig, vectorset_id)
|
84
|
+
async with datamanagers.with_rw_transaction() as txn:
|
85
|
+
kbobj = KnowledgeBox(txn, storage, kbid)
|
86
|
+
await kbobj.create_vectorset(vectorset_config)
|
87
|
+
await txn.commit()
|
88
|
+
|
89
|
+
|
90
|
+
def get_vectorset_config(
|
91
|
+
learning_config: learning_proxy.LearningConfiguration, vectorset_id: str
|
92
|
+
) -> knowledgebox_pb2.VectorSetConfig:
|
93
|
+
"""
|
94
|
+
Create a VectorSetConfig from a LearningConfiguration for a given vectorset_id
|
95
|
+
"""
|
96
|
+
vectorset_config = knowledgebox_pb2.VectorSetConfig(vectorset_id=vectorset_id)
|
97
|
+
vectorset_index_config = knowledgebox_pb2.VectorIndexConfig(
|
98
|
+
vector_type=knowledgebox_pb2.VectorType.DENSE_F32,
|
99
|
+
)
|
100
|
+
model_config = learning_config.semantic_model_configs[vectorset_id]
|
101
|
+
|
102
|
+
# Parse similarity function
|
103
|
+
parsed_similarity = learning_proxy.SimilarityFunction(model_config.similarity)
|
104
|
+
if parsed_similarity == learning_proxy.SimilarityFunction.COSINE.value:
|
105
|
+
vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.COSINE
|
106
|
+
elif parsed_similarity == learning_proxy.SimilarityFunction.DOT.value:
|
107
|
+
vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.DOT
|
108
|
+
else:
|
109
|
+
raise ValueError(
|
110
|
+
f"Unknown similarity function {model_config.similarity}, parsed as {parsed_similarity}"
|
54
111
|
)
|
55
|
-
|
112
|
+
|
113
|
+
# Parse vector dimension
|
114
|
+
vectorset_index_config.vector_dimension = model_config.size
|
115
|
+
|
116
|
+
# Parse matryoshka dimensions
|
117
|
+
if len(model_config.matryoshka_dims) > 0:
|
118
|
+
vectorset_index_config.normalize_vectors = True
|
119
|
+
vectorset_config.matryoshka_dimensions.extend(model_config.matryoshka_dims)
|
120
|
+
else:
|
121
|
+
vectorset_index_config.normalize_vectors = False
|
122
|
+
vectorset_config.vectorset_index_config.CopyFrom(vectorset_index_config)
|
123
|
+
return vectorset_config
|
56
124
|
|
57
125
|
|
58
126
|
@api.delete(
|
59
127
|
f"/{KB_PREFIX}/{{kbid}}/vectorsets/{{vectorset_id}}",
|
60
|
-
status_code=
|
128
|
+
status_code=204,
|
61
129
|
summary="Delete vectorset from Knowledge Box",
|
62
|
-
tags=["
|
130
|
+
tags=["VectorSets"],
|
63
131
|
# TODO: remove when the feature is mature
|
64
132
|
include_in_schema=False,
|
65
133
|
)
|
@@ -67,13 +135,43 @@ async def add_vectorset(request: Request, kbid: str, vectorset_id: str) -> Respo
|
|
67
135
|
@version(1)
|
68
136
|
async def delete_vectorset(request: Request, kbid: str, vectorset_id: str) -> Response:
|
69
137
|
try:
|
70
|
-
await
|
138
|
+
await _delete_vectorset(kbid, vectorset_id)
|
139
|
+
|
71
140
|
except VectorSetConflict as exc:
|
72
|
-
|
141
|
+
raise HTTPException(
|
142
|
+
status_code=409,
|
143
|
+
detail=str(exc),
|
144
|
+
)
|
145
|
+
|
73
146
|
except learning_proxy.ProxiedLearningConfigError as err:
|
74
|
-
|
147
|
+
raise HTTPException(
|
75
148
|
status_code=err.status_code,
|
76
|
-
|
77
|
-
|
149
|
+
detail=err.content,
|
150
|
+
)
|
151
|
+
|
152
|
+
return Response(status_code=204)
|
153
|
+
|
154
|
+
|
155
|
+
async def _delete_vectorset(kbid: str, vectorset_id: str) -> None:
|
156
|
+
lconfig = await learning_proxy.get_configuration(kbid)
|
157
|
+
if lconfig is not None:
|
158
|
+
semantic_models = lconfig.model_dump()["semantic_models"]
|
159
|
+
if vectorset_id in semantic_models:
|
160
|
+
semantic_models.remove(vectorset_id)
|
161
|
+
await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
|
162
|
+
|
163
|
+
storage = await get_storage()
|
164
|
+
try:
|
165
|
+
async with datamanagers.with_rw_transaction() as txn:
|
166
|
+
kbobj = KnowledgeBox(txn, storage, kbid)
|
167
|
+
await kbobj.delete_vectorset(vectorset_id=vectorset_id)
|
168
|
+
await txn.commit()
|
169
|
+
|
170
|
+
except VectorSetConflict:
|
171
|
+
# caller should handle this error
|
172
|
+
raise
|
173
|
+
except Exception as ex:
|
174
|
+
errors.capture_exception(ex)
|
175
|
+
logger.exception(
|
176
|
+
"Could not delete vectorset from index", extra={"kbid": kbid, "vectorset_id": vectorset_id}
|
78
177
|
)
|
79
|
-
return Response(status_code=200)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.
|
3
|
+
Version: 6.2.1.post3059
|
4
4
|
Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
|
5
5
|
Author: NucliaDB Community
|
6
6
|
Author-email: nucliadb@nuclia.com
|
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
23
23
|
Requires-Python: >=3.9, <4
|
24
24
|
Description-Content-Type: text/markdown
|
25
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.
|
26
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.
|
27
|
-
Requires-Dist: nucliadb-protos>=6.2.1.
|
28
|
-
Requires-Dist: nucliadb-models>=6.2.1.
|
25
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3059
|
26
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3059
|
27
|
+
Requires-Dist: nucliadb-protos>=6.2.1.post3059
|
28
|
+
Requires-Dist: nucliadb-models>=6.2.1.post3059
|
29
29
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
30
30
|
Requires-Dist: nuclia-models>=0.24.2
|
31
31
|
Requires-Dist: uvicorn
|
@@ -32,7 +32,7 @@ migrations/pg/0003_catalog_kbid_index.py,sha256=uKq_vtnuf73GVf0mtl2rhzdk_czAoEU1
|
|
32
32
|
migrations/pg/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
33
33
|
nucliadb/__init__.py,sha256=_abCmDJ_0ku483Os4UAjPX7Nywm39cQgAV_DiyjsKeQ,891
|
34
34
|
nucliadb/health.py,sha256=UIxxA4oms4HIsCRZM_SZsdkIZIlgzmOxw-qSHLlWuak,3465
|
35
|
-
nucliadb/learning_proxy.py,sha256=
|
35
|
+
nucliadb/learning_proxy.py,sha256=rQ9gOLy_NwcVgsSi4jyYYHFdo6Vnb-1tEJ4kz2PIo_4,19411
|
36
36
|
nucliadb/metrics_exporter.py,sha256=Rz6G7V_C_GTZCFzd0xEtIfixtZgUuffnr4rDKCbXXWM,5595
|
37
37
|
nucliadb/openapi.py,sha256=wDiw0dVEvTpJvbatkJ0JZLkKm9RItZT5PWRHjqRfqTA,2272
|
38
38
|
nucliadb/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -300,7 +300,6 @@ nucliadb/writer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
300
300
|
nucliadb/writer/run.py,sha256=euVZ_rtHDXs-O1kB-Pt1Id8eft9CYVpWH3zJzEoEqls,1448
|
301
301
|
nucliadb/writer/settings.py,sha256=pA9aMAvY8H6zvsxAOdGY8SZLrThDvJ8KLhluGI0GxnQ,3288
|
302
302
|
nucliadb/writer/utilities.py,sha256=AZ5qEny1Xm0IDsFtH13oJa2usvJZK8f0FdgF1LrnLCw,1036
|
303
|
-
nucliadb/writer/vectorsets.py,sha256=18XJvsyi0-tePQWig8dl5qaNPaufEZb0-uD22IAOTa0,5648
|
304
303
|
nucliadb/writer/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
305
304
|
nucliadb/writer/api/constants.py,sha256=qWEDjFUycrEZnSJyLnNK4PQNodU2oVmkO4NycaEZtio,1738
|
306
305
|
nucliadb/writer/api/utils.py,sha256=wIQHlU8RQiIGVLI72suvyVIKlCU44Unh0Ae0IiN6Qwo,1313
|
@@ -315,7 +314,7 @@ nucliadb/writer/api/v1/services.py,sha256=U8OGxhA1tdt-wxw2uDAjFpwFXFEXSDTfBe1iV5
|
|
315
314
|
nucliadb/writer/api/v1/slug.py,sha256=xlVBDBpRi9bNulpBHZwhyftVvulfE0zFm1XZIWl-AKY,2389
|
316
315
|
nucliadb/writer/api/v1/transaction.py,sha256=d2Vbgnkk_-FLGSTt3vfldwiJIUf0XoyD0wP1jQNz_DY,2430
|
317
316
|
nucliadb/writer/api/v1/upload.py,sha256=VOeqNTrZx1_z8iaKjM7p8fVlVcIYMtnQNK1dm72ct6k,33161
|
318
|
-
nucliadb/writer/api/v1/vectorsets.py,sha256=
|
317
|
+
nucliadb/writer/api/v1/vectorsets.py,sha256=7gT_aQNYLmNw1Ows_8Bpv-MdmipwD-XcAgX3aUpDX1Q,6745
|
319
318
|
nucliadb/writer/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
320
319
|
nucliadb/writer/resource/audit.py,sha256=FvxMZPzrNHtd31HgpZEvxzwAkbxJTZRhPLqRYYJi3tA,1426
|
321
320
|
nucliadb/writer/resource/basic.py,sha256=l9zD-Qiq4eUkHezMf0w1Ksx2izKYLYuNoMIlXcNxxpM,11163
|
@@ -330,9 +329,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
330
329
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
331
330
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
332
331
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
333
|
-
nucliadb-6.2.1.
|
334
|
-
nucliadb-6.2.1.
|
335
|
-
nucliadb-6.2.1.
|
336
|
-
nucliadb-6.2.1.
|
337
|
-
nucliadb-6.2.1.
|
338
|
-
nucliadb-6.2.1.
|
332
|
+
nucliadb-6.2.1.post3059.dist-info/METADATA,sha256=dGsG9jFB0KwE5eGxUB1DhoaqJmIaUzyiTDKhOJ0eSdg,4603
|
333
|
+
nucliadb-6.2.1.post3059.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
334
|
+
nucliadb-6.2.1.post3059.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
335
|
+
nucliadb-6.2.1.post3059.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
336
|
+
nucliadb-6.2.1.post3059.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
337
|
+
nucliadb-6.2.1.post3059.dist-info/RECORD,,
|
nucliadb/writer/vectorsets.py
DELETED
@@ -1,132 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
21
|
-
#
|
22
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
23
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
24
|
-
#
|
25
|
-
# AGPL:
|
26
|
-
# This program is free software: you can redistribute it and/or modify
|
27
|
-
# it under the terms of the GNU Affero General Public License as
|
28
|
-
# published by the Free Software Foundation, either version 3 of the
|
29
|
-
# License, or (at your option) any later version.
|
30
|
-
#
|
31
|
-
# This program is distributed in the hope that it will be useful,
|
32
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
33
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
34
|
-
# GNU Affero General Public License for more details.
|
35
|
-
#
|
36
|
-
# You should have received a copy of the GNU Affero General Public License
|
37
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
38
|
-
#
|
39
|
-
|
40
|
-
from nucliadb import learning_proxy
|
41
|
-
from nucliadb.common import datamanagers
|
42
|
-
from nucliadb.ingest.orm.exceptions import VectorSetConflict
|
43
|
-
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
|
44
|
-
from nucliadb.writer import logger
|
45
|
-
from nucliadb_protos import knowledgebox_pb2
|
46
|
-
from nucliadb_telemetry import errors
|
47
|
-
from nucliadb_utils.utilities import get_storage
|
48
|
-
|
49
|
-
|
50
|
-
async def add(kbid: str, vectorset_id: str) -> None:
|
51
|
-
# First off, add the vectorset to the learning configuration if it's not already there
|
52
|
-
lconfig = await learning_proxy.get_configuration(kbid)
|
53
|
-
assert lconfig is not None
|
54
|
-
semantic_models = lconfig.model_dump()["semantic_models"]
|
55
|
-
if vectorset_id not in semantic_models:
|
56
|
-
semantic_models.append(vectorset_id)
|
57
|
-
await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
|
58
|
-
lconfig = await learning_proxy.get_configuration(kbid)
|
59
|
-
assert lconfig is not None
|
60
|
-
|
61
|
-
# Then, add the vectorset to the index if it's not already there
|
62
|
-
storage = await get_storage()
|
63
|
-
vectorset_config = get_vectorset_config(lconfig, vectorset_id)
|
64
|
-
async with datamanagers.with_rw_transaction() as txn:
|
65
|
-
kbobj = KnowledgeBox(txn, storage, kbid)
|
66
|
-
try:
|
67
|
-
await kbobj.create_vectorset(vectorset_config)
|
68
|
-
await txn.commit()
|
69
|
-
except VectorSetConflict:
|
70
|
-
# Vectorset already exists, nothing to do
|
71
|
-
return
|
72
|
-
|
73
|
-
|
74
|
-
async def delete(kbid: str, vectorset_id: str) -> None:
|
75
|
-
lconfig = await learning_proxy.get_configuration(kbid)
|
76
|
-
if lconfig is not None:
|
77
|
-
semantic_models = lconfig.model_dump()["semantic_models"]
|
78
|
-
if vectorset_id in semantic_models:
|
79
|
-
semantic_models.remove(vectorset_id)
|
80
|
-
await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
|
81
|
-
|
82
|
-
storage = await get_storage()
|
83
|
-
try:
|
84
|
-
async with datamanagers.with_rw_transaction() as txn:
|
85
|
-
kbobj = KnowledgeBox(txn, storage, kbid)
|
86
|
-
await kbobj.delete_vectorset(vectorset_id=vectorset_id)
|
87
|
-
await txn.commit()
|
88
|
-
|
89
|
-
except VectorSetConflict:
|
90
|
-
# caller should handle this error
|
91
|
-
raise
|
92
|
-
except Exception as ex:
|
93
|
-
errors.capture_exception(ex)
|
94
|
-
logger.exception(
|
95
|
-
"Could not delete vectorset from index", extra={"kbid": kbid, "vectorset_id": vectorset_id}
|
96
|
-
)
|
97
|
-
|
98
|
-
|
99
|
-
def get_vectorset_config(
|
100
|
-
learning_config: learning_proxy.LearningConfiguration, vectorset_id: str
|
101
|
-
) -> knowledgebox_pb2.VectorSetConfig:
|
102
|
-
"""
|
103
|
-
Create a VectorSetConfig from a LearningConfiguration for a given vectorset_id
|
104
|
-
"""
|
105
|
-
vectorset_config = knowledgebox_pb2.VectorSetConfig(vectorset_id=vectorset_id)
|
106
|
-
vectorset_index_config = knowledgebox_pb2.VectorIndexConfig(
|
107
|
-
vector_type=knowledgebox_pb2.VectorType.DENSE_F32,
|
108
|
-
)
|
109
|
-
model_config = learning_config.semantic_model_configs[vectorset_id]
|
110
|
-
|
111
|
-
# Parse similarity function
|
112
|
-
parsed_similarity = learning_proxy.SimilarityFunction(model_config.similarity)
|
113
|
-
if parsed_similarity == learning_proxy.SimilarityFunction.COSINE.value:
|
114
|
-
vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.COSINE
|
115
|
-
elif parsed_similarity == learning_proxy.SimilarityFunction.DOT.value:
|
116
|
-
vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.DOT
|
117
|
-
else:
|
118
|
-
raise ValueError(
|
119
|
-
f"Unknown similarity function {model_config.similarity}, parsed as {parsed_similarity}"
|
120
|
-
)
|
121
|
-
|
122
|
-
# Parse vector dimension
|
123
|
-
vectorset_index_config.vector_dimension = model_config.size
|
124
|
-
|
125
|
-
# Parse matryoshka dimensions
|
126
|
-
if len(model_config.matryoshka_dims) > 0:
|
127
|
-
vectorset_index_config.normalize_vectors = True
|
128
|
-
vectorset_config.matryoshka_dimensions.extend(model_config.matryoshka_dims)
|
129
|
-
else:
|
130
|
-
vectorset_index_config.normalize_vectors = False
|
131
|
-
vectorset_config.vectorset_index_config.CopyFrom(vectorset_index_config)
|
132
|
-
return vectorset_config
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|