nucliadb 6.2.1.post3042__py3-none-any.whl → 6.2.1.post3059__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -155,10 +155,9 @@ class LearningConfiguration(BaseModel):
155
155
 
156
156
 
157
157
  class ProxiedLearningConfigError(Exception):
158
- def __init__(self, status_code: int, content: bytes, content_type: str):
158
+ def __init__(self, status_code: int, content: Union[str, dict[str, Any]]):
159
159
  self.status_code = status_code
160
160
  self.content = content
161
- self.content_type = content_type
162
161
 
163
162
 
164
163
  def raise_for_status(response: httpx.Response) -> None:
@@ -166,10 +165,13 @@ def raise_for_status(response: httpx.Response) -> None:
166
165
  response.raise_for_status()
167
166
  except httpx.HTTPStatusError as err:
168
167
  content_type = err.response.headers.get("Content-Type", "application/json")
168
+ if content_type == "application/json":
169
+ content = err.response.json()
170
+ else:
171
+ content = err.response.text
169
172
  raise ProxiedLearningConfigError(
170
173
  status_code=err.response.status_code,
171
- content=err.response.content,
172
- content_type=content_type,
174
+ content=content,
173
175
  )
174
176
 
175
177
 
@@ -18,48 +18,116 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
- from fastapi import Response
21
+ from fastapi import HTTPException, Response
22
22
  from fastapi_versioning import version
23
23
  from starlette.requests import Request
24
24
 
25
25
  from nucliadb import learning_proxy
26
+ from nucliadb.common import datamanagers
26
27
  from nucliadb.ingest.orm.exceptions import VectorSetConflict
27
- from nucliadb.models.responses import HTTPConflict
28
- from nucliadb.writer import vectorsets
28
+ from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
29
+ from nucliadb.writer import logger
29
30
  from nucliadb.writer.api.v1.router import KB_PREFIX, api
30
31
  from nucliadb_models.resource import (
31
32
  NucliaDBRoles,
32
33
  )
34
+ from nucliadb_models.vectorsets import CreatedVectorSet
35
+ from nucliadb_protos import knowledgebox_pb2
36
+ from nucliadb_telemetry import errors
33
37
  from nucliadb_utils.authentication import requires_one
38
+ from nucliadb_utils.utilities import get_storage
34
39
 
35
40
 
36
41
  @api.post(
37
42
  f"/{KB_PREFIX}/{{kbid}}/vectorsets/{{vectorset_id}}",
38
- status_code=200,
43
+ status_code=201,
39
44
  summary="Add a vectorset to Knowledge Box",
40
- tags=["Knowledge Boxes"],
45
+ tags=["VectorSets"],
41
46
  # TODO: remove when the feature is mature
42
47
  include_in_schema=False,
43
48
  )
44
49
  @requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
45
50
  @version(1)
46
- async def add_vectorset(request: Request, kbid: str, vectorset_id: str) -> Response:
51
+ async def add_vectorset(request: Request, kbid: str, vectorset_id: str) -> CreatedVectorSet:
47
52
  try:
48
- await vectorsets.add(kbid, vectorset_id)
53
+ await _add_vectorset(kbid, vectorset_id)
54
+
49
55
  except learning_proxy.ProxiedLearningConfigError as err:
50
- return Response(
56
+ raise HTTPException(
51
57
  status_code=err.status_code,
52
- content=err.content,
53
- media_type=err.content_type,
58
+ detail=err.content,
59
+ )
60
+
61
+ except VectorSetConflict:
62
+ raise HTTPException(
63
+ status_code=409,
64
+ detail="A vectorset with this embedding model already exists in your KB",
65
+ )
66
+
67
+ return CreatedVectorSet(id=vectorset_id)
68
+
69
+
70
+ async def _add_vectorset(kbid: str, vectorset_id: str) -> None:
71
+ # First off, add the vectorset to the learning configuration if it's not already there
72
+ lconfig = await learning_proxy.get_configuration(kbid)
73
+ assert lconfig is not None
74
+ semantic_models = lconfig.model_dump()["semantic_models"]
75
+ if vectorset_id not in semantic_models:
76
+ semantic_models.append(vectorset_id)
77
+ await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
78
+ lconfig = await learning_proxy.get_configuration(kbid)
79
+ assert lconfig is not None
80
+
81
+ # Then, add the vectorset to the index if it's not already there
82
+ storage = await get_storage()
83
+ vectorset_config = get_vectorset_config(lconfig, vectorset_id)
84
+ async with datamanagers.with_rw_transaction() as txn:
85
+ kbobj = KnowledgeBox(txn, storage, kbid)
86
+ await kbobj.create_vectorset(vectorset_config)
87
+ await txn.commit()
88
+
89
+
90
+ def get_vectorset_config(
91
+ learning_config: learning_proxy.LearningConfiguration, vectorset_id: str
92
+ ) -> knowledgebox_pb2.VectorSetConfig:
93
+ """
94
+ Create a VectorSetConfig from a LearningConfiguration for a given vectorset_id
95
+ """
96
+ vectorset_config = knowledgebox_pb2.VectorSetConfig(vectorset_id=vectorset_id)
97
+ vectorset_index_config = knowledgebox_pb2.VectorIndexConfig(
98
+ vector_type=knowledgebox_pb2.VectorType.DENSE_F32,
99
+ )
100
+ model_config = learning_config.semantic_model_configs[vectorset_id]
101
+
102
+ # Parse similarity function
103
+ parsed_similarity = learning_proxy.SimilarityFunction(model_config.similarity)
104
+ if parsed_similarity == learning_proxy.SimilarityFunction.COSINE.value:
105
+ vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.COSINE
106
+ elif parsed_similarity == learning_proxy.SimilarityFunction.DOT.value:
107
+ vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.DOT
108
+ else:
109
+ raise ValueError(
110
+ f"Unknown similarity function {model_config.similarity}, parsed as {parsed_similarity}"
54
111
  )
55
- return Response(status_code=200)
112
+
113
+ # Parse vector dimension
114
+ vectorset_index_config.vector_dimension = model_config.size
115
+
116
+ # Parse matryoshka dimensions
117
+ if len(model_config.matryoshka_dims) > 0:
118
+ vectorset_index_config.normalize_vectors = True
119
+ vectorset_config.matryoshka_dimensions.extend(model_config.matryoshka_dims)
120
+ else:
121
+ vectorset_index_config.normalize_vectors = False
122
+ vectorset_config.vectorset_index_config.CopyFrom(vectorset_index_config)
123
+ return vectorset_config
56
124
 
57
125
 
58
126
  @api.delete(
59
127
  f"/{KB_PREFIX}/{{kbid}}/vectorsets/{{vectorset_id}}",
60
- status_code=200,
128
+ status_code=204,
61
129
  summary="Delete vectorset from Knowledge Box",
62
- tags=["Knowledge Boxes"],
130
+ tags=["VectorSets"],
63
131
  # TODO: remove when the feature is mature
64
132
  include_in_schema=False,
65
133
  )
@@ -67,13 +135,43 @@ async def add_vectorset(request: Request, kbid: str, vectorset_id: str) -> Respo
67
135
  @version(1)
68
136
  async def delete_vectorset(request: Request, kbid: str, vectorset_id: str) -> Response:
69
137
  try:
70
- await vectorsets.delete(kbid, vectorset_id)
138
+ await _delete_vectorset(kbid, vectorset_id)
139
+
71
140
  except VectorSetConflict as exc:
72
- return HTTPConflict(detail=str(exc))
141
+ raise HTTPException(
142
+ status_code=409,
143
+ detail=str(exc),
144
+ )
145
+
73
146
  except learning_proxy.ProxiedLearningConfigError as err:
74
- return Response(
147
+ raise HTTPException(
75
148
  status_code=err.status_code,
76
- content=err.content,
77
- media_type=err.content_type,
149
+ detail=err.content,
150
+ )
151
+
152
+ return Response(status_code=204)
153
+
154
+
155
+ async def _delete_vectorset(kbid: str, vectorset_id: str) -> None:
156
+ lconfig = await learning_proxy.get_configuration(kbid)
157
+ if lconfig is not None:
158
+ semantic_models = lconfig.model_dump()["semantic_models"]
159
+ if vectorset_id in semantic_models:
160
+ semantic_models.remove(vectorset_id)
161
+ await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
162
+
163
+ storage = await get_storage()
164
+ try:
165
+ async with datamanagers.with_rw_transaction() as txn:
166
+ kbobj = KnowledgeBox(txn, storage, kbid)
167
+ await kbobj.delete_vectorset(vectorset_id=vectorset_id)
168
+ await txn.commit()
169
+
170
+ except VectorSetConflict:
171
+ # caller should handle this error
172
+ raise
173
+ except Exception as ex:
174
+ errors.capture_exception(ex)
175
+ logger.exception(
176
+ "Could not delete vectorset from index", extra={"kbid": kbid, "vectorset_id": vectorset_id}
78
177
  )
79
- return Response(status_code=200)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb
3
- Version: 6.2.1.post3042
3
+ Version: 6.2.1.post3059
4
4
  Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
5
5
  Author: NucliaDB Community
6
6
  Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Programming Language :: Python :: 3 :: Only
23
23
  Requires-Python: >=3.9, <4
24
24
  Description-Content-Type: text/markdown
25
- Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3042
26
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3042
27
- Requires-Dist: nucliadb-protos>=6.2.1.post3042
28
- Requires-Dist: nucliadb-models>=6.2.1.post3042
25
+ Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3059
26
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3059
27
+ Requires-Dist: nucliadb-protos>=6.2.1.post3059
28
+ Requires-Dist: nucliadb-models>=6.2.1.post3059
29
29
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
30
30
  Requires-Dist: nuclia-models>=0.24.2
31
31
  Requires-Dist: uvicorn
@@ -32,7 +32,7 @@ migrations/pg/0003_catalog_kbid_index.py,sha256=uKq_vtnuf73GVf0mtl2rhzdk_czAoEU1
32
32
  migrations/pg/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
33
33
  nucliadb/__init__.py,sha256=_abCmDJ_0ku483Os4UAjPX7Nywm39cQgAV_DiyjsKeQ,891
34
34
  nucliadb/health.py,sha256=UIxxA4oms4HIsCRZM_SZsdkIZIlgzmOxw-qSHLlWuak,3465
35
- nucliadb/learning_proxy.py,sha256=LxsGbYD-kwCY6wlZWOhGv2kiDJKGz623J7WDfL38yHw,19359
35
+ nucliadb/learning_proxy.py,sha256=rQ9gOLy_NwcVgsSi4jyYYHFdo6Vnb-1tEJ4kz2PIo_4,19411
36
36
  nucliadb/metrics_exporter.py,sha256=Rz6G7V_C_GTZCFzd0xEtIfixtZgUuffnr4rDKCbXXWM,5595
37
37
  nucliadb/openapi.py,sha256=wDiw0dVEvTpJvbatkJ0JZLkKm9RItZT5PWRHjqRfqTA,2272
38
38
  nucliadb/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -300,7 +300,6 @@ nucliadb/writer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
300
300
  nucliadb/writer/run.py,sha256=euVZ_rtHDXs-O1kB-Pt1Id8eft9CYVpWH3zJzEoEqls,1448
301
301
  nucliadb/writer/settings.py,sha256=pA9aMAvY8H6zvsxAOdGY8SZLrThDvJ8KLhluGI0GxnQ,3288
302
302
  nucliadb/writer/utilities.py,sha256=AZ5qEny1Xm0IDsFtH13oJa2usvJZK8f0FdgF1LrnLCw,1036
303
- nucliadb/writer/vectorsets.py,sha256=18XJvsyi0-tePQWig8dl5qaNPaufEZb0-uD22IAOTa0,5648
304
303
  nucliadb/writer/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
305
304
  nucliadb/writer/api/constants.py,sha256=qWEDjFUycrEZnSJyLnNK4PQNodU2oVmkO4NycaEZtio,1738
306
305
  nucliadb/writer/api/utils.py,sha256=wIQHlU8RQiIGVLI72suvyVIKlCU44Unh0Ae0IiN6Qwo,1313
@@ -315,7 +314,7 @@ nucliadb/writer/api/v1/services.py,sha256=U8OGxhA1tdt-wxw2uDAjFpwFXFEXSDTfBe1iV5
315
314
  nucliadb/writer/api/v1/slug.py,sha256=xlVBDBpRi9bNulpBHZwhyftVvulfE0zFm1XZIWl-AKY,2389
316
315
  nucliadb/writer/api/v1/transaction.py,sha256=d2Vbgnkk_-FLGSTt3vfldwiJIUf0XoyD0wP1jQNz_DY,2430
317
316
  nucliadb/writer/api/v1/upload.py,sha256=VOeqNTrZx1_z8iaKjM7p8fVlVcIYMtnQNK1dm72ct6k,33161
318
- nucliadb/writer/api/v1/vectorsets.py,sha256=KHbVKVG3oKmy53PFW0oDCDCVlZik9MBd-9NcAWph1U0,2818
317
+ nucliadb/writer/api/v1/vectorsets.py,sha256=7gT_aQNYLmNw1Ows_8Bpv-MdmipwD-XcAgX3aUpDX1Q,6745
319
318
  nucliadb/writer/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
320
319
  nucliadb/writer/resource/audit.py,sha256=FvxMZPzrNHtd31HgpZEvxzwAkbxJTZRhPLqRYYJi3tA,1426
321
320
  nucliadb/writer/resource/basic.py,sha256=l9zD-Qiq4eUkHezMf0w1Ksx2izKYLYuNoMIlXcNxxpM,11163
@@ -330,9 +329,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
330
329
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
331
330
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
332
331
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
333
- nucliadb-6.2.1.post3042.dist-info/METADATA,sha256=Y1qjgKs0OvJWldKrf8uansaxl1wbIdKdYOoSU4F7jcA,4603
334
- nucliadb-6.2.1.post3042.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
335
- nucliadb-6.2.1.post3042.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
336
- nucliadb-6.2.1.post3042.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
337
- nucliadb-6.2.1.post3042.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
338
- nucliadb-6.2.1.post3042.dist-info/RECORD,,
332
+ nucliadb-6.2.1.post3059.dist-info/METADATA,sha256=dGsG9jFB0KwE5eGxUB1DhoaqJmIaUzyiTDKhOJ0eSdg,4603
333
+ nucliadb-6.2.1.post3059.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
334
+ nucliadb-6.2.1.post3059.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
335
+ nucliadb-6.2.1.post3059.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
336
+ nucliadb-6.2.1.post3059.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
337
+ nucliadb-6.2.1.post3059.dist-info/RECORD,,
@@ -1,132 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- # Copyright (C) 2021 Bosutech XXI S.L.
21
- #
22
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
23
- # For commercial licensing, contact us at info@nuclia.com.
24
- #
25
- # AGPL:
26
- # This program is free software: you can redistribute it and/or modify
27
- # it under the terms of the GNU Affero General Public License as
28
- # published by the Free Software Foundation, either version 3 of the
29
- # License, or (at your option) any later version.
30
- #
31
- # This program is distributed in the hope that it will be useful,
32
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
33
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34
- # GNU Affero General Public License for more details.
35
- #
36
- # You should have received a copy of the GNU Affero General Public License
37
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
38
- #
39
-
40
- from nucliadb import learning_proxy
41
- from nucliadb.common import datamanagers
42
- from nucliadb.ingest.orm.exceptions import VectorSetConflict
43
- from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
44
- from nucliadb.writer import logger
45
- from nucliadb_protos import knowledgebox_pb2
46
- from nucliadb_telemetry import errors
47
- from nucliadb_utils.utilities import get_storage
48
-
49
-
50
- async def add(kbid: str, vectorset_id: str) -> None:
51
- # First off, add the vectorset to the learning configuration if it's not already there
52
- lconfig = await learning_proxy.get_configuration(kbid)
53
- assert lconfig is not None
54
- semantic_models = lconfig.model_dump()["semantic_models"]
55
- if vectorset_id not in semantic_models:
56
- semantic_models.append(vectorset_id)
57
- await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
58
- lconfig = await learning_proxy.get_configuration(kbid)
59
- assert lconfig is not None
60
-
61
- # Then, add the vectorset to the index if it's not already there
62
- storage = await get_storage()
63
- vectorset_config = get_vectorset_config(lconfig, vectorset_id)
64
- async with datamanagers.with_rw_transaction() as txn:
65
- kbobj = KnowledgeBox(txn, storage, kbid)
66
- try:
67
- await kbobj.create_vectorset(vectorset_config)
68
- await txn.commit()
69
- except VectorSetConflict:
70
- # Vectorset already exists, nothing to do
71
- return
72
-
73
-
74
- async def delete(kbid: str, vectorset_id: str) -> None:
75
- lconfig = await learning_proxy.get_configuration(kbid)
76
- if lconfig is not None:
77
- semantic_models = lconfig.model_dump()["semantic_models"]
78
- if vectorset_id in semantic_models:
79
- semantic_models.remove(vectorset_id)
80
- await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
81
-
82
- storage = await get_storage()
83
- try:
84
- async with datamanagers.with_rw_transaction() as txn:
85
- kbobj = KnowledgeBox(txn, storage, kbid)
86
- await kbobj.delete_vectorset(vectorset_id=vectorset_id)
87
- await txn.commit()
88
-
89
- except VectorSetConflict:
90
- # caller should handle this error
91
- raise
92
- except Exception as ex:
93
- errors.capture_exception(ex)
94
- logger.exception(
95
- "Could not delete vectorset from index", extra={"kbid": kbid, "vectorset_id": vectorset_id}
96
- )
97
-
98
-
99
- def get_vectorset_config(
100
- learning_config: learning_proxy.LearningConfiguration, vectorset_id: str
101
- ) -> knowledgebox_pb2.VectorSetConfig:
102
- """
103
- Create a VectorSetConfig from a LearningConfiguration for a given vectorset_id
104
- """
105
- vectorset_config = knowledgebox_pb2.VectorSetConfig(vectorset_id=vectorset_id)
106
- vectorset_index_config = knowledgebox_pb2.VectorIndexConfig(
107
- vector_type=knowledgebox_pb2.VectorType.DENSE_F32,
108
- )
109
- model_config = learning_config.semantic_model_configs[vectorset_id]
110
-
111
- # Parse similarity function
112
- parsed_similarity = learning_proxy.SimilarityFunction(model_config.similarity)
113
- if parsed_similarity == learning_proxy.SimilarityFunction.COSINE.value:
114
- vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.COSINE
115
- elif parsed_similarity == learning_proxy.SimilarityFunction.DOT.value:
116
- vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.DOT
117
- else:
118
- raise ValueError(
119
- f"Unknown similarity function {model_config.similarity}, parsed as {parsed_similarity}"
120
- )
121
-
122
- # Parse vector dimension
123
- vectorset_index_config.vector_dimension = model_config.size
124
-
125
- # Parse matryoshka dimensions
126
- if len(model_config.matryoshka_dims) > 0:
127
- vectorset_index_config.normalize_vectors = True
128
- vectorset_config.matryoshka_dimensions.extend(model_config.matryoshka_dims)
129
- else:
130
- vectorset_index_config.normalize_vectors = False
131
- vectorset_config.vectorset_index_config.CopyFrom(vectorset_index_config)
132
- return vectorset_config