PyPI - nucliadb - Versions diffs - 6.2.1.post3042__py3-none-any.whl → 6.2.1.post3059__py3-none-any.whl - Mend

nucliadb 6.2.1.post3042__py3-none-any.whl → 6.2.1.post3059__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

nucliadb/learning_proxy.py CHANGED Viewed

@@ -155,10 +155,9 @@ class LearningConfiguration(BaseModel):
 class ProxiedLearningConfigError(Exception):
-    def __init__(self, status_code: int, content: bytes, content_type: str):
+    def __init__(self, status_code: int, content: Union[str, dict[str, Any]]):
         self.status_code = status_code
         self.content = content
-        self.content_type = content_type
 def raise_for_status(response: httpx.Response) -> None:
@@ -166,10 +165,13 @@ def raise_for_status(response: httpx.Response) -> None:
         response.raise_for_status()
     except httpx.HTTPStatusError as err:
         content_type = err.response.headers.get("Content-Type", "application/json")
+        if content_type == "application/json":
+            content = err.response.json()
+        else:
+            content = err.response.text
         raise ProxiedLearningConfigError(
             status_code=err.response.status_code,
-            content=err.response.content,
-            content_type=content_type,
+            content=content,
         )

nucliadb/writer/api/v1/vectorsets.py CHANGED Viewed

@@ -18,48 +18,116 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from fastapi import Response
+from fastapi import HTTPException, Response
 from fastapi_versioning import version
 from starlette.requests import Request
 from nucliadb import learning_proxy
+from nucliadb.common import datamanagers
 from nucliadb.ingest.orm.exceptions import VectorSetConflict
-from nucliadb.models.responses import HTTPConflict
-from nucliadb.writer import vectorsets
+from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
+from nucliadb.writer import logger
 from nucliadb.writer.api.v1.router import KB_PREFIX, api
 from nucliadb_models.resource import (
     NucliaDBRoles,
 )
+from nucliadb_models.vectorsets import CreatedVectorSet
+from nucliadb_protos import knowledgebox_pb2
+from nucliadb_telemetry import errors
 from nucliadb_utils.authentication import requires_one
+from nucliadb_utils.utilities import get_storage
 @api.post(
     f"/{KB_PREFIX}/{{kbid}}/vectorsets/{{vectorset_id}}",
-    status_code=200,
+    status_code=201,
     summary="Add a vectorset to Knowledge Box",
-    tags=["Knowledge Boxes"],
+    tags=["VectorSets"],
     # TODO: remove when the feature is mature
     include_in_schema=False,
 )
 @requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
 @version(1)
-async def add_vectorset(request: Request, kbid: str, vectorset_id: str) -> Response:
+async def add_vectorset(request: Request, kbid: str, vectorset_id: str) -> CreatedVectorSet:
     try:
-        await vectorsets.add(kbid, vectorset_id)
+        await _add_vectorset(kbid, vectorset_id)
     except learning_proxy.ProxiedLearningConfigError as err:
-        return Response(
+        raise HTTPException(
             status_code=err.status_code,
-            content=err.content,
-            media_type=err.content_type,
+            detail=err.content,
+        )
+    except VectorSetConflict:
+        raise HTTPException(
+            status_code=409,
+            detail="A vectorset with this embedding model already exists in your KB",
+        )
+    return CreatedVectorSet(id=vectorset_id)
+async def _add_vectorset(kbid: str, vectorset_id: str) -> None:
+    # First off, add the vectorset to the learning configuration if it's not already there
+    lconfig = await learning_proxy.get_configuration(kbid)
+    assert lconfig is not None
+    semantic_models = lconfig.model_dump()["semantic_models"]
+    if vectorset_id not in semantic_models:
+        semantic_models.append(vectorset_id)
+        await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
+        lconfig = await learning_proxy.get_configuration(kbid)
+        assert lconfig is not None
+    # Then, add the vectorset to the index if it's not already there
+    storage = await get_storage()
+    vectorset_config = get_vectorset_config(lconfig, vectorset_id)
+    async with datamanagers.with_rw_transaction() as txn:
+        kbobj = KnowledgeBox(txn, storage, kbid)
+        await kbobj.create_vectorset(vectorset_config)
+        await txn.commit()
+def get_vectorset_config(
+    learning_config: learning_proxy.LearningConfiguration, vectorset_id: str
+) -> knowledgebox_pb2.VectorSetConfig:
+    """
+    Create a VectorSetConfig from a LearningConfiguration for a given vectorset_id
+    """
+    vectorset_config = knowledgebox_pb2.VectorSetConfig(vectorset_id=vectorset_id)
+    vectorset_index_config = knowledgebox_pb2.VectorIndexConfig(
+        vector_type=knowledgebox_pb2.VectorType.DENSE_F32,
+    )
+    model_config = learning_config.semantic_model_configs[vectorset_id]
+    # Parse similarity function
+    parsed_similarity = learning_proxy.SimilarityFunction(model_config.similarity)
+    if parsed_similarity == learning_proxy.SimilarityFunction.COSINE.value:
+        vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.COSINE
+    elif parsed_similarity == learning_proxy.SimilarityFunction.DOT.value:
+        vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.DOT
+    else:
+        raise ValueError(
+            f"Unknown similarity function {model_config.similarity}, parsed as {parsed_similarity}"
         )
-    return Response(status_code=200)
+    # Parse vector dimension
+    vectorset_index_config.vector_dimension = model_config.size
+    # Parse matryoshka dimensions
+    if len(model_config.matryoshka_dims) > 0:
+        vectorset_index_config.normalize_vectors = True
+        vectorset_config.matryoshka_dimensions.extend(model_config.matryoshka_dims)
+    else:
+        vectorset_index_config.normalize_vectors = False
+    vectorset_config.vectorset_index_config.CopyFrom(vectorset_index_config)
+    return vectorset_config
 @api.delete(
     f"/{KB_PREFIX}/{{kbid}}/vectorsets/{{vectorset_id}}",
-    status_code=200,
+    status_code=204,
     summary="Delete vectorset from Knowledge Box",
-    tags=["Knowledge Boxes"],
+    tags=["VectorSets"],
     # TODO: remove when the feature is mature
     include_in_schema=False,
 )
@@ -67,13 +135,43 @@ async def add_vectorset(request: Request, kbid: str, vectorset_id: str) -> Respo
 @version(1)
 async def delete_vectorset(request: Request, kbid: str, vectorset_id: str) -> Response:
     try:
-        await vectorsets.delete(kbid, vectorset_id)
+        await _delete_vectorset(kbid, vectorset_id)
     except VectorSetConflict as exc:
-        return HTTPConflict(detail=str(exc))
+        raise HTTPException(
+            status_code=409,
+            detail=str(exc),
+        )
     except learning_proxy.ProxiedLearningConfigError as err:
-        return Response(
+        raise HTTPException(
             status_code=err.status_code,
-            content=err.content,
-            media_type=err.content_type,
+            detail=err.content,
+        )
+    return Response(status_code=204)
+async def _delete_vectorset(kbid: str, vectorset_id: str) -> None:
+    lconfig = await learning_proxy.get_configuration(kbid)
+    if lconfig is not None:
+        semantic_models = lconfig.model_dump()["semantic_models"]
+        if vectorset_id in semantic_models:
+            semantic_models.remove(vectorset_id)
+            await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
+    storage = await get_storage()
+    try:
+        async with datamanagers.with_rw_transaction() as txn:
+            kbobj = KnowledgeBox(txn, storage, kbid)
+            await kbobj.delete_vectorset(vectorset_id=vectorset_id)
+            await txn.commit()
+    except VectorSetConflict:
+        # caller should handle this error
+        raise
+    except Exception as ex:
+        errors.capture_exception(ex)
+        logger.exception(
+            "Could not delete vectorset from index", extra={"kbid": kbid, "vectorset_id": vectorset_id}
         )
-    return Response(status_code=200)

{nucliadb-6.2.1.post3042.dist-info → nucliadb-6.2.1.post3059.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nucliadb
-Version: 6.2.1.post3042
+Version: 6.2.1.post3059
 Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
 Author: NucliaDB Community
 Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.9, <4
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3042
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3042
-Requires-Dist: nucliadb-protos>=6.2.1.post3042
-Requires-Dist: nucliadb-models>=6.2.1.post3042
+Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3059
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3059
+Requires-Dist: nucliadb-protos>=6.2.1.post3059
+Requires-Dist: nucliadb-models>=6.2.1.post3059
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nuclia-models>=0.24.2
 Requires-Dist: uvicorn

{nucliadb-6.2.1.post3042.dist-info → nucliadb-6.2.1.post3059.dist-info}/RECORD RENAMED Viewed

@@ -32,7 +32,7 @@ migrations/pg/0003_catalog_kbid_index.py,sha256=uKq_vtnuf73GVf0mtl2rhzdk_czAoEU1
 migrations/pg/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/__init__.py,sha256=_abCmDJ_0ku483Os4UAjPX7Nywm39cQgAV_DiyjsKeQ,891
 nucliadb/health.py,sha256=UIxxA4oms4HIsCRZM_SZsdkIZIlgzmOxw-qSHLlWuak,3465
-nucliadb/learning_proxy.py,sha256=LxsGbYD-kwCY6wlZWOhGv2kiDJKGz623J7WDfL38yHw,19359
+nucliadb/learning_proxy.py,sha256=rQ9gOLy_NwcVgsSi4jyYYHFdo6Vnb-1tEJ4kz2PIo_4,19411
 nucliadb/metrics_exporter.py,sha256=Rz6G7V_C_GTZCFzd0xEtIfixtZgUuffnr4rDKCbXXWM,5595
 nucliadb/openapi.py,sha256=wDiw0dVEvTpJvbatkJ0JZLkKm9RItZT5PWRHjqRfqTA,2272
 nucliadb/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -300,7 +300,6 @@ nucliadb/writer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nucliadb/writer/run.py,sha256=euVZ_rtHDXs-O1kB-Pt1Id8eft9CYVpWH3zJzEoEqls,1448
 nucliadb/writer/settings.py,sha256=pA9aMAvY8H6zvsxAOdGY8SZLrThDvJ8KLhluGI0GxnQ,3288
 nucliadb/writer/utilities.py,sha256=AZ5qEny1Xm0IDsFtH13oJa2usvJZK8f0FdgF1LrnLCw,1036
-nucliadb/writer/vectorsets.py,sha256=18XJvsyi0-tePQWig8dl5qaNPaufEZb0-uD22IAOTa0,5648
 nucliadb/writer/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/writer/api/constants.py,sha256=qWEDjFUycrEZnSJyLnNK4PQNodU2oVmkO4NycaEZtio,1738
 nucliadb/writer/api/utils.py,sha256=wIQHlU8RQiIGVLI72suvyVIKlCU44Unh0Ae0IiN6Qwo,1313
@@ -315,7 +314,7 @@ nucliadb/writer/api/v1/services.py,sha256=U8OGxhA1tdt-wxw2uDAjFpwFXFEXSDTfBe1iV5
 nucliadb/writer/api/v1/slug.py,sha256=xlVBDBpRi9bNulpBHZwhyftVvulfE0zFm1XZIWl-AKY,2389
 nucliadb/writer/api/v1/transaction.py,sha256=d2Vbgnkk_-FLGSTt3vfldwiJIUf0XoyD0wP1jQNz_DY,2430
 nucliadb/writer/api/v1/upload.py,sha256=VOeqNTrZx1_z8iaKjM7p8fVlVcIYMtnQNK1dm72ct6k,33161
-nucliadb/writer/api/v1/vectorsets.py,sha256=KHbVKVG3oKmy53PFW0oDCDCVlZik9MBd-9NcAWph1U0,2818
+nucliadb/writer/api/v1/vectorsets.py,sha256=7gT_aQNYLmNw1Ows_8Bpv-MdmipwD-XcAgX3aUpDX1Q,6745
 nucliadb/writer/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/writer/resource/audit.py,sha256=FvxMZPzrNHtd31HgpZEvxzwAkbxJTZRhPLqRYYJi3tA,1426
 nucliadb/writer/resource/basic.py,sha256=l9zD-Qiq4eUkHezMf0w1Ksx2izKYLYuNoMIlXcNxxpM,11163
@@ -330,9 +329,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.2.1.post3042.dist-info/METADATA,sha256=Y1qjgKs0OvJWldKrf8uansaxl1wbIdKdYOoSU4F7jcA,4603
-nucliadb-6.2.1.post3042.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-nucliadb-6.2.1.post3042.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
-nucliadb-6.2.1.post3042.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
-nucliadb-6.2.1.post3042.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-nucliadb-6.2.1.post3042.dist-info/RECORD,,
+nucliadb-6.2.1.post3059.dist-info/METADATA,sha256=dGsG9jFB0KwE5eGxUB1DhoaqJmIaUzyiTDKhOJ0eSdg,4603
+nucliadb-6.2.1.post3059.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+nucliadb-6.2.1.post3059.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.2.1.post3059.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.2.1.post3059.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+nucliadb-6.2.1.post3059.dist-info/RECORD,,

nucliadb/writer/vectorsets.py DELETED Viewed

@@ -1,132 +0,0 @@
-# Copyright (C) 2021 Bosutech XXI S.L.
-#
-# nucliadb is offered under the AGPL v3.0 and as commercial software.
-# For commercial licensing, contact us at info@nuclia.com.
-#
-# AGPL:
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# Copyright (C) 2021 Bosutech XXI S.L.
-#
-# nucliadb is offered under the AGPL v3.0 and as commercial software.
-# For commercial licensing, contact us at info@nuclia.com.
-#
-# AGPL:
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-from nucliadb import learning_proxy
-from nucliadb.common import datamanagers
-from nucliadb.ingest.orm.exceptions import VectorSetConflict
-from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
-from nucliadb.writer import logger
-from nucliadb_protos import knowledgebox_pb2
-from nucliadb_telemetry import errors
-from nucliadb_utils.utilities import get_storage
-async def add(kbid: str, vectorset_id: str) -> None:
-    # First off, add the vectorset to the learning configuration if it's not already there
-    lconfig = await learning_proxy.get_configuration(kbid)
-    assert lconfig is not None
-    semantic_models = lconfig.model_dump()["semantic_models"]
-    if vectorset_id not in semantic_models:
-        semantic_models.append(vectorset_id)
-        await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
-        lconfig = await learning_proxy.get_configuration(kbid)
-        assert lconfig is not None
-    # Then, add the vectorset to the index if it's not already there
-    storage = await get_storage()
-    vectorset_config = get_vectorset_config(lconfig, vectorset_id)
-    async with datamanagers.with_rw_transaction() as txn:
-        kbobj = KnowledgeBox(txn, storage, kbid)
-        try:
-            await kbobj.create_vectorset(vectorset_config)
-            await txn.commit()
-        except VectorSetConflict:
-            # Vectorset already exists, nothing to do
-            return
-async def delete(kbid: str, vectorset_id: str) -> None:
-    lconfig = await learning_proxy.get_configuration(kbid)
-    if lconfig is not None:
-        semantic_models = lconfig.model_dump()["semantic_models"]
-        if vectorset_id in semantic_models:
-            semantic_models.remove(vectorset_id)
-            await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
-    storage = await get_storage()
-    try:
-        async with datamanagers.with_rw_transaction() as txn:
-            kbobj = KnowledgeBox(txn, storage, kbid)
-            await kbobj.delete_vectorset(vectorset_id=vectorset_id)
-            await txn.commit()
-    except VectorSetConflict:
-        # caller should handle this error
-        raise
-    except Exception as ex:
-        errors.capture_exception(ex)
-        logger.exception(
-            "Could not delete vectorset from index", extra={"kbid": kbid, "vectorset_id": vectorset_id}
-        )
-def get_vectorset_config(
-    learning_config: learning_proxy.LearningConfiguration, vectorset_id: str
-) -> knowledgebox_pb2.VectorSetConfig:
-    """
-    Create a VectorSetConfig from a LearningConfiguration for a given vectorset_id
-    """
-    vectorset_config = knowledgebox_pb2.VectorSetConfig(vectorset_id=vectorset_id)
-    vectorset_index_config = knowledgebox_pb2.VectorIndexConfig(
-        vector_type=knowledgebox_pb2.VectorType.DENSE_F32,
-    )
-    model_config = learning_config.semantic_model_configs[vectorset_id]
-    # Parse similarity function
-    parsed_similarity = learning_proxy.SimilarityFunction(model_config.similarity)
-    if parsed_similarity == learning_proxy.SimilarityFunction.COSINE.value:
-        vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.COSINE
-    elif parsed_similarity == learning_proxy.SimilarityFunction.DOT.value:
-        vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.DOT
-    else:
-        raise ValueError(
-            f"Unknown similarity function {model_config.similarity}, parsed as {parsed_similarity}"
-        )
-    # Parse vector dimension
-    vectorset_index_config.vector_dimension = model_config.size
-    # Parse matryoshka dimensions
-    if len(model_config.matryoshka_dims) > 0:
-        vectorset_index_config.normalize_vectors = True
-        vectorset_config.matryoshka_dimensions.extend(model_config.matryoshka_dims)
-    else:
-        vectorset_index_config.normalize_vectors = False
-    vectorset_config.vectorset_index_config.CopyFrom(vectorset_index_config)
-    return vectorset_config

{nucliadb-6.2.1.post3042.dist-info → nucliadb-6.2.1.post3059.dist-info}/WHEEL RENAMED Viewed

File without changes

{nucliadb-6.2.1.post3042.dist-info → nucliadb-6.2.1.post3059.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{nucliadb-6.2.1.post3042.dist-info → nucliadb-6.2.1.post3059.dist-info}/top_level.txt RENAMED Viewed

File without changes

{nucliadb-6.2.1.post3042.dist-info → nucliadb-6.2.1.post3059.dist-info}/zip-safe RENAMED Viewed

File without changes

nucliadb 6.2.1.post3042__py3-none-any.whl → 6.2.1.post3059__py3-none-any.whl

nucliadb 6.2.1.post3042__py3-none-any.whl → 6.2.1.post3059__py3-none-any.whl