nucliadb 6.7.2.post4874__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0023_backfill_pg_catalog.py +8 -4
- migrations/0028_extracted_vectors_reference.py +1 -1
- migrations/0029_backfill_field_status.py +3 -4
- migrations/0032_remove_old_relations.py +2 -3
- migrations/0038_backfill_catalog_field_labels.py +8 -4
- migrations/0039_backfill_converation_splits_metadata.py +106 -0
- migrations/0040_migrate_search_configurations.py +79 -0
- migrations/0041_reindex_conversations.py +137 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
- migrations/pg/0012_catalog_statistics_undo.py +26 -0
- nucliadb/backups/create.py +2 -15
- nucliadb/backups/restore.py +4 -15
- nucliadb/backups/tasks.py +4 -1
- nucliadb/common/back_pressure/cache.py +2 -3
- nucliadb/common/back_pressure/materializer.py +7 -13
- nucliadb/common/back_pressure/settings.py +6 -6
- nucliadb/common/back_pressure/utils.py +1 -0
- nucliadb/common/cache.py +9 -9
- nucliadb/common/catalog/__init__.py +79 -0
- nucliadb/common/catalog/dummy.py +36 -0
- nucliadb/common/catalog/interface.py +85 -0
- nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +330 -232
- nucliadb/common/catalog/utils.py +56 -0
- nucliadb/common/cluster/manager.py +8 -23
- nucliadb/common/cluster/rebalance.py +484 -112
- nucliadb/common/cluster/rollover.py +36 -9
- nucliadb/common/cluster/settings.py +4 -9
- nucliadb/common/cluster/utils.py +34 -8
- nucliadb/common/context/__init__.py +7 -8
- nucliadb/common/context/fastapi.py +1 -2
- nucliadb/common/datamanagers/__init__.py +2 -4
- nucliadb/common/datamanagers/atomic.py +9 -2
- nucliadb/common/datamanagers/cluster.py +1 -2
- nucliadb/common/datamanagers/fields.py +3 -4
- nucliadb/common/datamanagers/kb.py +6 -6
- nucliadb/common/datamanagers/labels.py +2 -3
- nucliadb/common/datamanagers/resources.py +10 -33
- nucliadb/common/datamanagers/rollover.py +5 -7
- nucliadb/common/datamanagers/search_configurations.py +1 -2
- nucliadb/common/datamanagers/synonyms.py +1 -2
- nucliadb/common/datamanagers/utils.py +4 -4
- nucliadb/common/datamanagers/vectorsets.py +4 -4
- nucliadb/common/external_index_providers/base.py +32 -5
- nucliadb/common/external_index_providers/manager.py +5 -34
- nucliadb/common/external_index_providers/settings.py +1 -27
- nucliadb/common/filter_expression.py +129 -41
- nucliadb/common/http_clients/exceptions.py +8 -0
- nucliadb/common/http_clients/processing.py +16 -23
- nucliadb/common/http_clients/utils.py +3 -0
- nucliadb/common/ids.py +82 -58
- nucliadb/common/locking.py +1 -2
- nucliadb/common/maindb/driver.py +9 -8
- nucliadb/common/maindb/local.py +5 -5
- nucliadb/common/maindb/pg.py +9 -8
- nucliadb/common/nidx.py +22 -5
- nucliadb/common/vector_index_config.py +1 -1
- nucliadb/export_import/datamanager.py +4 -3
- nucliadb/export_import/exporter.py +11 -19
- nucliadb/export_import/importer.py +13 -6
- nucliadb/export_import/tasks.py +2 -0
- nucliadb/export_import/utils.py +6 -18
- nucliadb/health.py +2 -2
- nucliadb/ingest/app.py +8 -8
- nucliadb/ingest/consumer/consumer.py +8 -10
- nucliadb/ingest/consumer/pull.py +10 -8
- nucliadb/ingest/consumer/service.py +5 -30
- nucliadb/ingest/consumer/shard_creator.py +16 -5
- nucliadb/ingest/consumer/utils.py +1 -1
- nucliadb/ingest/fields/base.py +37 -49
- nucliadb/ingest/fields/conversation.py +55 -9
- nucliadb/ingest/fields/exceptions.py +1 -2
- nucliadb/ingest/fields/file.py +22 -8
- nucliadb/ingest/fields/link.py +7 -7
- nucliadb/ingest/fields/text.py +2 -3
- nucliadb/ingest/orm/brain_v2.py +89 -57
- nucliadb/ingest/orm/broker_message.py +2 -4
- nucliadb/ingest/orm/entities.py +10 -209
- nucliadb/ingest/orm/index_message.py +128 -113
- nucliadb/ingest/orm/knowledgebox.py +91 -59
- nucliadb/ingest/orm/processor/auditing.py +1 -3
- nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
- nucliadb/ingest/orm/processor/processor.py +98 -153
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
- nucliadb/ingest/orm/resource.py +82 -71
- nucliadb/ingest/orm/utils.py +1 -1
- nucliadb/ingest/partitions.py +12 -1
- nucliadb/ingest/processing.py +17 -17
- nucliadb/ingest/serialize.py +202 -145
- nucliadb/ingest/service/writer.py +15 -114
- nucliadb/ingest/settings.py +36 -15
- nucliadb/ingest/utils.py +1 -2
- nucliadb/learning_proxy.py +23 -26
- nucliadb/metrics_exporter.py +20 -6
- nucliadb/middleware/__init__.py +82 -1
- nucliadb/migrator/datamanager.py +4 -11
- nucliadb/migrator/migrator.py +1 -2
- nucliadb/migrator/models.py +1 -2
- nucliadb/migrator/settings.py +1 -2
- nucliadb/models/internal/augment.py +614 -0
- nucliadb/models/internal/processing.py +19 -19
- nucliadb/openapi.py +2 -2
- nucliadb/purge/__init__.py +3 -8
- nucliadb/purge/orphan_shards.py +1 -2
- nucliadb/reader/__init__.py +5 -0
- nucliadb/reader/api/models.py +6 -13
- nucliadb/reader/api/v1/download.py +59 -38
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +37 -9
- nucliadb/reader/api/v1/learning_config.py +33 -14
- nucliadb/reader/api/v1/resource.py +61 -9
- nucliadb/reader/api/v1/services.py +18 -14
- nucliadb/reader/app.py +3 -1
- nucliadb/reader/reader/notifications.py +1 -2
- nucliadb/search/api/v1/__init__.py +3 -0
- nucliadb/search/api/v1/ask.py +3 -4
- nucliadb/search/api/v1/augment.py +585 -0
- nucliadb/search/api/v1/catalog.py +15 -19
- nucliadb/search/api/v1/find.py +16 -22
- nucliadb/search/api/v1/hydrate.py +328 -0
- nucliadb/search/api/v1/knowledgebox.py +1 -2
- nucliadb/search/api/v1/predict_proxy.py +1 -2
- nucliadb/search/api/v1/resource/ask.py +28 -8
- nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
- nucliadb/search/api/v1/resource/search.py +9 -11
- nucliadb/search/api/v1/retrieve.py +130 -0
- nucliadb/search/api/v1/search.py +28 -32
- nucliadb/search/api/v1/suggest.py +11 -14
- nucliadb/search/api/v1/summarize.py +1 -2
- nucliadb/search/api/v1/utils.py +2 -2
- nucliadb/search/app.py +3 -2
- nucliadb/search/augmentor/__init__.py +21 -0
- nucliadb/search/augmentor/augmentor.py +232 -0
- nucliadb/search/augmentor/fields.py +704 -0
- nucliadb/search/augmentor/metrics.py +24 -0
- nucliadb/search/augmentor/paragraphs.py +334 -0
- nucliadb/search/augmentor/resources.py +238 -0
- nucliadb/search/augmentor/utils.py +33 -0
- nucliadb/search/lifecycle.py +3 -1
- nucliadb/search/predict.py +33 -19
- nucliadb/search/predict_models.py +8 -9
- nucliadb/search/requesters/utils.py +11 -10
- nucliadb/search/search/cache.py +19 -42
- nucliadb/search/search/chat/ask.py +131 -59
- nucliadb/search/search/chat/exceptions.py +3 -5
- nucliadb/search/search/chat/fetcher.py +201 -0
- nucliadb/search/search/chat/images.py +6 -4
- nucliadb/search/search/chat/old_prompt.py +1375 -0
- nucliadb/search/search/chat/parser.py +510 -0
- nucliadb/search/search/chat/prompt.py +563 -615
- nucliadb/search/search/chat/query.py +453 -32
- nucliadb/search/search/chat/rpc.py +85 -0
- nucliadb/search/search/fetch.py +3 -4
- nucliadb/search/search/filters.py +8 -11
- nucliadb/search/search/find.py +33 -31
- nucliadb/search/search/find_merge.py +124 -331
- nucliadb/search/search/graph_strategy.py +14 -12
- nucliadb/search/search/hydrator/__init__.py +49 -0
- nucliadb/search/search/hydrator/fields.py +217 -0
- nucliadb/search/search/hydrator/images.py +130 -0
- nucliadb/search/search/hydrator/paragraphs.py +323 -0
- nucliadb/search/search/hydrator/resources.py +60 -0
- nucliadb/search/search/ingestion_agents.py +5 -5
- nucliadb/search/search/merge.py +90 -94
- nucliadb/search/search/metrics.py +24 -7
- nucliadb/search/search/paragraphs.py +7 -9
- nucliadb/search/search/predict_proxy.py +44 -18
- nucliadb/search/search/query.py +14 -86
- nucliadb/search/search/query_parser/fetcher.py +51 -82
- nucliadb/search/search/query_parser/models.py +19 -48
- nucliadb/search/search/query_parser/old_filters.py +20 -19
- nucliadb/search/search/query_parser/parsers/ask.py +5 -6
- nucliadb/search/search/query_parser/parsers/catalog.py +7 -11
- nucliadb/search/search/query_parser/parsers/common.py +21 -13
- nucliadb/search/search/query_parser/parsers/find.py +6 -29
- nucliadb/search/search/query_parser/parsers/graph.py +18 -28
- nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
- nucliadb/search/search/query_parser/parsers/search.py +15 -56
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
- nucliadb/search/search/rank_fusion.py +18 -13
- nucliadb/search/search/rerankers.py +6 -7
- nucliadb/search/search/retrieval.py +300 -0
- nucliadb/search/search/summarize.py +5 -6
- nucliadb/search/search/utils.py +3 -4
- nucliadb/search/settings.py +1 -2
- nucliadb/standalone/api_router.py +1 -1
- nucliadb/standalone/app.py +4 -3
- nucliadb/standalone/auth.py +5 -6
- nucliadb/standalone/lifecycle.py +2 -2
- nucliadb/standalone/run.py +5 -4
- nucliadb/standalone/settings.py +5 -6
- nucliadb/standalone/versions.py +3 -4
- nucliadb/tasks/consumer.py +13 -8
- nucliadb/tasks/models.py +2 -1
- nucliadb/tasks/producer.py +3 -3
- nucliadb/tasks/retries.py +8 -7
- nucliadb/train/api/utils.py +1 -3
- nucliadb/train/api/v1/shards.py +1 -2
- nucliadb/train/api/v1/trainset.py +1 -2
- nucliadb/train/app.py +1 -1
- nucliadb/train/generator.py +4 -4
- nucliadb/train/generators/field_classifier.py +2 -2
- nucliadb/train/generators/field_streaming.py +6 -6
- nucliadb/train/generators/image_classifier.py +2 -2
- nucliadb/train/generators/paragraph_classifier.py +2 -2
- nucliadb/train/generators/paragraph_streaming.py +2 -2
- nucliadb/train/generators/question_answer_streaming.py +2 -2
- nucliadb/train/generators/sentence_classifier.py +4 -10
- nucliadb/train/generators/token_classifier.py +3 -2
- nucliadb/train/generators/utils.py +6 -5
- nucliadb/train/nodes.py +3 -3
- nucliadb/train/resource.py +6 -8
- nucliadb/train/settings.py +3 -4
- nucliadb/train/types.py +11 -11
- nucliadb/train/upload.py +3 -2
- nucliadb/train/uploader.py +1 -2
- nucliadb/train/utils.py +1 -2
- nucliadb/writer/api/v1/export_import.py +4 -1
- nucliadb/writer/api/v1/field.py +15 -14
- nucliadb/writer/api/v1/knowledgebox.py +18 -56
- nucliadb/writer/api/v1/learning_config.py +5 -4
- nucliadb/writer/api/v1/resource.py +9 -20
- nucliadb/writer/api/v1/services.py +10 -132
- nucliadb/writer/api/v1/upload.py +73 -72
- nucliadb/writer/app.py +8 -2
- nucliadb/writer/resource/basic.py +12 -15
- nucliadb/writer/resource/field.py +43 -5
- nucliadb/writer/resource/origin.py +7 -0
- nucliadb/writer/settings.py +2 -3
- nucliadb/writer/tus/__init__.py +2 -3
- nucliadb/writer/tus/azure.py +5 -7
- nucliadb/writer/tus/dm.py +3 -3
- nucliadb/writer/tus/exceptions.py +3 -4
- nucliadb/writer/tus/gcs.py +15 -22
- nucliadb/writer/tus/s3.py +2 -3
- nucliadb/writer/tus/storage.py +3 -3
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +10 -11
- nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
- nucliadb/common/datamanagers/entities.py +0 -139
- nucliadb/common/external_index_providers/pinecone.py +0 -894
- nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
- nucliadb/search/search/hydrator.py +0 -197
- nucliadb-6.7.2.post4874.dist-info/RECORD +0 -383
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
nucliadb/search/search/summarize.py CHANGED
@@ -18,7 +18,6 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 import asyncio
-from typing import Optional

 from nucliadb.common import datamanagers
 from nucliadb.common.maindb.utils import get_driver
@@ -36,7 +35,7 @@ from nucliadb_models.search import (
 from nucliadb_protos.utils_pb2 import ExtractedText
 from nucliadb_utils.utilities import get_storage

-ExtractedTexts = list[tuple[str, str, Optional[ExtractedText]]]
+ExtractedTexts = list[tuple[str, str, ExtractedText | None]]

 MAX_GET_EXTRACTED_TEXT_OPS = 20

@@ -46,7 +45,7 @@ class NoResourcesToSummarize(Exception):


 async def summarize(
-    kbid: str, request: SummarizeRequest, extra_predict_headers: Optional[dict[str, str]]
+    kbid: str, request: SummarizeRequest, extra_predict_headers: dict[str, str] | None
 ) -> SummarizedResponse:
     predict_request = SummarizeModel()
     predict_request.generative_model = request.generative_model
@@ -87,7 +86,7 @@ async def get_extracted_texts(kbid: str, resource_uuids_or_slugs: list[str]) ->
             if uuid is None:
                 logger.warning(f"Resource {uuid_or_slug} not found in KB", extra={"kbid": kbid})
                 continue
-            resource_orm = Resource(txn=txn, storage=storage,
+            resource_orm = Resource(txn=txn, storage=storage, kbid=kbid, uuid=uuid)
             fields = await resource_orm.get_fields(force=True)
             for _, field in fields.items():
                 task = asyncio.create_task(get_extracted_text(uuid_or_slug, field, max_tasks))
@@ -115,14 +114,14 @@ async def get_extracted_texts(kbid: str, resource_uuids_or_slugs: list[str]) ->

 async def get_extracted_text(
     uuid_or_slug, field: Field, max_operations: asyncio.Semaphore
-) -> tuple[str, str, Optional[ExtractedText]]:
+) -> tuple[str, str, ExtractedText | None]:
     async with max_operations:
         extracted_text = await field.get_extracted_text(force=True)
         field_key = f"{field.type}/{field.id}"
         return uuid_or_slug, field_key, extracted_text


-async def get_resource_uuid(kbobj: KnowledgeBox, uuid_or_slug: str) -> Optional[str]:
+async def get_resource_uuid(kbobj: KnowledgeBox, uuid_or_slug: str) -> str | None:
     """
     Return the uuid of the resource with the given uuid_or_slug.
     """
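Nearly all of the churn in these per-file diffs follows one pattern: Optional[X] annotations are rewritten to the PEP 604 union spelling X | None (Python 3.10+), and the now-unused "from typing import Optional" imports are dropped. A minimal before/after sketch of the pattern; the function and its body are placeholders, not code from the package:

# Before: typing.Optional spelling
from typing import Optional

def lookup_uuid_old(uuid_or_slug: str) -> Optional[str]:
    return uuid_or_slug or None

# After: PEP 604 union syntax, no typing import needed
def lookup_uuid_new(uuid_or_slug: str) -> str | None:
    return uuid_or_slug or None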
nucliadb/search/search/utils.py CHANGED
@@ -18,7 +18,6 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 import logging
-from typing import Optional

 from pydantic import BaseModel

@@ -30,7 +29,7 @@ from nucliadb_utils.utilities import has_feature
 logger = logging.getLogger(__name__)


-async def filter_hidden_resources(kbid: str, show_hidden: bool) -> Optional[bool]:
+async def filter_hidden_resources(kbid: str, show_hidden: bool) -> bool | None:
     kb_config = await kb.get_config(kbid=kbid)
     hidden_enabled = kb_config and kb_config.hidden_resources_enabled
     if hidden_enabled and not show_hidden:
@@ -41,8 +40,8 @@ async def filter_hidden_resources(kbid: str, show_hidden: bool) -> Optional[bool

 def min_score_from_query_params(
     min_score_bm25: float,
-    min_score_semantic: Optional[float],
-    deprecated_min_score: Optional[float],
+    min_score_semantic: float | None,
+    deprecated_min_score: float | None,
 ) -> MinScore:
     # Keep backward compatibility with the deprecated min_score parameter
     semantic = deprecated_min_score if min_score_semantic is None else min_score_semantic
nucliadb/search/settings.py CHANGED
@@ -18,7 +18,6 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #

-from typing import Optional

 from pydantic import Field

@@ -43,7 +42,7 @@ class Settings(DriverSettings):
         title="Prequeries max parallel",
         description="The maximum number of prequeries to run in parallel per /ask request",
     )
-    nidx_address: Optional[str] = Field(default=None)
+    nidx_address: str | None = Field(default=None)


 settings = Settings()
nucliadb/standalone/api_router.py CHANGED
@@ -57,7 +57,7 @@ async def api_config_check(request: Request):
         valid_nua_key = True
     except Exception as exc:
         logger.warning(f"Error validating nua key", exc_info=exc)
-        nua_key_check_error = f"Error checking NUA key: {
+        nua_key_check_error = f"Error checking NUA key: {exc!s}"
     return JSONResponse(
         {
             "nua_api_key": {
nucliadb/standalone/app.py CHANGED
@@ -31,7 +31,7 @@ from starlette.responses import HTMLResponse
 from starlette.routing import Mount

 import nucliadb_admin_assets  # type: ignore
-from nucliadb.middleware import ProcessTimeHeaderMiddleware
+from nucliadb.middleware import ClientErrorPayloadLoggerMiddleware, ProcessTimeHeaderMiddleware
 from nucliadb.reader import API_PREFIX
 from nucliadb.reader.api.v1.router import api as api_reader_v1
 from nucliadb.search.api.v1.router import api as api_search_v1
@@ -79,7 +79,7 @@ HOMEPAGE_HTML = """
 </ul>
 </body>
 </html>
-"""
+"""


 def application_factory(settings: Settings) -> FastAPI:
@@ -95,13 +95,13 @@ def application_factory(settings: Settings) -> FastAPI:
             backend=get_auth_backend(settings),
         ),
         Middleware(AuditMiddleware, audit_utility_getter=get_audit),
+        Middleware(ClientErrorPayloadLoggerMiddleware),
     ]
     if running_settings.debug:
         middleware.append(Middleware(ProcessTimeHeaderMiddleware))

     fastapi_settings = dict(
         debug=running_settings.debug,
-        middleware=middleware,
         lifespan=lifespan,
         exception_handlers={
             Exception: global_exception_handler,
@@ -122,6 +122,7 @@ def application_factory(settings: Settings) -> FastAPI:
         prefix_format=f"/{API_PREFIX}/v{{major}}",
         default_version=(1, 0),
         enable_latest=False,
+        middleware=middleware,
         kwargs=fastapi_settings,
     )
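standalone/app.py now registers a ClientErrorPayloadLoggerMiddleware and hands the whole middleware stack to the versioned application rather than to the inner FastAPI kwargs. The middleware itself lives in nucliadb/middleware/__init__.py (+82 lines in the file list above) and is not shown in this diff; the following is only a hypothetical sketch of what a "log the request payload on 4xx responses" ASGI middleware can look like, with the class internals, field names and log format invented for illustration:

import logging

from starlette.types import ASGIApp, Message, Receive, Scope, Send

logger = logging.getLogger(__name__)


class ClientErrorPayloadLoggerMiddleware:
    # Hypothetical sketch: capture the request body as it streams in and,
    # if the response turns out to be a 4xx, log a truncated copy of it.
    def __init__(self, app: ASGIApp, max_logged_bytes: int = 2048) -> None:
        self.app = app
        self.max_logged_bytes = max_logged_bytes

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        body_chunks: list[bytes] = []
        status_code = 0

        async def receive_wrapper() -> Message:
            message = await receive()
            if message["type"] == "http.request":
                body_chunks.append(message.get("body", b""))
            return message

        async def send_wrapper(message: Message) -> None:
            nonlocal status_code
            if message["type"] == "http.response.start":
                status_code = message["status"]
            await send(message)

        await self.app(scope, receive_wrapper, send_wrapper)

        if 400 <= status_code < 500:
            payload = b"".join(body_chunks)[: self.max_logged_bytes]
            logger.info(
                "Client error response",
                extra={"path": scope.get("path"), "status": status_code, "payload": payload},
            )

Registered through Middleware(ClientErrorPayloadLoggerMiddleware), Starlette instantiates the class with the downstream app as its first argument, which is why no extra arguments appear in the diff above.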
nucliadb/standalone/auth.py CHANGED
@@ -19,7 +19,6 @@
 import base64
 import logging
 import time
-from typing import Optional

 import orjson
 from jwcrypto import jwe, jwk  # type: ignore
@@ -51,7 +50,7 @@ def get_mapped_roles(*, settings: Settings, data: dict[str, str]) -> list[str]:

 async def authenticate_auth_token(
     settings: Settings, request: HTTPConnection
-) -> Optional[tuple[AuthCredentials, BaseUser]]:
+) -> tuple[AuthCredentials, BaseUser] | None:
     if "eph-token" not in request.query_params or settings.jwk_key is None:
         return None

@@ -81,7 +80,7 @@ class AuthHeaderAuthenticationBackend(NucliaCloudAuthenticationBackend):
     def __init__(self, settings: Settings) -> None:
         self.settings = settings

-    async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
+    async def authenticate(self, request: HTTPConnection) -> tuple[AuthCredentials, BaseUser] | None:
         token_resp = await authenticate_auth_token(self.settings, request)
         if token_resp is not None:
             return token_resp
@@ -109,7 +108,7 @@ class OAuth2AuthenticationBackend(NucliaCloudAuthenticationBackend):
     def __init__(self, settings: Settings) -> None:
         self.settings = settings

-    async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
+    async def authenticate(self, request: HTTPConnection) -> tuple[AuthCredentials, BaseUser] | None:
         token_resp = await authenticate_auth_token(self.settings, request)
         if token_resp is not None:
             return token_resp
@@ -160,7 +159,7 @@ class BasicAuthAuthenticationBackend(NucliaCloudAuthenticationBackend):
     def __init__(self, settings: Settings) -> None:
         self.settings = settings

-    async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
+    async def authenticate(self, request: HTTPConnection) -> tuple[AuthCredentials, BaseUser] | None:
         token_resp = await authenticate_auth_token(self.settings, request)
         if token_resp is not None:
             return token_resp
@@ -189,7 +188,7 @@ class UpstreamNaiveAuthenticationBackend(NucliaCloudAuthenticationBackend):
             user_header=settings.auth_policy_user_header,
         )

-    async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
+    async def authenticate(self, request: HTTPConnection) -> tuple[AuthCredentials, BaseUser] | None:
         token_resp = await authenticate_auth_token(self.settings, request)
         if token_resp is not None:
             return token_resp
nucliadb/standalone/lifecycle.py CHANGED
@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-import
+import inspect
 from contextlib import asynccontextmanager

 from fastapi import FastAPI
@@ -56,7 +56,7 @@ async def lifespan(app: FastAPI):
     yield

     for finalizer in SYNC_FINALIZERS:
-        if
+        if inspect.iscoroutinefunction(finalizer):
             await finalizer()
         else:
             finalizer()
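The lifespan shutdown hook now dispatches on inspect.iscoroutinefunction, so SYNC_FINALIZERS can hold a mix of plain and async callables. A self-contained sketch of that dispatch; the registry name and the two example finalizers are made up for illustration:

import asyncio
import inspect
from collections.abc import Callable

FINALIZERS: list[Callable] = []


async def run_finalizers() -> None:
    # Await coroutine functions, call plain functions synchronously.
    for finalizer in FINALIZERS:
        if inspect.iscoroutinefunction(finalizer):
            await finalizer()
        else:
            finalizer()


async def main() -> None:
    async def close_pool() -> None:  # e.g. an async connection-pool shutdown
        print("async finalizer ran")

    def flush_metrics() -> None:  # e.g. a synchronous metrics flush
        print("sync finalizer ran")

    FINALIZERS.extend([close_pool, flush_metrics])
    await run_finalizers()


asyncio.run(main())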
nucliadb/standalone/run.py CHANGED
@@ -21,7 +21,6 @@ import asyncio
 import logging
 import os
 import sys
-from typing import Optional

 import argdantic
 import uvicorn  # type: ignore
@@ -116,6 +115,9 @@ def run():
     if nuclia_settings.nuclia_service_account:
         settings_to_output["NUA API key"] = "Configured ✔"
         settings_to_output["NUA API zone"] = nuclia_settings.nuclia_zone
+        settings_to_output["NUA API url"] = (
+            nuclia_settings.nuclia_public_url.format(zone=nuclia_settings.nuclia_zone) + "/api"
+        )

     settings_to_output_fmted = "\n".join(
         [f"|| - {k}:{' ' * (27 - len(k))}{v}" for k, v in settings_to_output.items()]
@@ -145,9 +147,8 @@
     server.run()


-def get_latest_nucliadb() -> Optional[str]:
-
-    return loop.run_until_complete(versions.latest_nucliadb())
+def get_latest_nucliadb() -> str | None:
+    return asyncio.run(versions.latest_nucliadb())


 async def run_async_nucliadb(settings: Settings) -> uvicorn.Server:
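run.py also drops the hand-driven event loop for the update check in favour of asyncio.run. An isolated sketch of the same change; the version lookup is stubbed out here:

import asyncio


async def latest_nucliadb() -> str | None:
    # Stub standing in for the real "latest published version" lookup
    return "6.10.0"


# Old pattern: obtain a loop and drive it manually
# loop = asyncio.get_event_loop()
# latest = loop.run_until_complete(latest_nucliadb())

# New pattern: asyncio.run creates, runs and closes a fresh loop
latest = asyncio.run(latest_nucliadb())
print(latest)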
nucliadb/standalone/settings.py CHANGED
@@ -18,7 +18,6 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 from enum import Enum
-from typing import Optional

 import pydantic

@@ -44,11 +43,11 @@ class Settings(DriverSettings, StorageSettings, ExtendedStorageSettings):
     # all settings here are mapped in to other env var settings used
     # in the app. These are helper settings to make things easier to
     # use with standalone app vs cluster app.
-    nua_api_key: Optional[str] = pydantic.Field(
+    nua_api_key: str | None = pydantic.Field(
         default=None,
-        description="Nuclia Understanding API Key. Read how to generate a NUA Key here: https://docs.nuclia.dev/docs/rag/advanced/understanding/intro#get-a-nua-key",
+        description="Nuclia Understanding API Key. Read how to generate a NUA Key here: https://docs.nuclia.dev/docs/rag/advanced/understanding/intro#get-a-nua-key",
     )
-    zone: Optional[str] = pydantic.Field(default=None, description="Nuclia Understanding API Zone ID")
+    zone: str | None = pydantic.Field(default=None, description="Nuclia Understanding API Zone ID")
     http_host: str = pydantic.Field(default="0.0.0.0", description="HTTP Port")
     http_port: int = pydantic.Field(default=8080, description="HTTP Port")
     ingest_grpc_port: int = pydantic.Field(default=8030, description="Ingest GRPC Port")
@@ -83,7 +82,7 @@ class Settings(DriverSettings, StorageSettings, ExtendedStorageSettings):
         description="Default role to assign to user that is authenticated \
             upstream. Not used with `upstream_naive` auth policy.",
     )
-    auth_policy_role_mapping: Optional[dict[str, dict[str, list[NucliaDBRoles]]]] = pydantic.Field(
+    auth_policy_role_mapping: dict[str, dict[str, list[NucliaDBRoles]]] | None = pydantic.Field(
         default=None,
         description="""
 Role mapping for `upstream_auth_header`, `upstream_oauth2` and `upstream_basicauth` auth policies.
@@ -97,7 +96,7 @@ Examples:
 """,
     )

-    jwk_key: Optional[str] = pydantic.Field(
+    jwk_key: str | None = pydantic.Field(
         default=None,
         description="JWK key used for temporary token generation and validation.",
     )
nucliadb/standalone/versions.py CHANGED
@@ -20,7 +20,6 @@
 import enum
 import importlib.metadata
 import logging
-from typing import Optional

 from cachetools import TTLCache

@@ -45,11 +44,11 @@ def installed_nucliadb() -> str:
     return get_installed_version(StandalonePackages.NUCLIADB.value)


-async def latest_nucliadb() -> Optional[str]:
+async def latest_nucliadb() -> str | None:
     return await get_latest_version(StandalonePackages.NUCLIADB.value)


-def nucliadb_updates_available(installed: str, latest: Optional[str]) -> bool:
+def nucliadb_updates_available(installed: str, latest: str | None) -> bool:
     if latest is None:
         return False
     return is_newer_release(installed, latest)
@@ -96,7 +95,7 @@ def get_installed_version(package_name: str) -> str:
     return importlib.metadata.distribution(package_name).version


-async def get_latest_version(package: str) -> Optional[str]:
+async def get_latest_version(package: str) -> str | None:
     result = CACHE.get(package, None)
     if result is None:
         try:
nucliadb/tasks/consumer.py CHANGED
@@ -19,9 +19,10 @@
 #

 import asyncio
-from typing import Generic
+from typing import Generic

 import nats
+import nats.js.api
 import pydantic
 from nats.aio.client import Msg

@@ -43,8 +44,9 @@ class NatsTaskConsumer(Generic[MsgType]):
         stream: NatsStream,
         consumer: NatsConsumer,
         callback: Callback,
-        msg_type:
-        max_concurrent_messages: Optional[int] = None,
+        msg_type: type[MsgType],
+        max_concurrent_messages: int | None = None,
+        max_deliver: int | None = None,
     ):
         self.name = name
         self.stream = stream
@@ -52,6 +54,7 @@ class NatsTaskConsumer(Generic[MsgType]):
         self.callback = callback
         self.msg_type = msg_type
         self.max_concurrent_messages = max_concurrent_messages
+        self.max_deliver = max_deliver
         self.initialized = False
         self.running_tasks: list[asyncio.Task] = []
         self.subscription = None
@@ -71,7 +74,8 @@ class NatsTaskConsumer(Generic[MsgType]):
             for task in self.running_tasks:
                 task.cancel()
             try:
-
+                if len(self.running_tasks) > 0:
+                    await asyncio.wait(self.running_tasks, timeout=5)
                 self.running_tasks.clear()
             except asyncio.TimeoutError:
                 pass
@@ -96,6 +100,7 @@ class NatsTaskConsumer(Generic[MsgType]):
                 ack_wait=nats_consumer_settings.nats_ack_wait,
                 idle_heartbeat=nats_consumer_settings.nats_idle_heartbeat,
                 max_ack_pending=max_ack_pending,
+                max_deliver=self.max_deliver,
             ),
         )
         logger.info(
@@ -168,8 +173,6 @@ class NatsTaskConsumer(Generic[MsgType]):
                 },
             )
             await msg.ack()
-        finally:
-            return


 def create_consumer(
@@ -177,8 +180,9 @@ def create_consumer(
     stream: NatsStream,
     consumer: NatsConsumer,
     callback: Callback,
-    msg_type:
-    max_concurrent_messages: Optional[int] = None,
+    msg_type: type[MsgType],
+    max_concurrent_messages: int | None = None,
+    max_retries: int = 100,
 ) -> NatsTaskConsumer[MsgType]:
     """
     Returns a non-initialized consumer
@@ -190,4 +194,5 @@
         callback=callback,
         msg_type=msg_type,
         max_concurrent_messages=max_concurrent_messages,
+        max_deliver=max_retries,
     )
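tasks/consumer.py threads a new max_deliver option (exposed as max_retries on create_consumer) through to the JetStream consumer configuration, so a message that keeps failing is redelivered at most that many times instead of forever. A hedged sketch of the same knob with the nats-py client; the subject, stream name and handler are invented, and only ConsumerConfig and its fields mirror the diff:

import nats
from nats.aio.msg import Msg
from nats.js.api import ConsumerConfig


async def subscribe_with_retry_limit() -> None:
    nc = await nats.connect("nats://localhost:4222")
    js = nc.jetstream()

    async def handler(msg: Msg) -> None:
        try:
            ...  # process the task payload
            await msg.ack()
        except Exception:
            await msg.nak()  # redelivered until max_deliver is exhausted

    await js.subscribe(
        "tasks.example",          # subject (made up for this sketch)
        stream="tasks",           # stream name (made up)
        cb=handler,
        manual_ack=True,
        config=ConsumerConfig(
            ack_wait=60,          # seconds before an unacked message is redelivered
            max_ack_pending=10,   # in-flight, unacked messages allowed
            max_deliver=100,      # hard cap on delivery attempts per message
        ),
    )

# asyncio.run(subscribe_with_retry_limit())  # requires a running NATS server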
nucliadb/tasks/models.py CHANGED
@@ -17,7 +17,8 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from
+from collections.abc import Callable, Coroutine
+from typing import Any, TypeVar

 import pydantic
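tasks/models.py (and several of the files that follow) also move Callable, Coroutine and the async-generator ABCs from typing to collections.abc, their preferred home since the typing aliases were deprecated in Python 3.9. A minimal sketch of the split; the Callback alias is illustrative, not the package's actual definition:

from collections.abc import Callable, Coroutine
from typing import Any, TypeVar

MsgType = TypeVar("MsgType")

# Illustrative alias: an async callback taking any arguments and returning None
Callback = Callable[..., Coroutine[Any, Any, None]]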
nucliadb/tasks/producer.py CHANGED
@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from typing import Generic
+from typing import Generic

 from nucliadb.tasks.logger import logger
 from nucliadb.tasks.models import MsgType
@@ -32,7 +32,7 @@ class NatsTaskProducer(Generic[MsgType]):
         name: str,
         stream: NatsStream,
         producer_subject: str,
-        msg_type:
+        msg_type: type[MsgType],
     ):
         self.name = name
         self.stream = stream
@@ -69,7 +69,7 @@ def create_producer(
     name: str,
     stream: NatsStream,
     producer_subject: str,
-    msg_type:
+    msg_type: type[MsgType],
 ) -> NatsTaskProducer[MsgType]:
     """
     Returns a non-initialized producer.
nucliadb/tasks/retries.py CHANGED
@@ -19,9 +19,10 @@
 #
 import functools
 import logging
+from collections.abc import Callable
 from datetime import datetime, timezone
 from enum import Enum
-from typing import
+from typing import cast

 from pydantic import BaseModel

@@ -44,7 +45,7 @@ class TaskMetadata(BaseModel):
     status: Status
     retries: int = 0
     error_messages: list[str] = []
-    last_modified: Optional[datetime] = None
+    last_modified: datetime | None = None


 class TaskRetryHandler:
@@ -87,7 +88,7 @@ class TaskRetryHandler:
             kbid=self.kbid, task_type=self.task_type, task_id=self.task_id
         )

-    async def get_metadata(self) -> Optional[TaskMetadata]:
+    async def get_metadata(self) -> TaskMetadata | None:
         return await _get_metadata(self.context.kv_driver, self.metadata_key)

     async def set_metadata(self, metadata: TaskMetadata) -> None:
@@ -150,7 +151,7 @@ class TaskRetryHandler:
         return wrapper


-async def _get_metadata(kv_driver: Driver, metadata_key: str) -> Optional[TaskMetadata]:
+async def _get_metadata(kv_driver: Driver, metadata_key: str) -> TaskMetadata | None:
     async with kv_driver.ro_transaction() as txn:
         metadata = await txn.get(metadata_key)
         if metadata is None:
@@ -173,7 +174,7 @@ async def purge_metadata(kv_driver: Driver) -> int:
         return 0

     total_purged = 0
-    start: Optional[str] = ""
+    start: str | None = ""
     while True:
         start, purged = await purge_batch(kv_driver, start)
         total_purged += purged
@@ -183,8 +184,8 @@ async def purge_metadata(kv_driver: Driver) -> int:


 async def purge_batch(
-    kv_driver: PGDriver, start:
-) -> tuple[
+    kv_driver: PGDriver, start: str | None = None, batch_size: int = 200
+) -> tuple[str | None, int]:
     """
     Returns the next start key and the number of purged records. If start is None, it means there are no more records to purge.
     """
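The purge helpers in tasks/retries.py page through task metadata with a keyset cursor: purge_batch takes the previous cursor and returns the next one plus the purged count, and a None cursor signals completion. A self-contained sketch of that loop shape, with an in-memory key list standing in for the real PGDriver storage:

import asyncio


async def purge_batch(keys: list[str], start: str | None, batch_size: int = 200) -> tuple[str | None, int]:
    # Delete up to batch_size keys sorting after `start`; return (next cursor, purged count).
    batch = sorted(k for k in keys if start is None or k > start)[:batch_size]
    for key in batch:
        keys.remove(key)
    next_start = batch[-1] if len(batch) == batch_size else None
    return next_start, len(batch)


async def purge_all(keys: list[str]) -> int:
    total_purged = 0
    start: str | None = ""
    while True:
        start, purged = await purge_batch(keys, start)
        total_purged += purged
        if start is None:
            return total_purged


print(asyncio.run(purge_all([f"task/{i:03d}" for i in range(450)])))  # prints 450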
nucliadb/train/api/utils.py CHANGED
@@ -19,12 +19,10 @@
 #


-from typing import Optional
-
 from nucliadb.train.utils import get_shard_manager


-async def get_kb_partitions(kbid: str, prefix: Optional[str] = None) -> list[str]:
+async def get_kb_partitions(kbid: str, prefix: str | None = None) -> list[str]:
     shard_manager = get_shard_manager()
     shards = await shard_manager.get_shards_by_kbid_inner(kbid=kbid)
     valid_shards = []
nucliadb/train/api/v1/shards.py CHANGED
@@ -18,7 +18,6 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 import json
-from typing import Optional

 import google.protobuf.message
 import pydantic
@@ -63,7 +62,7 @@ async def object_get_response(
     )


-async def get_trainset(request: Request) -> tuple[TrainSet, Optional[FilterExpression]]:
+async def get_trainset(request: Request) -> tuple[TrainSet, FilterExpression | None]:
     if request.headers.get("Content-Type") == "application/json":
         try:
             trainset_model = TrainSetModel.model_validate(await request.json())
nucliadb/train/api/v1/trainset.py CHANGED
@@ -18,7 +18,6 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #

-from typing import Optional

 from fastapi import HTTPException, Request
 from fastapi_versioning import version
@@ -57,7 +56,7 @@ async def get_partitions_prefix(request: Request, kbid: str, prefix: str) -> Tra
     return await get_partitions(kbid, prefix=prefix)


-async def get_partitions(kbid: str, prefix: Optional[str] = None) -> TrainSetPartitions:
+async def get_partitions(kbid: str, prefix: str | None = None) -> TrainSetPartitions:
     try:
         all_keys = await get_kb_partitions(kbid, prefix)
     except ShardNotFound:
nucliadb/train/app.py CHANGED
@@ -50,7 +50,6 @@ errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)

 fastapi_settings = dict(
     debug=running_settings.debug,
-    middleware=middleware,
     lifespan=lifespan,
     exception_handlers={
         Exception: global_exception_handler,
@@ -71,6 +70,7 @@ application = VersionedFastAPI(
     prefix_format=f"/{API_PREFIX}/v{{major}}",
     default_version=(1, 0),
     enable_latest=False,
+    middleware=middleware,
     kwargs=fastapi_settings,
 )
nucliadb/train/generator.py CHANGED
@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from
+from collections.abc import AsyncIterator, Callable

 from fastapi import HTTPException
 from grpc import StatusCode
@@ -53,11 +53,11 @@ from nucliadb.train.utils import get_shard_manager
 from nucliadb_models.filters import FilterExpression
 from nucliadb_protos.dataset_pb2 import TaskType, TrainSet

-BatchGenerator = Callable[[str, TrainSet, str, Optional[FilterExpression]], AsyncIterator[TrainBatch]]
+BatchGenerator = Callable[[str, TrainSet, str, FilterExpression | None], AsyncIterator[TrainBatch]]


 async def generate_train_data(
-    kbid: str, shard: str, trainset: TrainSet, filter_expression: Optional[FilterExpression] = None
+    kbid: str, shard: str, trainset: TrainSet, filter_expression: FilterExpression | None = None
 ):
     # Get the data structure to generate data
     shard_manager = get_shard_manager()
@@ -66,7 +66,7 @@ async def generate_train_data(
     if trainset.batch_size == 0:
         trainset.batch_size = 50

-    batch_generator: Optional[BatchGenerator] = None
+    batch_generator: BatchGenerator | None = None

     if trainset.type == TaskType.FIELD_CLASSIFICATION:
         batch_generator = field_classification_batch_generator
nucliadb/train/generators/field_classifier.py CHANGED
@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #

-from
+from collections.abc import AsyncGenerator

 from nidx_protos.nodereader_pb2 import StreamRequest

@@ -39,7 +39,7 @@ def field_classification_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression: Optional[FilterExpression],
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[FieldClassificationBatch, None]:
     generator = generate_field_classification_payloads(kbid, trainset, shard_replica_id)
     batch_generator = batchify(generator, trainset.batch_size, FieldClassificationBatch)
nucliadb/train/generators/field_streaming.py CHANGED
@@ -19,7 +19,7 @@
 #

 import asyncio
-from
+from collections.abc import AsyncGenerator, AsyncIterable

 from nidx_protos.nodereader_pb2 import DocumentItem, StreamRequest

@@ -45,7 +45,7 @@ def field_streaming_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression: Optional[FilterExpression],
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[FieldStreamingBatch, None]:
     generator = generate_field_streaming_payloads(kbid, trainset, shard_replica_id, filter_expression)
     batch_generator = batchify(generator, trainset.batch_size, FieldStreamingBatch)
@@ -53,7 +53,7 @@ def field_streaming_batch_generator(


 async def generate_field_streaming_payloads(
-    kbid: str, trainset: TrainSet, shard_replica_id: str, filter_expression: Optional[FilterExpression]
+    kbid: str, trainset: TrainSet, shard_replica_id: str, filter_expression: FilterExpression | None
 ) -> AsyncGenerator[FieldSplitData, None]:
     request = StreamRequest()
     request.shard_id.id = shard_replica_id
@@ -192,7 +192,7 @@ async def _fetch_basic(kbid: str, fsd: FieldSplitData):
         fsd.basic.CopyFrom(basic)


-async def get_field_text(kbid: str, rid: str, field: str, field_type: str) -> Optional[ExtractedText]:
+async def get_field_text(kbid: str, rid: str, field: str, field_type: str) -> ExtractedText | None:
     orm_resource = await get_resource_from_cache_or_db(kbid, rid)

     if orm_resource is None:
@@ -208,7 +208,7 @@ async def get_field_text(kbid: str, rid: str, field: str, field_type: str) -> Op

 async def get_field_metadata(
     kbid: str, rid: str, field: str, field_type: str
-) -> Optional[FieldComputedMetadata]:
+) -> FieldComputedMetadata | None:
     orm_resource = await get_resource_from_cache_or_db(kbid, rid)

     if orm_resource is None:
@@ -222,7 +222,7 @@ async def get_field_metadata(
     return field_metadata


-async def get_field_basic(kbid: str, rid: str, field: str, field_type: str) -> Optional[Basic]:
+async def get_field_basic(kbid: str, rid: str, field: str, field_type: str) -> Basic | None:
     orm_resource = await get_resource_from_cache_or_db(kbid, rid)

     if orm_resource is None: