nucliadb 6.9.1.post5192__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. migrations/0023_backfill_pg_catalog.py +2 -2
  2. migrations/0029_backfill_field_status.py +3 -4
  3. migrations/0032_remove_old_relations.py +2 -3
  4. migrations/0038_backfill_catalog_field_labels.py +2 -2
  5. migrations/0039_backfill_converation_splits_metadata.py +2 -2
  6. migrations/0041_reindex_conversations.py +137 -0
  7. migrations/pg/0010_shards_index.py +34 -0
  8. nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
  9. migrations/pg/0012_catalog_statistics_undo.py +26 -0
  10. nucliadb/backups/create.py +2 -15
  11. nucliadb/backups/restore.py +4 -15
  12. nucliadb/backups/tasks.py +4 -1
  13. nucliadb/common/back_pressure/cache.py +2 -3
  14. nucliadb/common/back_pressure/materializer.py +7 -13
  15. nucliadb/common/back_pressure/settings.py +6 -6
  16. nucliadb/common/back_pressure/utils.py +1 -0
  17. nucliadb/common/cache.py +9 -9
  18. nucliadb/common/catalog/interface.py +12 -12
  19. nucliadb/common/catalog/pg.py +41 -29
  20. nucliadb/common/catalog/utils.py +3 -3
  21. nucliadb/common/cluster/manager.py +5 -4
  22. nucliadb/common/cluster/rebalance.py +483 -114
  23. nucliadb/common/cluster/rollover.py +25 -9
  24. nucliadb/common/cluster/settings.py +3 -8
  25. nucliadb/common/cluster/utils.py +34 -8
  26. nucliadb/common/context/__init__.py +7 -8
  27. nucliadb/common/context/fastapi.py +1 -2
  28. nucliadb/common/datamanagers/__init__.py +2 -4
  29. nucliadb/common/datamanagers/atomic.py +4 -2
  30. nucliadb/common/datamanagers/cluster.py +1 -2
  31. nucliadb/common/datamanagers/fields.py +3 -4
  32. nucliadb/common/datamanagers/kb.py +6 -6
  33. nucliadb/common/datamanagers/labels.py +2 -3
  34. nucliadb/common/datamanagers/resources.py +10 -33
  35. nucliadb/common/datamanagers/rollover.py +5 -7
  36. nucliadb/common/datamanagers/search_configurations.py +1 -2
  37. nucliadb/common/datamanagers/synonyms.py +1 -2
  38. nucliadb/common/datamanagers/utils.py +4 -4
  39. nucliadb/common/datamanagers/vectorsets.py +4 -4
  40. nucliadb/common/external_index_providers/base.py +32 -5
  41. nucliadb/common/external_index_providers/manager.py +4 -5
  42. nucliadb/common/filter_expression.py +128 -40
  43. nucliadb/common/http_clients/processing.py +12 -23
  44. nucliadb/common/ids.py +6 -4
  45. nucliadb/common/locking.py +1 -2
  46. nucliadb/common/maindb/driver.py +9 -8
  47. nucliadb/common/maindb/local.py +5 -5
  48. nucliadb/common/maindb/pg.py +9 -8
  49. nucliadb/common/nidx.py +3 -4
  50. nucliadb/export_import/datamanager.py +4 -3
  51. nucliadb/export_import/exporter.py +11 -19
  52. nucliadb/export_import/importer.py +13 -6
  53. nucliadb/export_import/tasks.py +2 -0
  54. nucliadb/export_import/utils.py +6 -18
  55. nucliadb/health.py +2 -2
  56. nucliadb/ingest/app.py +8 -8
  57. nucliadb/ingest/consumer/consumer.py +8 -10
  58. nucliadb/ingest/consumer/pull.py +3 -8
  59. nucliadb/ingest/consumer/service.py +3 -3
  60. nucliadb/ingest/consumer/utils.py +1 -1
  61. nucliadb/ingest/fields/base.py +28 -49
  62. nucliadb/ingest/fields/conversation.py +12 -12
  63. nucliadb/ingest/fields/exceptions.py +1 -2
  64. nucliadb/ingest/fields/file.py +22 -8
  65. nucliadb/ingest/fields/link.py +7 -7
  66. nucliadb/ingest/fields/text.py +2 -3
  67. nucliadb/ingest/orm/brain_v2.py +78 -64
  68. nucliadb/ingest/orm/broker_message.py +2 -4
  69. nucliadb/ingest/orm/entities.py +10 -209
  70. nucliadb/ingest/orm/index_message.py +4 -4
  71. nucliadb/ingest/orm/knowledgebox.py +18 -27
  72. nucliadb/ingest/orm/processor/auditing.py +1 -3
  73. nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
  74. nucliadb/ingest/orm/processor/processor.py +27 -27
  75. nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
  76. nucliadb/ingest/orm/resource.py +72 -70
  77. nucliadb/ingest/orm/utils.py +1 -1
  78. nucliadb/ingest/processing.py +17 -17
  79. nucliadb/ingest/serialize.py +202 -145
  80. nucliadb/ingest/service/writer.py +3 -109
  81. nucliadb/ingest/settings.py +3 -4
  82. nucliadb/ingest/utils.py +1 -2
  83. nucliadb/learning_proxy.py +11 -11
  84. nucliadb/metrics_exporter.py +5 -4
  85. nucliadb/middleware/__init__.py +82 -1
  86. nucliadb/migrator/datamanager.py +3 -4
  87. nucliadb/migrator/migrator.py +1 -2
  88. nucliadb/migrator/models.py +1 -2
  89. nucliadb/migrator/settings.py +1 -2
  90. nucliadb/models/internal/augment.py +614 -0
  91. nucliadb/models/internal/processing.py +19 -19
  92. nucliadb/openapi.py +2 -2
  93. nucliadb/purge/__init__.py +3 -8
  94. nucliadb/purge/orphan_shards.py +1 -2
  95. nucliadb/reader/__init__.py +5 -0
  96. nucliadb/reader/api/models.py +6 -13
  97. nucliadb/reader/api/v1/download.py +59 -38
  98. nucliadb/reader/api/v1/export_import.py +4 -4
  99. nucliadb/reader/api/v1/learning_config.py +24 -4
  100. nucliadb/reader/api/v1/resource.py +61 -9
  101. nucliadb/reader/api/v1/services.py +18 -14
  102. nucliadb/reader/app.py +3 -1
  103. nucliadb/reader/reader/notifications.py +1 -2
  104. nucliadb/search/api/v1/__init__.py +2 -0
  105. nucliadb/search/api/v1/ask.py +3 -4
  106. nucliadb/search/api/v1/augment.py +585 -0
  107. nucliadb/search/api/v1/catalog.py +11 -15
  108. nucliadb/search/api/v1/find.py +16 -22
  109. nucliadb/search/api/v1/hydrate.py +25 -25
  110. nucliadb/search/api/v1/knowledgebox.py +1 -2
  111. nucliadb/search/api/v1/predict_proxy.py +1 -2
  112. nucliadb/search/api/v1/resource/ask.py +7 -7
  113. nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
  114. nucliadb/search/api/v1/resource/search.py +9 -11
  115. nucliadb/search/api/v1/retrieve.py +130 -0
  116. nucliadb/search/api/v1/search.py +28 -32
  117. nucliadb/search/api/v1/suggest.py +11 -14
  118. nucliadb/search/api/v1/summarize.py +1 -2
  119. nucliadb/search/api/v1/utils.py +2 -2
  120. nucliadb/search/app.py +3 -2
  121. nucliadb/search/augmentor/__init__.py +21 -0
  122. nucliadb/search/augmentor/augmentor.py +232 -0
  123. nucliadb/search/augmentor/fields.py +704 -0
  124. nucliadb/search/augmentor/metrics.py +24 -0
  125. nucliadb/search/augmentor/paragraphs.py +334 -0
  126. nucliadb/search/augmentor/resources.py +238 -0
  127. nucliadb/search/augmentor/utils.py +33 -0
  128. nucliadb/search/lifecycle.py +3 -1
  129. nucliadb/search/predict.py +24 -17
  130. nucliadb/search/predict_models.py +8 -9
  131. nucliadb/search/requesters/utils.py +11 -10
  132. nucliadb/search/search/cache.py +19 -23
  133. nucliadb/search/search/chat/ask.py +88 -59
  134. nucliadb/search/search/chat/exceptions.py +3 -5
  135. nucliadb/search/search/chat/fetcher.py +201 -0
  136. nucliadb/search/search/chat/images.py +6 -4
  137. nucliadb/search/search/chat/old_prompt.py +1375 -0
  138. nucliadb/search/search/chat/parser.py +510 -0
  139. nucliadb/search/search/chat/prompt.py +563 -615
  140. nucliadb/search/search/chat/query.py +449 -36
  141. nucliadb/search/search/chat/rpc.py +85 -0
  142. nucliadb/search/search/fetch.py +3 -4
  143. nucliadb/search/search/filters.py +8 -11
  144. nucliadb/search/search/find.py +33 -31
  145. nucliadb/search/search/find_merge.py +124 -331
  146. nucliadb/search/search/graph_strategy.py +14 -12
  147. nucliadb/search/search/hydrator/__init__.py +3 -152
  148. nucliadb/search/search/hydrator/fields.py +92 -50
  149. nucliadb/search/search/hydrator/images.py +7 -7
  150. nucliadb/search/search/hydrator/paragraphs.py +42 -26
  151. nucliadb/search/search/hydrator/resources.py +20 -16
  152. nucliadb/search/search/ingestion_agents.py +5 -5
  153. nucliadb/search/search/merge.py +90 -94
  154. nucliadb/search/search/metrics.py +10 -9
  155. nucliadb/search/search/paragraphs.py +7 -9
  156. nucliadb/search/search/predict_proxy.py +13 -9
  157. nucliadb/search/search/query.py +14 -86
  158. nucliadb/search/search/query_parser/fetcher.py +51 -82
  159. nucliadb/search/search/query_parser/models.py +19 -20
  160. nucliadb/search/search/query_parser/old_filters.py +20 -19
  161. nucliadb/search/search/query_parser/parsers/ask.py +4 -5
  162. nucliadb/search/search/query_parser/parsers/catalog.py +5 -6
  163. nucliadb/search/search/query_parser/parsers/common.py +5 -6
  164. nucliadb/search/search/query_parser/parsers/find.py +6 -26
  165. nucliadb/search/search/query_parser/parsers/graph.py +13 -23
  166. nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
  167. nucliadb/search/search/query_parser/parsers/search.py +15 -53
  168. nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
  169. nucliadb/search/search/rank_fusion.py +18 -13
  170. nucliadb/search/search/rerankers.py +5 -6
  171. nucliadb/search/search/retrieval.py +300 -0
  172. nucliadb/search/search/summarize.py +5 -6
  173. nucliadb/search/search/utils.py +3 -4
  174. nucliadb/search/settings.py +1 -2
  175. nucliadb/standalone/api_router.py +1 -1
  176. nucliadb/standalone/app.py +4 -3
  177. nucliadb/standalone/auth.py +5 -6
  178. nucliadb/standalone/lifecycle.py +2 -2
  179. nucliadb/standalone/run.py +2 -4
  180. nucliadb/standalone/settings.py +5 -6
  181. nucliadb/standalone/versions.py +3 -4
  182. nucliadb/tasks/consumer.py +13 -8
  183. nucliadb/tasks/models.py +2 -1
  184. nucliadb/tasks/producer.py +3 -3
  185. nucliadb/tasks/retries.py +8 -7
  186. nucliadb/train/api/utils.py +1 -3
  187. nucliadb/train/api/v1/shards.py +1 -2
  188. nucliadb/train/api/v1/trainset.py +1 -2
  189. nucliadb/train/app.py +1 -1
  190. nucliadb/train/generator.py +4 -4
  191. nucliadb/train/generators/field_classifier.py +2 -2
  192. nucliadb/train/generators/field_streaming.py +6 -6
  193. nucliadb/train/generators/image_classifier.py +2 -2
  194. nucliadb/train/generators/paragraph_classifier.py +2 -2
  195. nucliadb/train/generators/paragraph_streaming.py +2 -2
  196. nucliadb/train/generators/question_answer_streaming.py +2 -2
  197. nucliadb/train/generators/sentence_classifier.py +2 -2
  198. nucliadb/train/generators/token_classifier.py +3 -2
  199. nucliadb/train/generators/utils.py +6 -5
  200. nucliadb/train/nodes.py +3 -3
  201. nucliadb/train/resource.py +6 -8
  202. nucliadb/train/settings.py +3 -4
  203. nucliadb/train/types.py +11 -11
  204. nucliadb/train/upload.py +3 -2
  205. nucliadb/train/uploader.py +1 -2
  206. nucliadb/train/utils.py +1 -2
  207. nucliadb/writer/api/v1/export_import.py +4 -1
  208. nucliadb/writer/api/v1/field.py +7 -11
  209. nucliadb/writer/api/v1/knowledgebox.py +3 -4
  210. nucliadb/writer/api/v1/resource.py +9 -20
  211. nucliadb/writer/api/v1/services.py +10 -132
  212. nucliadb/writer/api/v1/upload.py +73 -72
  213. nucliadb/writer/app.py +8 -2
  214. nucliadb/writer/resource/basic.py +12 -15
  215. nucliadb/writer/resource/field.py +7 -5
  216. nucliadb/writer/resource/origin.py +7 -0
  217. nucliadb/writer/settings.py +2 -3
  218. nucliadb/writer/tus/__init__.py +2 -3
  219. nucliadb/writer/tus/azure.py +1 -3
  220. nucliadb/writer/tus/dm.py +3 -3
  221. nucliadb/writer/tus/exceptions.py +3 -4
  222. nucliadb/writer/tus/gcs.py +5 -6
  223. nucliadb/writer/tus/s3.py +2 -3
  224. nucliadb/writer/tus/storage.py +3 -3
  225. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +9 -10
  226. nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
  227. nucliadb/common/datamanagers/entities.py +0 -139
  228. nucliadb-6.9.1.post5192.dist-info/RECORD +0 -392
  229. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
  230. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
  231. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
nucliadb/standalone/auth.py CHANGED
@@ -19,7 +19,6 @@
 import base64
 import logging
 import time
-from typing import Optional
 
 import orjson
 from jwcrypto import jwe, jwk  # type: ignore
@@ -51,7 +50,7 @@ def get_mapped_roles(*, settings: Settings, data: dict[str, str]) -> list[str]:
 
 async def authenticate_auth_token(
     settings: Settings, request: HTTPConnection
-) -> Optional[tuple[AuthCredentials, BaseUser]]:
+) -> tuple[AuthCredentials, BaseUser] | None:
     if "eph-token" not in request.query_params or settings.jwk_key is None:
         return None
 
@@ -81,7 +80,7 @@ class AuthHeaderAuthenticationBackend(NucliaCloudAuthenticationBackend):
     def __init__(self, settings: Settings) -> None:
         self.settings = settings
 
-    async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
+    async def authenticate(self, request: HTTPConnection) -> tuple[AuthCredentials, BaseUser] | None:
         token_resp = await authenticate_auth_token(self.settings, request)
         if token_resp is not None:
             return token_resp
@@ -109,7 +108,7 @@ class OAuth2AuthenticationBackend(NucliaCloudAuthenticationBackend):
     def __init__(self, settings: Settings) -> None:
         self.settings = settings
 
-    async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
+    async def authenticate(self, request: HTTPConnection) -> tuple[AuthCredentials, BaseUser] | None:
         token_resp = await authenticate_auth_token(self.settings, request)
         if token_resp is not None:
             return token_resp
@@ -160,7 +159,7 @@ class BasicAuthAuthenticationBackend(NucliaCloudAuthenticationBackend):
     def __init__(self, settings: Settings) -> None:
         self.settings = settings
 
-    async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
+    async def authenticate(self, request: HTTPConnection) -> tuple[AuthCredentials, BaseUser] | None:
         token_resp = await authenticate_auth_token(self.settings, request)
         if token_resp is not None:
             return token_resp
@@ -189,7 +188,7 @@ class UpstreamNaiveAuthenticationBackend(NucliaCloudAuthenticationBackend):
             user_header=settings.auth_policy_user_header,
         )
 
-    async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
+    async def authenticate(self, request: HTTPConnection) -> tuple[AuthCredentials, BaseUser] | None:
         token_resp = await authenticate_auth_token(self.settings, request)
         if token_resp is not None:
             return token_resp
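
Note: the change repeated across this release swaps typing.Optional[X] for the PEP 604 union X | None, valid annotation syntax since Python 3.10 and needing no typing import. A minimal sketch of the idiom (names are illustrative, not from nucliadb):

    def find_user(user_id: str, directory: dict[str, str]) -> str | None:
        # `str | None` (PEP 604, Python 3.10+) is equivalent to
        # typing.Optional[str], with no import required.
        return directory.get(user_id)
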
nucliadb/standalone/lifecycle.py CHANGED
@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-import asyncio
+import inspect
 from contextlib import asynccontextmanager
 
 from fastapi import FastAPI
@@ -56,7 +56,7 @@ async def lifespan(app: FastAPI):
     yield
 
     for finalizer in SYNC_FINALIZERS:
-        if asyncio.iscoroutinefunction(finalizer):
+        if inspect.iscoroutinefunction(finalizer):
             await finalizer()
         else:
             finalizer()
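
Note: replacing asyncio.iscoroutinefunction with inspect.iscoroutinefunction is presumably motivated by the former's deprecation in Python 3.14; the inspect variant also recognizes callables marked with inspect.markcoroutinefunction (3.12+). A sketch of the same dispatch pattern over hypothetical finalizers:

    import inspect

    async def run_finalizers(finalizers) -> None:
        for finalizer in finalizers:
            # inspect.iscoroutinefunction also detects callables marked via
            # inspect.markcoroutinefunction (Python 3.12+); the asyncio
            # variant is deprecated as of Python 3.14.
            if inspect.iscoroutinefunction(finalizer):
                await finalizer()
            else:
                finalizer()
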
nucliadb/standalone/run.py CHANGED
@@ -21,7 +21,6 @@ import asyncio
 import logging
 import os
 import sys
-from typing import Optional
 
 import argdantic
 import uvicorn  # type: ignore
@@ -148,9 +147,8 @@ def run():
     server.run()
 
 
-def get_latest_nucliadb() -> Optional[str]:
-    loop = asyncio.get_event_loop()
-    return loop.run_until_complete(versions.latest_nucliadb())
+def get_latest_nucliadb() -> str | None:
+    return asyncio.run(versions.latest_nucliadb())
 
 
 async def run_async_nucliadb(settings: Settings) -> uvicorn.Server:
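
Note: calling asyncio.get_event_loop() with no running loop emits a DeprecationWarning on recent Python versions, while asyncio.run() creates a fresh loop, runs the coroutine to completion, and closes the loop. A sketch with a stand-in coroutine (the version string is made up):

    import asyncio

    async def latest_nucliadb() -> str | None:
        await asyncio.sleep(0)  # stand-in for the real async lookup
        return "6.10.0"

    def get_latest_nucliadb() -> str | None:
        # asyncio.run() owns the loop's full lifecycle, so nothing leaks
        # into the deprecated global event-loop machinery.
        return asyncio.run(latest_nucliadb())
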
nucliadb/standalone/settings.py CHANGED
@@ -18,7 +18,6 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 from enum import Enum
-from typing import Optional
 
 import pydantic
 
@@ -44,11 +43,11 @@ class Settings(DriverSettings, StorageSettings, ExtendedStorageSettings):
     # all settings here are mapped in to other env var settings used
     # in the app. These are helper settings to make things easier to
     # use with standalone app vs cluster app.
-    nua_api_key: Optional[str] = pydantic.Field(
+    nua_api_key: str | None = pydantic.Field(
         default=None,
-        description="Nuclia Understanding API Key. Read how to generate a NUA Key here: https://docs.nuclia.dev/docs/rag/advanced/understanding/intro#get-a-nua-key",  # noqa
+        description="Nuclia Understanding API Key. Read how to generate a NUA Key here: https://docs.nuclia.dev/docs/rag/advanced/understanding/intro#get-a-nua-key",
     )
-    zone: Optional[str] = pydantic.Field(default=None, description="Nuclia Understanding API Zone ID")
+    zone: str | None = pydantic.Field(default=None, description="Nuclia Understanding API Zone ID")
     http_host: str = pydantic.Field(default="0.0.0.0", description="HTTP Port")
     http_port: int = pydantic.Field(default=8080, description="HTTP Port")
     ingest_grpc_port: int = pydantic.Field(default=8030, description="Ingest GRPC Port")
@@ -83,7 +82,7 @@ class Settings(DriverSettings, StorageSettings, ExtendedStorageSettings):
         description="Default role to assign to user that is authenticated \
 upstream. Not used with `upstream_naive` auth policy.",
     )
-    auth_policy_role_mapping: Optional[dict[str, dict[str, list[NucliaDBRoles]]]] = pydantic.Field(
+    auth_policy_role_mapping: dict[str, dict[str, list[NucliaDBRoles]]] | None = pydantic.Field(
         default=None,
         description="""
 Role mapping for `upstream_auth_header`, `upstream_oauth2` and `upstream_basicauth` auth policies.
@@ -97,7 +96,7 @@ Examples:
 """,
     )
 
-    jwk_key: Optional[str] = pydantic.Field(
+    jwk_key: str | None = pydantic.Field(
         default=None,
         description="JWK key used for temporary token generation and validation.",
     )
nucliadb/standalone/versions.py CHANGED
@@ -20,7 +20,6 @@
 import enum
 import importlib.metadata
 import logging
-from typing import Optional
 
 from cachetools import TTLCache
 
@@ -45,11 +44,11 @@ def installed_nucliadb() -> str:
     return get_installed_version(StandalonePackages.NUCLIADB.value)
 
 
-async def latest_nucliadb() -> Optional[str]:
+async def latest_nucliadb() -> str | None:
     return await get_latest_version(StandalonePackages.NUCLIADB.value)
 
 
-def nucliadb_updates_available(installed: str, latest: Optional[str]) -> bool:
+def nucliadb_updates_available(installed: str, latest: str | None) -> bool:
     if latest is None:
         return False
     return is_newer_release(installed, latest)
@@ -96,7 +95,7 @@ def get_installed_version(package_name: str) -> str:
     return importlib.metadata.distribution(package_name).version
 
 
-async def get_latest_version(package: str) -> Optional[str]:
+async def get_latest_version(package: str) -> str | None:
     result = CACHE.get(package, None)
     if result is None:
         try:
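
Note: get_latest_version reads through a cachetools.TTLCache, so repeated update checks within the TTL window skip the network. A sketch of that read-through pattern (cache limits and the fetch stub are illustrative, not nucliadb's values):

    from cachetools import TTLCache

    CACHE: TTLCache = TTLCache(maxsize=16, ttl=30 * 60)

    async def fetch_latest_from_index(package: str) -> str | None:
        return "6.10.0"  # stand-in for the real package-index lookup

    async def get_latest_version(package: str) -> str | None:
        # Read-through: only hit the index once the TTL entry expires.
        result = CACHE.get(package, None)
        if result is None:
            result = await fetch_latest_from_index(package)
            CACHE[package] = result
        return result
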
nucliadb/tasks/consumer.py CHANGED
@@ -19,9 +19,10 @@
 #
 
 import asyncio
-from typing import Generic, Optional, Type
+from typing import Generic
 
 import nats
+import nats.js.api
 import pydantic
 from nats.aio.client import Msg
@@ -43,8 +44,9 @@ class NatsTaskConsumer(Generic[MsgType]):
         stream: NatsStream,
         consumer: NatsConsumer,
         callback: Callback,
-        msg_type: Type[MsgType],
-        max_concurrent_messages: Optional[int] = None,
+        msg_type: type[MsgType],
+        max_concurrent_messages: int | None = None,
+        max_deliver: int | None = None,
     ):
         self.name = name
         self.stream = stream
@@ -52,6 +54,7 @@ class NatsTaskConsumer(Generic[MsgType]):
         self.callback = callback
         self.msg_type = msg_type
         self.max_concurrent_messages = max_concurrent_messages
+        self.max_deliver = max_deliver
         self.initialized = False
         self.running_tasks: list[asyncio.Task] = []
         self.subscription = None
@@ -71,7 +74,8 @@
         for task in self.running_tasks:
             task.cancel()
         try:
-            await asyncio.wait(self.running_tasks, timeout=5)
+            if len(self.running_tasks) > 0:
+                await asyncio.wait(self.running_tasks, timeout=5)
             self.running_tasks.clear()
         except asyncio.TimeoutError:
             pass
@@ -96,6 +100,7 @@
                 ack_wait=nats_consumer_settings.nats_ack_wait,
                 idle_heartbeat=nats_consumer_settings.nats_idle_heartbeat,
                 max_ack_pending=max_ack_pending,
+                max_deliver=self.max_deliver,
             ),
         )
         logger.info(
@@ -168,8 +173,6 @@
                 },
             )
             await msg.ack()
-        finally:
-            return
 
 
 def create_consumer(
@@ -177,8 +180,9 @@ def create_consumer(
     stream: NatsStream,
     consumer: NatsConsumer,
    callback: Callback,
-    msg_type: Type[MsgType],
-    max_concurrent_messages: Optional[int] = None,
+    msg_type: type[MsgType],
+    max_concurrent_messages: int | None = None,
+    max_retries: int = 100,
 ) -> NatsTaskConsumer[MsgType]:
     """
     Returns a non-initialized consumer
@@ -190,4 +194,5 @@ def create_consumer(
         callback=callback,
         msg_type=msg_type,
         max_concurrent_messages=max_concurrent_messages,
+        max_deliver=max_retries,
     )
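
Note: two independent fixes here. The new guard matters because asyncio.wait raises ValueError when handed an empty collection of tasks. Separately, max_deliver flows into the JetStream consumer config, capping how many times an unacknowledged message is redelivered (create_consumer defaults it to 100 via max_retries). A sketch of the subscription side with nats-py, assuming a local server:

    import nats
    import nats.js.api

    async def subscribe_with_delivery_cap(subject: str, stream: str, cb) -> None:
        nc = await nats.connect("nats://localhost:4222")
        js = nc.jetstream()
        # max_deliver bounds JetStream redeliveries of an unacknowledged
        # message; after the cap the server stops retrying it.
        await js.subscribe(
            subject,
            stream=stream,
            cb=cb,
            config=nats.js.api.ConsumerConfig(max_deliver=100),
        )
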
nucliadb/tasks/models.py CHANGED
@@ -17,7 +17,8 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from typing import Any, Callable, Coroutine, TypeVar
+from collections.abc import Callable, Coroutine
+from typing import Any, TypeVar
 
 import pydantic
 
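
Note: since Python 3.9 (PEP 585), typing.Callable and typing.Coroutine are deprecated aliases of their directly subscriptable collections.abc counterparts. For example (the alias name is illustrative; the diff does not show how this module uses the imports):

    from collections.abc import Callable, Coroutine
    from typing import Any

    # collections.abc generics are subscriptable since Python 3.9.
    Callback = Callable[..., Coroutine[Any, Any, None]]
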
nucliadb/tasks/producer.py CHANGED
@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from typing import Generic, Type
+from typing import Generic
 
 from nucliadb.tasks.logger import logger
 from nucliadb.tasks.models import MsgType
@@ -32,7 +32,7 @@ class NatsTaskProducer(Generic[MsgType]):
         name: str,
         stream: NatsStream,
         producer_subject: str,
-        msg_type: Type[MsgType],
+        msg_type: type[MsgType],
     ):
         self.name = name
         self.stream = stream
@@ -69,7 +69,7 @@ def create_producer(
     name: str,
     stream: NatsStream,
     producer_subject: str,
-    msg_type: Type[MsgType],
+    msg_type: type[MsgType],
 ) -> NatsTaskProducer[MsgType]:
     """
     Returns a non-initialized producer.
nucliadb/tasks/retries.py CHANGED
@@ -19,9 +19,10 @@
 #
 import functools
 import logging
+from collections.abc import Callable
 from datetime import datetime, timezone
 from enum import Enum
-from typing import Callable, Optional, cast
+from typing import cast
 
 from pydantic import BaseModel
 
@@ -44,7 +45,7 @@ class TaskMetadata(BaseModel):
     status: Status
     retries: int = 0
     error_messages: list[str] = []
-    last_modified: Optional[datetime] = None
+    last_modified: datetime | None = None
 
 
 class TaskRetryHandler:
@@ -87,7 +88,7 @@ class TaskRetryHandler:
             kbid=self.kbid, task_type=self.task_type, task_id=self.task_id
         )
 
-    async def get_metadata(self) -> Optional[TaskMetadata]:
+    async def get_metadata(self) -> TaskMetadata | None:
         return await _get_metadata(self.context.kv_driver, self.metadata_key)
 
     async def set_metadata(self, metadata: TaskMetadata) -> None:
@@ -150,7 +151,7 @@ class TaskRetryHandler:
         return wrapper
 
 
-async def _get_metadata(kv_driver: Driver, metadata_key: str) -> Optional[TaskMetadata]:
+async def _get_metadata(kv_driver: Driver, metadata_key: str) -> TaskMetadata | None:
     async with kv_driver.ro_transaction() as txn:
         metadata = await txn.get(metadata_key)
         if metadata is None:
@@ -173,7 +174,7 @@ async def purge_metadata(kv_driver: Driver) -> int:
         return 0
 
     total_purged = 0
-    start: Optional[str] = ""
+    start: str | None = ""
     while True:
         start, purged = await purge_batch(kv_driver, start)
         total_purged += purged
@@ -183,8 +184,8 @@ async def purge_metadata(kv_driver: Driver) -> int:
 
 
 async def purge_batch(
-    kv_driver: PGDriver, start: Optional[str] = None, batch_size: int = 200
-) -> tuple[Optional[str], int]:
+    kv_driver: PGDriver, start: str | None = None, batch_size: int = 200
+) -> tuple[str | None, int]:
     """
     Returns the next start key and the number of purged records. If start is None, it means there are no more records to purge.
     """
nucliadb/train/api/utils.py CHANGED
@@ -19,12 +19,10 @@
 #
 
 
-from typing import Optional
-
 from nucliadb.train.utils import get_shard_manager
 
 
-async def get_kb_partitions(kbid: str, prefix: Optional[str] = None) -> list[str]:
+async def get_kb_partitions(kbid: str, prefix: str | None = None) -> list[str]:
     shard_manager = get_shard_manager()
     shards = await shard_manager.get_shards_by_kbid_inner(kbid=kbid)
     valid_shards = []
nucliadb/train/api/v1/shards.py CHANGED
@@ -18,7 +18,6 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 import json
-from typing import Optional
 
 import google.protobuf.message
 import pydantic
@@ -63,7 +62,7 @@ async def object_get_response(
     )
 
 
-async def get_trainset(request: Request) -> tuple[TrainSet, Optional[FilterExpression]]:
+async def get_trainset(request: Request) -> tuple[TrainSet, FilterExpression | None]:
     if request.headers.get("Content-Type") == "application/json":
         try:
             trainset_model = TrainSetModel.model_validate(await request.json())
nucliadb/train/api/v1/trainset.py CHANGED
@@ -18,7 +18,6 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from typing import Optional
 
 from fastapi import HTTPException, Request
 from fastapi_versioning import version
@@ -57,7 +56,7 @@ async def get_partitions_prefix(request: Request, kbid: str, prefix: str) -> TrainSetPartitions:
     return await get_partitions(kbid, prefix=prefix)
 
 
-async def get_partitions(kbid: str, prefix: Optional[str] = None) -> TrainSetPartitions:
+async def get_partitions(kbid: str, prefix: str | None = None) -> TrainSetPartitions:
     try:
         all_keys = await get_kb_partitions(kbid, prefix)
     except ShardNotFound:
nucliadb/train/app.py CHANGED
@@ -50,7 +50,6 @@ errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
 
 fastapi_settings = dict(
     debug=running_settings.debug,
-    middleware=middleware,
     lifespan=lifespan,
     exception_handlers={
         Exception: global_exception_handler,
@@ -71,6 +70,7 @@ application = VersionedFastAPI(
     prefix_format=f"/{API_PREFIX}/v{{major}}",
     default_version=(1, 0),
     enable_latest=False,
+    middleware=middleware,
     kwargs=fastapi_settings,
 )
 
nucliadb/train/generator.py CHANGED
@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from typing import AsyncIterator, Callable, Optional
+from collections.abc import AsyncIterator, Callable
 
 from fastapi import HTTPException
 from grpc import StatusCode
@@ -53,11 +53,11 @@ from nucliadb.train.utils import get_shard_manager
 from nucliadb_models.filters import FilterExpression
 from nucliadb_protos.dataset_pb2 import TaskType, TrainSet
 
-BatchGenerator = Callable[[str, TrainSet, str, Optional[FilterExpression]], AsyncIterator[TrainBatch]]
+BatchGenerator = Callable[[str, TrainSet, str, FilterExpression | None], AsyncIterator[TrainBatch]]
 
 
 async def generate_train_data(
-    kbid: str, shard: str, trainset: TrainSet, filter_expression: Optional[FilterExpression] = None
+    kbid: str, shard: str, trainset: TrainSet, filter_expression: FilterExpression | None = None
 ):
     # Get the data structure to generate data
     shard_manager = get_shard_manager()
@@ -66,7 +66,7 @@ async def generate_train_data(
     if trainset.batch_size == 0:
         trainset.batch_size = 50
 
-    batch_generator: Optional[BatchGenerator] = None
+    batch_generator: BatchGenerator | None = None
 
     if trainset.type == TaskType.FIELD_CLASSIFICATION:
         batch_generator = field_classification_batch_generator
nucliadb/train/generators/field_classifier.py CHANGED
@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from typing import AsyncGenerator, Optional
+from collections.abc import AsyncGenerator
 
 from nidx_protos.nodereader_pb2 import StreamRequest
 
@@ -39,7 +39,7 @@ def field_classification_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression: Optional[FilterExpression],
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[FieldClassificationBatch, None]:
     generator = generate_field_classification_payloads(kbid, trainset, shard_replica_id)
     batch_generator = batchify(generator, trainset.batch_size, FieldClassificationBatch)
nucliadb/train/generators/field_streaming.py CHANGED
@@ -19,7 +19,7 @@
 #
 
 import asyncio
-from typing import AsyncGenerator, AsyncIterable, Optional
+from collections.abc import AsyncGenerator, AsyncIterable
 
 from nidx_protos.nodereader_pb2 import DocumentItem, StreamRequest
 
@@ -45,7 +45,7 @@ def field_streaming_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression: Optional[FilterExpression],
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[FieldStreamingBatch, None]:
     generator = generate_field_streaming_payloads(kbid, trainset, shard_replica_id, filter_expression)
     batch_generator = batchify(generator, trainset.batch_size, FieldStreamingBatch)
@@ -53,7 +53,7 @@ def field_streaming_batch_generator(
 
 
 async def generate_field_streaming_payloads(
-    kbid: str, trainset: TrainSet, shard_replica_id: str, filter_expression: Optional[FilterExpression]
+    kbid: str, trainset: TrainSet, shard_replica_id: str, filter_expression: FilterExpression | None
 ) -> AsyncGenerator[FieldSplitData, None]:
     request = StreamRequest()
     request.shard_id.id = shard_replica_id
@@ -192,7 +192,7 @@ async def _fetch_basic(kbid: str, fsd: FieldSplitData):
     fsd.basic.CopyFrom(basic)
 
 
-async def get_field_text(kbid: str, rid: str, field: str, field_type: str) -> Optional[ExtractedText]:
+async def get_field_text(kbid: str, rid: str, field: str, field_type: str) -> ExtractedText | None:
     orm_resource = await get_resource_from_cache_or_db(kbid, rid)
 
     if orm_resource is None:
@@ -208,7 +208,7 @@ async def get_field_text(kbid: str, rid: str, field: str, field_type: str) -> Optional[ExtractedText]:
 
 async def get_field_metadata(
     kbid: str, rid: str, field: str, field_type: str
-) -> Optional[FieldComputedMetadata]:
+) -> FieldComputedMetadata | None:
     orm_resource = await get_resource_from_cache_or_db(kbid, rid)
 
     if orm_resource is None:
@@ -222,7 +222,7 @@ async def get_field_metadata(
     return field_metadata
 
 
-async def get_field_basic(kbid: str, rid: str, field: str, field_type: str) -> Optional[Basic]:
+async def get_field_basic(kbid: str, rid: str, field: str, field_type: str) -> Basic | None:
     orm_resource = await get_resource_from_cache_or_db(kbid, rid)
 
     if orm_resource is None:
nucliadb/train/generators/image_classifier.py CHANGED
@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from typing import AsyncGenerator, Optional
+from collections.abc import AsyncGenerator
 
 from nucliadb.train.generators.utils import batchify
 from nucliadb_models.filters import FilterExpression
@@ -33,7 +33,7 @@ def image_classification_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression: Optional[FilterExpression],
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[ImageClassificationBatch, None]:
     generator = generate_image_classification_payloads(kbid, trainset, shard_replica_id)
     batch_generator = batchify(generator, trainset.batch_size, ImageClassificationBatch)
nucliadb/train/generators/paragraph_classifier.py CHANGED
@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from typing import AsyncGenerator, Optional
+from collections.abc import AsyncGenerator
 
 from fastapi import HTTPException
 from nidx_protos.nodereader_pb2 import StreamRequest
@@ -38,7 +38,7 @@ def paragraph_classification_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression: Optional[FilterExpression],
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[ParagraphClassificationBatch, None]:
     if len(trainset.filter.labels) != 1:
         raise HTTPException(
nucliadb/train/generators/paragraph_streaming.py CHANGED
@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from typing import AsyncGenerator, Optional
+from collections.abc import AsyncGenerator
 
 from nidx_protos.nodereader_pb2 import StreamRequest
 
@@ -38,7 +38,7 @@ def paragraph_streaming_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression: Optional[FilterExpression],
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[ParagraphStreamingBatch, None]:
     generator = generate_paragraph_streaming_payloads(kbid, trainset, shard_replica_id)
     batch_generator = batchify(generator, trainset.batch_size, ParagraphStreamingBatch)
nucliadb/train/generators/question_answer_streaming.py CHANGED
@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from typing import AsyncGenerator, Optional
+from collections.abc import AsyncGenerator
 
 from nidx_protos.nodereader_pb2 import StreamRequest
 
@@ -47,7 +47,7 @@ def question_answer_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression: Optional[FilterExpression],
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[QuestionAnswerStreamingBatch, None]:
     generator = generate_question_answer_streaming_payloads(kbid, trainset, shard_replica_id)
     batch_generator = batchify(generator, trainset.batch_size, QuestionAnswerStreamingBatch)
nucliadb/train/generators/sentence_classifier.py CHANGED
@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from typing import AsyncGenerator, Optional
+from collections.abc import AsyncGenerator
 
 from fastapi import HTTPException
 from nidx_protos.nodereader_pb2 import StreamRequest
@@ -40,7 +40,7 @@ def sentence_classification_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression: Optional[FilterExpression],
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[SentenceClassificationBatch, None]:
     if len(trainset.filter.labels) == 0:
         raise HTTPException(
nucliadb/train/generators/token_classifier.py CHANGED
@@ -19,7 +19,8 @@
 #
 
 from collections import OrderedDict
-from typing import AsyncGenerator, Optional, cast
+from collections.abc import AsyncGenerator
+from typing import cast
 
 from nidx_protos.nodereader_pb2 import StreamFilter, StreamRequest
 
@@ -43,7 +44,7 @@ def token_classification_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression: Optional[FilterExpression],
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[TokenClassificationBatch, None]:
     generator = generate_token_classification_payloads(kbid, trainset, shard_replica_id)
     batch_generator = batchify(generator, trainset.batch_size, TokenClassificationBatch)
nucliadb/train/generators/utils.py CHANGED
@@ -18,7 +18,8 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from typing import Any, AsyncGenerator, AsyncIterator, Optional, Type
+from collections.abc import AsyncGenerator, AsyncIterator
+from typing import Any
 
 from nucliadb.common.cache import get_resource_cache
 from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
@@ -30,16 +31,16 @@ from nucliadb.train.types import T
 from nucliadb_utils.utilities import get_storage
 
 
-async def get_resource_from_cache_or_db(kbid: str, uuid: str) -> Optional[ResourceORM]:
+async def get_resource_from_cache_or_db(kbid: str, uuid: str) -> ResourceORM | None:
     resource_cache = get_resource_cache()
     if resource_cache is None:
-        return await _get_resource_from_db(kbid, uuid)
         logger.warning("Resource cache is not set")
+        return await _get_resource_from_db(kbid, uuid)
 
     return await resource_cache.get(kbid, uuid)
 
 
-async def _get_resource_from_db(kbid: str, uuid: str) -> Optional[ResourceORM]:
+async def _get_resource_from_db(kbid: str, uuid: str) -> ResourceORM | None:
     storage = await get_storage(service_name=SERVICE_NAME)
     async with get_driver().ro_transaction() as transaction:
         kb = KnowledgeBoxORM(transaction, storage, kbid)
@@ -81,7 +82,7 @@ async def get_paragraph(kbid: str, paragraph_id: str) -> str:
 
 
 async def batchify(
-    producer: AsyncIterator[Any], size: int, batch_klass: Type[T]
+    producer: AsyncIterator[Any], size: int, batch_klass: type[T]
 ) -> AsyncGenerator[T, None]:
     # NOTE: we are supposing all protobuffers have a data field
     batch = []
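
Note: the diff cuts off inside batchify. Given the visible comment that every batch protobuf exposes a repeated `data` field, a helper of this shape typically finishes along these lines (a sketch, not the verbatim nucliadb body):

    from collections.abc import AsyncGenerator, AsyncIterator
    from typing import Any, TypeVar

    T = TypeVar("T")

    async def batchify(
        producer: AsyncIterator[Any], size: int, batch_klass: type[T]
    ) -> AsyncGenerator[T, None]:
        # Assumes batch_klass is a protobuf message with a repeated
        # `data` field, per the original comment.
        batch: list[Any] = []
        async for item in producer:
            batch.append(item)
            if len(batch) == size:
                yield batch_klass(data=batch)
                batch = []
        if batch:  # flush the trailing partial batch
            yield batch_klass(data=batch)
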