nucliadb 6.7.2.post4874__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0023_backfill_pg_catalog.py +8 -4
- migrations/0028_extracted_vectors_reference.py +1 -1
- migrations/0029_backfill_field_status.py +3 -4
- migrations/0032_remove_old_relations.py +2 -3
- migrations/0038_backfill_catalog_field_labels.py +8 -4
- migrations/0039_backfill_converation_splits_metadata.py +106 -0
- migrations/0040_migrate_search_configurations.py +79 -0
- migrations/0041_reindex_conversations.py +137 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
- migrations/pg/0012_catalog_statistics_undo.py +26 -0
- nucliadb/backups/create.py +2 -15
- nucliadb/backups/restore.py +4 -15
- nucliadb/backups/tasks.py +4 -1
- nucliadb/common/back_pressure/cache.py +2 -3
- nucliadb/common/back_pressure/materializer.py +7 -13
- nucliadb/common/back_pressure/settings.py +6 -6
- nucliadb/common/back_pressure/utils.py +1 -0
- nucliadb/common/cache.py +9 -9
- nucliadb/common/catalog/__init__.py +79 -0
- nucliadb/common/catalog/dummy.py +36 -0
- nucliadb/common/catalog/interface.py +85 -0
- nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +330 -232
- nucliadb/common/catalog/utils.py +56 -0
- nucliadb/common/cluster/manager.py +8 -23
- nucliadb/common/cluster/rebalance.py +484 -112
- nucliadb/common/cluster/rollover.py +36 -9
- nucliadb/common/cluster/settings.py +4 -9
- nucliadb/common/cluster/utils.py +34 -8
- nucliadb/common/context/__init__.py +7 -8
- nucliadb/common/context/fastapi.py +1 -2
- nucliadb/common/datamanagers/__init__.py +2 -4
- nucliadb/common/datamanagers/atomic.py +9 -2
- nucliadb/common/datamanagers/cluster.py +1 -2
- nucliadb/common/datamanagers/fields.py +3 -4
- nucliadb/common/datamanagers/kb.py +6 -6
- nucliadb/common/datamanagers/labels.py +2 -3
- nucliadb/common/datamanagers/resources.py +10 -33
- nucliadb/common/datamanagers/rollover.py +5 -7
- nucliadb/common/datamanagers/search_configurations.py +1 -2
- nucliadb/common/datamanagers/synonyms.py +1 -2
- nucliadb/common/datamanagers/utils.py +4 -4
- nucliadb/common/datamanagers/vectorsets.py +4 -4
- nucliadb/common/external_index_providers/base.py +32 -5
- nucliadb/common/external_index_providers/manager.py +5 -34
- nucliadb/common/external_index_providers/settings.py +1 -27
- nucliadb/common/filter_expression.py +129 -41
- nucliadb/common/http_clients/exceptions.py +8 -0
- nucliadb/common/http_clients/processing.py +16 -23
- nucliadb/common/http_clients/utils.py +3 -0
- nucliadb/common/ids.py +82 -58
- nucliadb/common/locking.py +1 -2
- nucliadb/common/maindb/driver.py +9 -8
- nucliadb/common/maindb/local.py +5 -5
- nucliadb/common/maindb/pg.py +9 -8
- nucliadb/common/nidx.py +22 -5
- nucliadb/common/vector_index_config.py +1 -1
- nucliadb/export_import/datamanager.py +4 -3
- nucliadb/export_import/exporter.py +11 -19
- nucliadb/export_import/importer.py +13 -6
- nucliadb/export_import/tasks.py +2 -0
- nucliadb/export_import/utils.py +6 -18
- nucliadb/health.py +2 -2
- nucliadb/ingest/app.py +8 -8
- nucliadb/ingest/consumer/consumer.py +8 -10
- nucliadb/ingest/consumer/pull.py +10 -8
- nucliadb/ingest/consumer/service.py +5 -30
- nucliadb/ingest/consumer/shard_creator.py +16 -5
- nucliadb/ingest/consumer/utils.py +1 -1
- nucliadb/ingest/fields/base.py +37 -49
- nucliadb/ingest/fields/conversation.py +55 -9
- nucliadb/ingest/fields/exceptions.py +1 -2
- nucliadb/ingest/fields/file.py +22 -8
- nucliadb/ingest/fields/link.py +7 -7
- nucliadb/ingest/fields/text.py +2 -3
- nucliadb/ingest/orm/brain_v2.py +89 -57
- nucliadb/ingest/orm/broker_message.py +2 -4
- nucliadb/ingest/orm/entities.py +10 -209
- nucliadb/ingest/orm/index_message.py +128 -113
- nucliadb/ingest/orm/knowledgebox.py +91 -59
- nucliadb/ingest/orm/processor/auditing.py +1 -3
- nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
- nucliadb/ingest/orm/processor/processor.py +98 -153
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
- nucliadb/ingest/orm/resource.py +82 -71
- nucliadb/ingest/orm/utils.py +1 -1
- nucliadb/ingest/partitions.py +12 -1
- nucliadb/ingest/processing.py +17 -17
- nucliadb/ingest/serialize.py +202 -145
- nucliadb/ingest/service/writer.py +15 -114
- nucliadb/ingest/settings.py +36 -15
- nucliadb/ingest/utils.py +1 -2
- nucliadb/learning_proxy.py +23 -26
- nucliadb/metrics_exporter.py +20 -6
- nucliadb/middleware/__init__.py +82 -1
- nucliadb/migrator/datamanager.py +4 -11
- nucliadb/migrator/migrator.py +1 -2
- nucliadb/migrator/models.py +1 -2
- nucliadb/migrator/settings.py +1 -2
- nucliadb/models/internal/augment.py +614 -0
- nucliadb/models/internal/processing.py +19 -19
- nucliadb/openapi.py +2 -2
- nucliadb/purge/__init__.py +3 -8
- nucliadb/purge/orphan_shards.py +1 -2
- nucliadb/reader/__init__.py +5 -0
- nucliadb/reader/api/models.py +6 -13
- nucliadb/reader/api/v1/download.py +59 -38
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +37 -9
- nucliadb/reader/api/v1/learning_config.py +33 -14
- nucliadb/reader/api/v1/resource.py +61 -9
- nucliadb/reader/api/v1/services.py +18 -14
- nucliadb/reader/app.py +3 -1
- nucliadb/reader/reader/notifications.py +1 -2
- nucliadb/search/api/v1/__init__.py +3 -0
- nucliadb/search/api/v1/ask.py +3 -4
- nucliadb/search/api/v1/augment.py +585 -0
- nucliadb/search/api/v1/catalog.py +15 -19
- nucliadb/search/api/v1/find.py +16 -22
- nucliadb/search/api/v1/hydrate.py +328 -0
- nucliadb/search/api/v1/knowledgebox.py +1 -2
- nucliadb/search/api/v1/predict_proxy.py +1 -2
- nucliadb/search/api/v1/resource/ask.py +28 -8
- nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
- nucliadb/search/api/v1/resource/search.py +9 -11
- nucliadb/search/api/v1/retrieve.py +130 -0
- nucliadb/search/api/v1/search.py +28 -32
- nucliadb/search/api/v1/suggest.py +11 -14
- nucliadb/search/api/v1/summarize.py +1 -2
- nucliadb/search/api/v1/utils.py +2 -2
- nucliadb/search/app.py +3 -2
- nucliadb/search/augmentor/__init__.py +21 -0
- nucliadb/search/augmentor/augmentor.py +232 -0
- nucliadb/search/augmentor/fields.py +704 -0
- nucliadb/search/augmentor/metrics.py +24 -0
- nucliadb/search/augmentor/paragraphs.py +334 -0
- nucliadb/search/augmentor/resources.py +238 -0
- nucliadb/search/augmentor/utils.py +33 -0
- nucliadb/search/lifecycle.py +3 -1
- nucliadb/search/predict.py +33 -19
- nucliadb/search/predict_models.py +8 -9
- nucliadb/search/requesters/utils.py +11 -10
- nucliadb/search/search/cache.py +19 -42
- nucliadb/search/search/chat/ask.py +131 -59
- nucliadb/search/search/chat/exceptions.py +3 -5
- nucliadb/search/search/chat/fetcher.py +201 -0
- nucliadb/search/search/chat/images.py +6 -4
- nucliadb/search/search/chat/old_prompt.py +1375 -0
- nucliadb/search/search/chat/parser.py +510 -0
- nucliadb/search/search/chat/prompt.py +563 -615
- nucliadb/search/search/chat/query.py +453 -32
- nucliadb/search/search/chat/rpc.py +85 -0
- nucliadb/search/search/fetch.py +3 -4
- nucliadb/search/search/filters.py +8 -11
- nucliadb/search/search/find.py +33 -31
- nucliadb/search/search/find_merge.py +124 -331
- nucliadb/search/search/graph_strategy.py +14 -12
- nucliadb/search/search/hydrator/__init__.py +49 -0
- nucliadb/search/search/hydrator/fields.py +217 -0
- nucliadb/search/search/hydrator/images.py +130 -0
- nucliadb/search/search/hydrator/paragraphs.py +323 -0
- nucliadb/search/search/hydrator/resources.py +60 -0
- nucliadb/search/search/ingestion_agents.py +5 -5
- nucliadb/search/search/merge.py +90 -94
- nucliadb/search/search/metrics.py +24 -7
- nucliadb/search/search/paragraphs.py +7 -9
- nucliadb/search/search/predict_proxy.py +44 -18
- nucliadb/search/search/query.py +14 -86
- nucliadb/search/search/query_parser/fetcher.py +51 -82
- nucliadb/search/search/query_parser/models.py +19 -48
- nucliadb/search/search/query_parser/old_filters.py +20 -19
- nucliadb/search/search/query_parser/parsers/ask.py +5 -6
- nucliadb/search/search/query_parser/parsers/catalog.py +7 -11
- nucliadb/search/search/query_parser/parsers/common.py +21 -13
- nucliadb/search/search/query_parser/parsers/find.py +6 -29
- nucliadb/search/search/query_parser/parsers/graph.py +18 -28
- nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
- nucliadb/search/search/query_parser/parsers/search.py +15 -56
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
- nucliadb/search/search/rank_fusion.py +18 -13
- nucliadb/search/search/rerankers.py +6 -7
- nucliadb/search/search/retrieval.py +300 -0
- nucliadb/search/search/summarize.py +5 -6
- nucliadb/search/search/utils.py +3 -4
- nucliadb/search/settings.py +1 -2
- nucliadb/standalone/api_router.py +1 -1
- nucliadb/standalone/app.py +4 -3
- nucliadb/standalone/auth.py +5 -6
- nucliadb/standalone/lifecycle.py +2 -2
- nucliadb/standalone/run.py +5 -4
- nucliadb/standalone/settings.py +5 -6
- nucliadb/standalone/versions.py +3 -4
- nucliadb/tasks/consumer.py +13 -8
- nucliadb/tasks/models.py +2 -1
- nucliadb/tasks/producer.py +3 -3
- nucliadb/tasks/retries.py +8 -7
- nucliadb/train/api/utils.py +1 -3
- nucliadb/train/api/v1/shards.py +1 -2
- nucliadb/train/api/v1/trainset.py +1 -2
- nucliadb/train/app.py +1 -1
- nucliadb/train/generator.py +4 -4
- nucliadb/train/generators/field_classifier.py +2 -2
- nucliadb/train/generators/field_streaming.py +6 -6
- nucliadb/train/generators/image_classifier.py +2 -2
- nucliadb/train/generators/paragraph_classifier.py +2 -2
- nucliadb/train/generators/paragraph_streaming.py +2 -2
- nucliadb/train/generators/question_answer_streaming.py +2 -2
- nucliadb/train/generators/sentence_classifier.py +4 -10
- nucliadb/train/generators/token_classifier.py +3 -2
- nucliadb/train/generators/utils.py +6 -5
- nucliadb/train/nodes.py +3 -3
- nucliadb/train/resource.py +6 -8
- nucliadb/train/settings.py +3 -4
- nucliadb/train/types.py +11 -11
- nucliadb/train/upload.py +3 -2
- nucliadb/train/uploader.py +1 -2
- nucliadb/train/utils.py +1 -2
- nucliadb/writer/api/v1/export_import.py +4 -1
- nucliadb/writer/api/v1/field.py +15 -14
- nucliadb/writer/api/v1/knowledgebox.py +18 -56
- nucliadb/writer/api/v1/learning_config.py +5 -4
- nucliadb/writer/api/v1/resource.py +9 -20
- nucliadb/writer/api/v1/services.py +10 -132
- nucliadb/writer/api/v1/upload.py +73 -72
- nucliadb/writer/app.py +8 -2
- nucliadb/writer/resource/basic.py +12 -15
- nucliadb/writer/resource/field.py +43 -5
- nucliadb/writer/resource/origin.py +7 -0
- nucliadb/writer/settings.py +2 -3
- nucliadb/writer/tus/__init__.py +2 -3
- nucliadb/writer/tus/azure.py +5 -7
- nucliadb/writer/tus/dm.py +3 -3
- nucliadb/writer/tus/exceptions.py +3 -4
- nucliadb/writer/tus/gcs.py +15 -22
- nucliadb/writer/tus/s3.py +2 -3
- nucliadb/writer/tus/storage.py +3 -3
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +10 -11
- nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
- nucliadb/common/datamanagers/entities.py +0 -139
- nucliadb/common/external_index_providers/pinecone.py +0 -894
- nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
- nucliadb/search/search/hydrator.py +0 -197
- nucliadb-6.7.2.post4874.dist-info/RECORD +0 -383
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
nucliadb/openapi.py
CHANGED
|
@@ -33,11 +33,11 @@ def is_versioned_route(route):
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
def extract_openapi(application, version, commit_id, app_name):
|
|
36
|
-
app =
|
|
36
|
+
app = next(
|
|
37
37
|
route.app
|
|
38
38
|
for route in application.routes
|
|
39
39
|
if is_versioned_route(route) and route.app.version == version
|
|
40
|
-
|
|
40
|
+
)
|
|
41
41
|
document = get_openapi(
|
|
42
42
|
title=app.title,
|
|
43
43
|
version=app.version,
|
nucliadb/purge/__init__.py
CHANGED
|
@@ -19,7 +19,8 @@
|
|
|
19
19
|
#
|
|
20
20
|
import asyncio
|
|
21
21
|
import importlib.metadata
|
|
22
|
-
from
|
|
22
|
+
from collections.abc import AsyncGenerator
|
|
23
|
+
from itertools import batched # type: ignore
|
|
23
24
|
|
|
24
25
|
from nucliadb.common import datamanagers
|
|
25
26
|
from nucliadb.common.cluster.exceptions import NodeError, ShardNotFound
|
|
@@ -233,7 +234,7 @@ async def purge_kb_vectorsets(driver: Driver, storage: Storage):
|
|
|
233
234
|
fields.extend((await resource.get_fields(force=True)).values())
|
|
234
235
|
|
|
235
236
|
logger.info(f"Purging {len(fields)} fields for vectorset {vectorset}", extra={"kbid": kbid})
|
|
236
|
-
for fields_batch in
|
|
237
|
+
for fields_batch in batched(fields, n=20):
|
|
237
238
|
tasks = []
|
|
238
239
|
for field in fields_batch:
|
|
239
240
|
if purge_payload.storage_key_kind == VectorSetConfig.StorageKeyKind.UNSET:
|
|
@@ -317,9 +318,3 @@ def run() -> int: # pragma: no cover
|
|
|
317
318
|
setup_logging()
|
|
318
319
|
errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
|
|
319
320
|
return asyncio.run(main())
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
def batchify(iterable, n=1):
|
|
323
|
-
"""Yield successive n-sized chunks from iterable."""
|
|
324
|
-
for i in range(0, len(iterable), n):
|
|
325
|
-
yield iterable[i : i + n]
|
nucliadb/purge/orphan_shards.py
CHANGED
|
@@ -20,7 +20,6 @@
|
|
|
20
20
|
import argparse
|
|
21
21
|
import asyncio
|
|
22
22
|
import importlib.metadata
|
|
23
|
-
from typing import Optional
|
|
24
23
|
|
|
25
24
|
from grpc.aio import AioRpcError
|
|
26
25
|
from nidx_protos import nodereader_pb2, noderesources_pb2
|
|
@@ -113,7 +112,7 @@ async def _get_stored_shards(driver: Driver) -> dict[str, ShardKb]:
|
|
|
113
112
|
return stored_shards
|
|
114
113
|
|
|
115
114
|
|
|
116
|
-
async def _get_kbid(shard_id: str) ->
|
|
115
|
+
async def _get_kbid(shard_id: str) -> str | None:
|
|
117
116
|
kbid = None
|
|
118
117
|
try:
|
|
119
118
|
req = nodereader_pb2.GetShardRequest()
|
nucliadb/reader/__init__.py
CHANGED
|
@@ -19,6 +19,8 @@
|
|
|
19
19
|
#
|
|
20
20
|
import logging
|
|
21
21
|
|
|
22
|
+
from fastapi import Header
|
|
23
|
+
|
|
22
24
|
SERVICE_NAME = "nucliadb.reader"
|
|
23
25
|
logger = logging.getLogger(SERVICE_NAME)
|
|
24
26
|
|
|
@@ -35,3 +37,6 @@ class EndpointFilter(logging.Filter):
|
|
|
35
37
|
|
|
36
38
|
# Add filter to the logger
|
|
37
39
|
logging.getLogger("uvicorn.access").addFilter(EndpointFilter())
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
RANGE_HEADER = Header(description="Standard HTTP Range header that enable multipart requests")
|
nucliadb/reader/api/models.py
CHANGED
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
from typing import TYPE_CHECKING, Any
|
|
20
|
+
from typing import TYPE_CHECKING, Any
|
|
21
21
|
|
|
22
22
|
from pydantic import BaseModel
|
|
23
23
|
|
|
@@ -33,14 +33,7 @@ from nucliadb_models.resource import (
|
|
|
33
33
|
)
|
|
34
34
|
|
|
35
35
|
if TYPE_CHECKING: # pragma: no cover
|
|
36
|
-
ValueType =
|
|
37
|
-
Union[
|
|
38
|
-
models.FieldText,
|
|
39
|
-
models.FieldFile,
|
|
40
|
-
models.FieldLink,
|
|
41
|
-
models.Conversation,
|
|
42
|
-
]
|
|
43
|
-
]
|
|
36
|
+
ValueType = models.FieldText | models.FieldFile | models.FieldLink | models.Conversation | None
|
|
44
37
|
else:
|
|
45
38
|
# without Any, pydantic fails to anything as validate() fails using the Union
|
|
46
39
|
ValueType = Any
|
|
@@ -50,10 +43,10 @@ class ResourceField(BaseModel):
|
|
|
50
43
|
field_type: FieldTypeName
|
|
51
44
|
field_id: str
|
|
52
45
|
value: ValueType = None
|
|
53
|
-
extracted:
|
|
54
|
-
error:
|
|
55
|
-
status:
|
|
56
|
-
errors:
|
|
46
|
+
extracted: ExtractedDataType | None = None
|
|
47
|
+
error: Error | None = None
|
|
48
|
+
status: str | None = None
|
|
49
|
+
errors: list[Error] | None = None
|
|
57
50
|
|
|
58
51
|
|
|
59
52
|
FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP: dict[FieldTypeName, Any] = {
|
|
@@ -18,20 +18,18 @@
|
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
20
|
import urllib.parse
|
|
21
|
-
from
|
|
22
|
-
from typing import Optional
|
|
21
|
+
from typing import Annotated
|
|
23
22
|
|
|
24
23
|
from fastapi import HTTPException
|
|
25
24
|
from fastapi.requests import Request
|
|
26
25
|
from fastapi.responses import Response
|
|
27
26
|
from fastapi_versioning import version
|
|
28
|
-
from starlette.datastructures import Headers
|
|
29
27
|
from starlette.responses import StreamingResponse
|
|
30
28
|
|
|
29
|
+
from nucliadb.common import datamanagers
|
|
31
30
|
from nucliadb.common.ids import FIELD_TYPE_PB_TO_STR
|
|
32
31
|
from nucliadb.common.models_utils import to_proto
|
|
33
|
-
from nucliadb.
|
|
34
|
-
from nucliadb.reader import SERVICE_NAME, logger
|
|
32
|
+
from nucliadb.reader import RANGE_HEADER, SERVICE_NAME, logger
|
|
35
33
|
from nucliadb_models.common import FieldTypeName
|
|
36
34
|
from nucliadb_models.resource import NucliaDBRoles
|
|
37
35
|
from nucliadb_utils.authentication import requires_one
|
|
@@ -41,13 +39,8 @@ from nucliadb_utils.utilities import get_storage
|
|
|
41
39
|
from .router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
|
|
42
40
|
|
|
43
41
|
|
|
44
|
-
class DownloadType(Enum):
|
|
45
|
-
EXTRACTED = "extracted"
|
|
46
|
-
FIELD = "field"
|
|
47
|
-
|
|
48
|
-
|
|
49
42
|
@api.get(
|
|
50
|
-
f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/{{field_type}}/{{field_id}}/download/extracted/{{download_field:path}}",
|
|
43
|
+
f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/{{field_type}}/{{field_id}}/download/extracted/{{download_field:path}}",
|
|
51
44
|
tags=["Resource fields"],
|
|
52
45
|
status_code=200,
|
|
53
46
|
summary="Download extracted binary file (by slug)",
|
|
@@ -61,12 +54,20 @@ async def download_extract_file_rslug_prefix(
|
|
|
61
54
|
field_type: FieldTypeName,
|
|
62
55
|
field_id: str,
|
|
63
56
|
download_field: str,
|
|
57
|
+
range: Annotated[str | None, RANGE_HEADER] = None,
|
|
64
58
|
) -> Response:
|
|
65
|
-
return await _download_extract_file(
|
|
59
|
+
return await _download_extract_file(
|
|
60
|
+
kbid,
|
|
61
|
+
field_type,
|
|
62
|
+
field_id,
|
|
63
|
+
download_field,
|
|
64
|
+
rslug=rslug,
|
|
65
|
+
range_request=range,
|
|
66
|
+
)
|
|
66
67
|
|
|
67
68
|
|
|
68
69
|
@api.get(
|
|
69
|
-
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/{{field_type}}/{{field_id}}/download/extracted/{{download_field:path}}",
|
|
70
|
+
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/{{field_type}}/{{field_id}}/download/extracted/{{download_field:path}}",
|
|
70
71
|
tags=["Resource fields"],
|
|
71
72
|
status_code=200,
|
|
72
73
|
summary="Download extracted binary file (by id)",
|
|
@@ -80,18 +81,21 @@ async def download_extract_file_rid_prefix(
|
|
|
80
81
|
field_type: FieldTypeName,
|
|
81
82
|
field_id: str,
|
|
82
83
|
download_field: str,
|
|
84
|
+
range: Annotated[str | None, RANGE_HEADER] = None,
|
|
83
85
|
) -> Response:
|
|
84
|
-
return await _download_extract_file(
|
|
86
|
+
return await _download_extract_file(
|
|
87
|
+
kbid, field_type, field_id, download_field, rid=rid, range_request=range
|
|
88
|
+
)
|
|
85
89
|
|
|
86
90
|
|
|
87
91
|
async def _download_extract_file(
|
|
88
|
-
request: Request,
|
|
89
92
|
kbid: str,
|
|
90
93
|
field_type: FieldTypeName,
|
|
91
94
|
field_id: str,
|
|
92
95
|
download_field: str,
|
|
93
|
-
rid:
|
|
94
|
-
rslug:
|
|
96
|
+
rid: str | None = None,
|
|
97
|
+
rslug: str | None = None,
|
|
98
|
+
range_request: str | None = None,
|
|
95
99
|
) -> Response:
|
|
96
100
|
rid = await _get_resource_uuid_from_params(kbid, rid, rslug)
|
|
97
101
|
|
|
@@ -102,7 +106,7 @@ async def _download_extract_file(
|
|
|
102
106
|
|
|
103
107
|
sf = storage.file_extracted(kbid, rid, field_type_letter, field_id, download_field)
|
|
104
108
|
|
|
105
|
-
return await download_api(sf,
|
|
109
|
+
return await download_api(sf, range_request)
|
|
106
110
|
|
|
107
111
|
|
|
108
112
|
@api.get(
|
|
@@ -119,8 +123,9 @@ async def download_field_file_rslug_prefix(
|
|
|
119
123
|
rslug: str,
|
|
120
124
|
field_id: str,
|
|
121
125
|
inline: bool = False,
|
|
126
|
+
range: Annotated[str | None, RANGE_HEADER] = None,
|
|
122
127
|
) -> Response:
|
|
123
|
-
return await _download_field_file(
|
|
128
|
+
return await _download_field_file(kbid, field_id, rslug=rslug, range_request=range, inline=inline)
|
|
124
129
|
|
|
125
130
|
|
|
126
131
|
@api.get(
|
|
@@ -137,16 +142,17 @@ async def download_field_file_rid_prefix(
|
|
|
137
142
|
rid: str,
|
|
138
143
|
field_id: str,
|
|
139
144
|
inline: bool = False,
|
|
145
|
+
range: Annotated[str | None, RANGE_HEADER] = None,
|
|
140
146
|
) -> Response:
|
|
141
|
-
return await _download_field_file(
|
|
147
|
+
return await _download_field_file(kbid, field_id, rid=rid, range_request=range, inline=inline)
|
|
142
148
|
|
|
143
149
|
|
|
144
150
|
async def _download_field_file(
|
|
145
|
-
request: Request,
|
|
146
151
|
kbid: str,
|
|
147
152
|
field_id: str,
|
|
148
|
-
rid:
|
|
149
|
-
rslug:
|
|
153
|
+
rid: str | None = None,
|
|
154
|
+
rslug: str | None = None,
|
|
155
|
+
range_request: str | None = None,
|
|
150
156
|
inline: bool = False,
|
|
151
157
|
) -> Response:
|
|
152
158
|
rid = await _get_resource_uuid_from_params(kbid, rid, rslug)
|
|
@@ -155,11 +161,11 @@ async def _download_field_file(
|
|
|
155
161
|
|
|
156
162
|
sf = storage.file_field(kbid, rid, field_id)
|
|
157
163
|
|
|
158
|
-
return await download_api(sf,
|
|
164
|
+
return await download_api(sf, range_request=range_request, inline=inline)
|
|
159
165
|
|
|
160
166
|
|
|
161
167
|
@api.get(
|
|
162
|
-
f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/conversation/{{field_id}}/download/field/{{message_id}}/{{file_num}}",
|
|
168
|
+
f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/conversation/{{field_id}}/download/field/{{message_id}}/{{file_num}}",
|
|
163
169
|
tags=["Resource fields"],
|
|
164
170
|
status_code=200,
|
|
165
171
|
summary="Download conversation binary field (by slug)",
|
|
@@ -173,14 +179,20 @@ async def download_field_conversation_rslug_prefix(
|
|
|
173
179
|
field_id: str,
|
|
174
180
|
message_id: str,
|
|
175
181
|
file_num: int,
|
|
182
|
+
range: Annotated[str | None, RANGE_HEADER] = None,
|
|
176
183
|
) -> Response:
|
|
177
184
|
return await _download_field_conversation_attachment(
|
|
178
|
-
|
|
185
|
+
kbid,
|
|
186
|
+
field_id,
|
|
187
|
+
message_id,
|
|
188
|
+
file_num,
|
|
189
|
+
rslug=rslug,
|
|
190
|
+
range_request=range,
|
|
179
191
|
)
|
|
180
192
|
|
|
181
193
|
|
|
182
194
|
@api.get(
|
|
183
|
-
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/conversation/{{field_id}}/download/field/{{message_id}}/{{file_num}}",
|
|
195
|
+
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/conversation/{{field_id}}/download/field/{{message_id}}/{{file_num}}",
|
|
184
196
|
tags=["Resource fields"],
|
|
185
197
|
status_code=200,
|
|
186
198
|
summary="Download conversation binary field (by id)",
|
|
@@ -194,20 +206,26 @@ async def download_field_conversation_attachment_rid_prefix(
|
|
|
194
206
|
field_id: str,
|
|
195
207
|
message_id: str,
|
|
196
208
|
file_num: int,
|
|
209
|
+
range: Annotated[str | None, RANGE_HEADER] = None,
|
|
197
210
|
) -> Response:
|
|
198
211
|
return await _download_field_conversation_attachment(
|
|
199
|
-
|
|
212
|
+
kbid,
|
|
213
|
+
field_id,
|
|
214
|
+
message_id,
|
|
215
|
+
file_num,
|
|
216
|
+
rid=rid,
|
|
217
|
+
range_request=range,
|
|
200
218
|
)
|
|
201
219
|
|
|
202
220
|
|
|
203
221
|
async def _download_field_conversation_attachment(
|
|
204
|
-
request: Request,
|
|
205
222
|
kbid: str,
|
|
206
223
|
field_id: str,
|
|
207
224
|
message_id: str,
|
|
208
225
|
file_num: int,
|
|
209
|
-
rid:
|
|
210
|
-
rslug:
|
|
226
|
+
rid: str | None = None,
|
|
227
|
+
rslug: str | None = None,
|
|
228
|
+
range_request: str | None = None,
|
|
211
229
|
) -> Response:
|
|
212
230
|
rid = await _get_resource_uuid_from_params(kbid, rid, rslug)
|
|
213
231
|
|
|
@@ -217,11 +235,11 @@ async def _download_field_conversation_attachment(
|
|
|
217
235
|
kbid, rid, field_id, message_id, attachment_index=file_num
|
|
218
236
|
)
|
|
219
237
|
|
|
220
|
-
return await download_api(sf,
|
|
238
|
+
return await download_api(sf, range_request)
|
|
221
239
|
|
|
222
240
|
|
|
223
|
-
async def download_api(sf: StorageField,
|
|
224
|
-
metadata:
|
|
241
|
+
async def download_api(sf: StorageField, range_request: str | None = None, inline: bool = False):
|
|
242
|
+
metadata: ObjectMetadata | None = await sf.exists()
|
|
225
243
|
if metadata is None:
|
|
226
244
|
raise HTTPException(status_code=404, detail="Specified file doesn't exist")
|
|
227
245
|
|
|
@@ -240,9 +258,8 @@ async def download_api(sf: StorageField, headers: Headers, inline: bool = False)
|
|
|
240
258
|
}
|
|
241
259
|
|
|
242
260
|
range = Range()
|
|
243
|
-
if
|
|
261
|
+
if range_request and file_size > -1:
|
|
244
262
|
status_code = 206
|
|
245
|
-
range_request = headers["range"]
|
|
246
263
|
try:
|
|
247
264
|
start, end, range_size = parse_media_range(range_request, file_size)
|
|
248
265
|
except NotImplementedError:
|
|
@@ -295,13 +312,17 @@ async def download_api(sf: StorageField, headers: Headers, inline: bool = False)
|
|
|
295
312
|
)
|
|
296
313
|
|
|
297
314
|
|
|
298
|
-
async def _get_resource_uuid_from_params(kbid, rid:
|
|
315
|
+
async def _get_resource_uuid_from_params(kbid, rid: str | None, rslug: str | None) -> str:
|
|
299
316
|
if not any([rid, rslug]):
|
|
300
317
|
raise ValueError("Either rid or slug must be set")
|
|
301
318
|
|
|
302
319
|
if not rid:
|
|
303
320
|
# Attempt to get it from slug
|
|
304
|
-
rid = await
|
|
321
|
+
rid = await datamanagers.atomic.resources.get_resource_uuid_from_slug(
|
|
322
|
+
kbid=kbid,
|
|
323
|
+
# mypy doesn't infer that we already checked for slug to be something
|
|
324
|
+
slug=rslug, # type: ignore[arg-type]
|
|
325
|
+
)
|
|
305
326
|
if rid is None:
|
|
306
327
|
raise HTTPException(status_code=404, detail="Resource does not exist")
|
|
307
328
|
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
from
|
|
20
|
+
from collections.abc import AsyncGenerator, AsyncIterable
|
|
21
21
|
|
|
22
22
|
from fastapi.responses import StreamingResponse
|
|
23
23
|
from fastapi_versioning import version
|
|
@@ -108,7 +108,7 @@ async def download_export_and_delete(
|
|
|
108
108
|
@version(1)
|
|
109
109
|
async def get_export_status_endpoint(
|
|
110
110
|
request: Request, kbid: str, export_id: str
|
|
111
|
-
) ->
|
|
111
|
+
) -> StatusResponse | HTTPClientError:
|
|
112
112
|
context = get_app_context(request.app)
|
|
113
113
|
if not await exists_kb(kbid):
|
|
114
114
|
return HTTPClientError(status_code=404, detail="Knowledge Box not found")
|
|
@@ -127,7 +127,7 @@ async def get_export_status_endpoint(
|
|
|
127
127
|
@version(1)
|
|
128
128
|
async def get_import_status_endpoint(
|
|
129
129
|
request: Request, kbid: str, import_id: str
|
|
130
|
-
) ->
|
|
130
|
+
) -> StatusResponse | HTTPClientError:
|
|
131
131
|
context = get_app_context(request.app)
|
|
132
132
|
if not await exists_kb(kbid):
|
|
133
133
|
return HTTPClientError(status_code=404, detail="Knowledge Box not found")
|
|
@@ -137,7 +137,7 @@ async def get_import_status_endpoint(
|
|
|
137
137
|
|
|
138
138
|
async def _get_status(
|
|
139
139
|
context: ApplicationContext, type: str, kbid: str, id: str
|
|
140
|
-
) ->
|
|
140
|
+
) -> StatusResponse | HTTPClientError:
|
|
141
141
|
if type not in ("export", "import"):
|
|
142
142
|
raise ValueError(f"Incorrect type: {type}")
|
|
143
143
|
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
from fastapi import HTTPException
|
|
20
|
+
from fastapi import Header, HTTPException
|
|
21
21
|
from fastapi_versioning import version
|
|
22
22
|
from starlette.requests import Request
|
|
23
23
|
|
|
@@ -44,12 +44,20 @@ from nucliadb_utils.authentication import requires, requires_one
|
|
|
44
44
|
)
|
|
45
45
|
@requires(NucliaDBRoles.MANAGER)
|
|
46
46
|
@version(1)
|
|
47
|
-
async def get_kbs(
|
|
47
|
+
async def get_kbs(
|
|
48
|
+
request: Request,
|
|
49
|
+
prefix: str = "",
|
|
50
|
+
x_nucliadb_account: str = Header(default="", include_in_schema=False),
|
|
51
|
+
) -> KnowledgeBoxList:
|
|
48
52
|
driver = get_driver()
|
|
49
53
|
async with driver.ro_transaction() as txn:
|
|
50
54
|
response = KnowledgeBoxList()
|
|
51
55
|
async for kbid, slug in datamanagers.kb.get_kbs(txn, prefix=prefix):
|
|
52
|
-
response.kbs.append(
|
|
56
|
+
response.kbs.append(
|
|
57
|
+
KnowledgeBoxObjSummary(
|
|
58
|
+
slug=user_kb_slug(slug, account_id=x_nucliadb_account) or None, uuid=kbid
|
|
59
|
+
)
|
|
60
|
+
)
|
|
53
61
|
return response
|
|
54
62
|
|
|
55
63
|
|
|
@@ -62,7 +70,9 @@ async def get_kbs(request: Request, prefix: str = "") -> KnowledgeBoxList:
|
|
|
62
70
|
)
|
|
63
71
|
@requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.READER])
|
|
64
72
|
@version(1)
|
|
65
|
-
async def get_kb(
|
|
73
|
+
async def get_kb(
|
|
74
|
+
request: Request, kbid: str, x_nucliadb_account: str = Header(default="", include_in_schema=False)
|
|
75
|
+
) -> KnowledgeBoxObj:
|
|
66
76
|
driver = get_driver()
|
|
67
77
|
async with driver.ro_transaction() as txn:
|
|
68
78
|
kb_config = await datamanagers.kb.get_config(txn, kbid=kbid)
|
|
@@ -71,7 +81,7 @@ async def get_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
|
|
|
71
81
|
|
|
72
82
|
return KnowledgeBoxObj(
|
|
73
83
|
uuid=kbid,
|
|
74
|
-
slug=kb_config.slug,
|
|
84
|
+
slug=user_kb_slug(kb_config.slug, account_id=x_nucliadb_account),
|
|
75
85
|
config=from_proto.knowledgebox_config(kb_config),
|
|
76
86
|
)
|
|
77
87
|
|
|
@@ -85,12 +95,18 @@ async def get_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
|
|
|
85
95
|
)
|
|
86
96
|
@requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.READER])
|
|
87
97
|
@version(1)
|
|
88
|
-
async def get_kb_by_slug(
|
|
98
|
+
async def get_kb_by_slug(
|
|
99
|
+
request: Request, slug: str, x_nucliadb_account: str = Header(default="", include_in_schema=False)
|
|
100
|
+
) -> KnowledgeBoxObj:
|
|
89
101
|
driver = get_driver()
|
|
90
102
|
async with driver.ro_transaction() as txn:
|
|
91
|
-
|
|
103
|
+
# For cloud, the account id is prepended in order to be able to reuse the same slug in different accounts.
|
|
104
|
+
kbid = await datamanagers.kb.get_kb_uuid(txn, slug=f"{x_nucliadb_account}:{slug}")
|
|
92
105
|
if kbid is None:
|
|
93
|
-
|
|
106
|
+
# For onprem, the slug is fully controlled by the user
|
|
107
|
+
kbid = await datamanagers.kb.get_kb_uuid(txn, slug=slug)
|
|
108
|
+
if kbid is None:
|
|
109
|
+
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
|
94
110
|
|
|
95
111
|
kb_config = await datamanagers.kb.get_config(txn, kbid=kbid)
|
|
96
112
|
if kb_config is None:
|
|
@@ -98,6 +114,18 @@ async def get_kb_by_slug(request: Request, slug: str) -> KnowledgeBoxObj:
|
|
|
98
114
|
|
|
99
115
|
return KnowledgeBoxObj(
|
|
100
116
|
uuid=kbid,
|
|
101
|
-
slug=kb_config.slug,
|
|
117
|
+
slug=user_kb_slug(kb_config.slug, account_id=x_nucliadb_account),
|
|
102
118
|
config=from_proto.knowledgebox_config(kb_config),
|
|
103
119
|
)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def user_kb_slug(stored_slug: str, account_id: str) -> str:
|
|
123
|
+
if account_id != "":
|
|
124
|
+
# On cloud deployments, backend prepends the account id to the user-defined slug.
|
|
125
|
+
# This is required so that kb slugs can be reused across different accounts sharing the same nucliadb.
|
|
126
|
+
# We strip it so the user does not see it.
|
|
127
|
+
return stored_slug.split(f"{account_id}:")[-1]
|
|
128
|
+
else:
|
|
129
|
+
# On on-prem deployments, the account_id is set to "" by default and we don't need to strip
|
|
130
|
+
# anything as the backend is not involved in the kb creation process.
|
|
131
|
+
return stored_slug
|
|
@@ -17,9 +17,8 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
from typing import Dict
|
|
21
20
|
|
|
22
|
-
from fastapi import Request
|
|
21
|
+
from fastapi import Header, Request
|
|
23
22
|
from fastapi_versioning import version
|
|
24
23
|
from nuclia_models.config.proto import ExtractConfig, SplitConfiguration
|
|
25
24
|
|
|
@@ -35,7 +34,7 @@ from nucliadb_utils.settings import is_onprem_nucliadb
|
|
|
35
34
|
path=f"/{KB_PREFIX}/{{kbid}}/models/{{model_id}}/{{filename:path}}",
|
|
36
35
|
status_code=200,
|
|
37
36
|
summary="Download the Knowledege Box model",
|
|
38
|
-
description="Download the trained model or any other generated file as a result of a training task on a Knowledge Box.",
|
|
37
|
+
description="Download the trained model or any other generated file as a result of a training task on a Knowledge Box.",
|
|
39
38
|
response_model=None,
|
|
40
39
|
tags=["Models"],
|
|
41
40
|
)
|
|
@@ -60,15 +59,11 @@ async def download_model(
|
|
|
60
59
|
)
|
|
61
60
|
@requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])
|
|
62
61
|
@version(1)
|
|
63
|
-
async def get_configuration(
|
|
64
|
-
request: Request,
|
|
65
|
-
kbid: str,
|
|
66
|
-
):
|
|
62
|
+
async def get_configuration(request: Request, kbid: str):
|
|
67
63
|
return await learning_config_proxy(
|
|
68
64
|
request,
|
|
69
65
|
"GET",
|
|
70
66
|
f"/config/{kbid}",
|
|
71
|
-
extra_headers={"X-STF-USER": request.headers.get("X-NUCLIADB-USER", "")},
|
|
72
67
|
)
|
|
73
68
|
|
|
74
69
|
|
|
@@ -108,7 +103,6 @@ async def get_model(
|
|
|
108
103
|
request,
|
|
109
104
|
"GET",
|
|
110
105
|
f"/models/{kbid}/model/{model_id}",
|
|
111
|
-
extra_headers={"X-STF-USER": request.headers.get("X-NUCLIADB-USER", "")},
|
|
112
106
|
)
|
|
113
107
|
|
|
114
108
|
|
|
@@ -123,10 +117,35 @@ async def get_model(
|
|
|
123
117
|
@requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])
|
|
124
118
|
@version(1)
|
|
125
119
|
async def get_schema_for_configuration_updates(
|
|
126
|
-
request: Request,
|
|
127
|
-
kbid: str,
|
|
120
|
+
request: Request, kbid: str, x_nucliadb_account: str = Header(default="", include_in_schema=False)
|
|
128
121
|
):
|
|
129
|
-
return await learning_config_proxy(
|
|
122
|
+
return await learning_config_proxy(
|
|
123
|
+
request,
|
|
124
|
+
"GET",
|
|
125
|
+
f"/schema/{kbid}",
|
|
126
|
+
headers={"account-id": x_nucliadb_account},
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@api.get(
|
|
131
|
+
path=f"/{KB_PREFIX}/{{kbid}}/generative_providers",
|
|
132
|
+
status_code=200,
|
|
133
|
+
summary="Available models for a knowledge box",
|
|
134
|
+
description="Get all available models for a knowledge box grouped by provider",
|
|
135
|
+
response_model=None,
|
|
136
|
+
tags=["Models"],
|
|
137
|
+
)
|
|
138
|
+
@requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])
|
|
139
|
+
@version(1)
|
|
140
|
+
async def get_models_group_by_providers(
|
|
141
|
+
request: Request, kbid: str, x_nucliadb_account: str = Header(default="", include_in_schema=False)
|
|
142
|
+
):
|
|
143
|
+
return await learning_config_proxy(
|
|
144
|
+
request,
|
|
145
|
+
"GET",
|
|
146
|
+
f"/generative_providers/{kbid}",
|
|
147
|
+
headers={"account-id": x_nucliadb_account},
|
|
148
|
+
)
|
|
130
149
|
|
|
131
150
|
|
|
132
151
|
@api.get(
|
|
@@ -153,7 +172,7 @@ async def get_schema_for_configuration_creation(
|
|
|
153
172
|
status_code=200,
|
|
154
173
|
summary="Learning extract strategies",
|
|
155
174
|
description="Get available extract strategies ",
|
|
156
|
-
response_model=
|
|
175
|
+
response_model=dict[str, ExtractConfig],
|
|
157
176
|
tags=["Extract Strategies"],
|
|
158
177
|
)
|
|
159
178
|
@requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])
|
|
@@ -190,7 +209,7 @@ async def get_extract_strategy_from_id(
|
|
|
190
209
|
status_code=200,
|
|
191
210
|
summary="Learning split strategies",
|
|
192
211
|
description="Get available split strategies ",
|
|
193
|
-
response_model=
|
|
212
|
+
response_model=dict[str, SplitConfiguration],
|
|
194
213
|
tags=["Split Strategies"],
|
|
195
214
|
)
|
|
196
215
|
@requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])
|