nucliadb 6.7.2.post4874__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0023_backfill_pg_catalog.py +8 -4
- migrations/0028_extracted_vectors_reference.py +1 -1
- migrations/0029_backfill_field_status.py +3 -4
- migrations/0032_remove_old_relations.py +2 -3
- migrations/0038_backfill_catalog_field_labels.py +8 -4
- migrations/0039_backfill_converation_splits_metadata.py +106 -0
- migrations/0040_migrate_search_configurations.py +79 -0
- migrations/0041_reindex_conversations.py +137 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
- migrations/pg/0012_catalog_statistics_undo.py +26 -0
- nucliadb/backups/create.py +2 -15
- nucliadb/backups/restore.py +4 -15
- nucliadb/backups/tasks.py +4 -1
- nucliadb/common/back_pressure/cache.py +2 -3
- nucliadb/common/back_pressure/materializer.py +7 -13
- nucliadb/common/back_pressure/settings.py +6 -6
- nucliadb/common/back_pressure/utils.py +1 -0
- nucliadb/common/cache.py +9 -9
- nucliadb/common/catalog/__init__.py +79 -0
- nucliadb/common/catalog/dummy.py +36 -0
- nucliadb/common/catalog/interface.py +85 -0
- nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +330 -232
- nucliadb/common/catalog/utils.py +56 -0
- nucliadb/common/cluster/manager.py +8 -23
- nucliadb/common/cluster/rebalance.py +484 -112
- nucliadb/common/cluster/rollover.py +36 -9
- nucliadb/common/cluster/settings.py +4 -9
- nucliadb/common/cluster/utils.py +34 -8
- nucliadb/common/context/__init__.py +7 -8
- nucliadb/common/context/fastapi.py +1 -2
- nucliadb/common/datamanagers/__init__.py +2 -4
- nucliadb/common/datamanagers/atomic.py +9 -2
- nucliadb/common/datamanagers/cluster.py +1 -2
- nucliadb/common/datamanagers/fields.py +3 -4
- nucliadb/common/datamanagers/kb.py +6 -6
- nucliadb/common/datamanagers/labels.py +2 -3
- nucliadb/common/datamanagers/resources.py +10 -33
- nucliadb/common/datamanagers/rollover.py +5 -7
- nucliadb/common/datamanagers/search_configurations.py +1 -2
- nucliadb/common/datamanagers/synonyms.py +1 -2
- nucliadb/common/datamanagers/utils.py +4 -4
- nucliadb/common/datamanagers/vectorsets.py +4 -4
- nucliadb/common/external_index_providers/base.py +32 -5
- nucliadb/common/external_index_providers/manager.py +5 -34
- nucliadb/common/external_index_providers/settings.py +1 -27
- nucliadb/common/filter_expression.py +129 -41
- nucliadb/common/http_clients/exceptions.py +8 -0
- nucliadb/common/http_clients/processing.py +16 -23
- nucliadb/common/http_clients/utils.py +3 -0
- nucliadb/common/ids.py +82 -58
- nucliadb/common/locking.py +1 -2
- nucliadb/common/maindb/driver.py +9 -8
- nucliadb/common/maindb/local.py +5 -5
- nucliadb/common/maindb/pg.py +9 -8
- nucliadb/common/nidx.py +22 -5
- nucliadb/common/vector_index_config.py +1 -1
- nucliadb/export_import/datamanager.py +4 -3
- nucliadb/export_import/exporter.py +11 -19
- nucliadb/export_import/importer.py +13 -6
- nucliadb/export_import/tasks.py +2 -0
- nucliadb/export_import/utils.py +6 -18
- nucliadb/health.py +2 -2
- nucliadb/ingest/app.py +8 -8
- nucliadb/ingest/consumer/consumer.py +8 -10
- nucliadb/ingest/consumer/pull.py +10 -8
- nucliadb/ingest/consumer/service.py +5 -30
- nucliadb/ingest/consumer/shard_creator.py +16 -5
- nucliadb/ingest/consumer/utils.py +1 -1
- nucliadb/ingest/fields/base.py +37 -49
- nucliadb/ingest/fields/conversation.py +55 -9
- nucliadb/ingest/fields/exceptions.py +1 -2
- nucliadb/ingest/fields/file.py +22 -8
- nucliadb/ingest/fields/link.py +7 -7
- nucliadb/ingest/fields/text.py +2 -3
- nucliadb/ingest/orm/brain_v2.py +89 -57
- nucliadb/ingest/orm/broker_message.py +2 -4
- nucliadb/ingest/orm/entities.py +10 -209
- nucliadb/ingest/orm/index_message.py +128 -113
- nucliadb/ingest/orm/knowledgebox.py +91 -59
- nucliadb/ingest/orm/processor/auditing.py +1 -3
- nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
- nucliadb/ingest/orm/processor/processor.py +98 -153
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
- nucliadb/ingest/orm/resource.py +82 -71
- nucliadb/ingest/orm/utils.py +1 -1
- nucliadb/ingest/partitions.py +12 -1
- nucliadb/ingest/processing.py +17 -17
- nucliadb/ingest/serialize.py +202 -145
- nucliadb/ingest/service/writer.py +15 -114
- nucliadb/ingest/settings.py +36 -15
- nucliadb/ingest/utils.py +1 -2
- nucliadb/learning_proxy.py +23 -26
- nucliadb/metrics_exporter.py +20 -6
- nucliadb/middleware/__init__.py +82 -1
- nucliadb/migrator/datamanager.py +4 -11
- nucliadb/migrator/migrator.py +1 -2
- nucliadb/migrator/models.py +1 -2
- nucliadb/migrator/settings.py +1 -2
- nucliadb/models/internal/augment.py +614 -0
- nucliadb/models/internal/processing.py +19 -19
- nucliadb/openapi.py +2 -2
- nucliadb/purge/__init__.py +3 -8
- nucliadb/purge/orphan_shards.py +1 -2
- nucliadb/reader/__init__.py +5 -0
- nucliadb/reader/api/models.py +6 -13
- nucliadb/reader/api/v1/download.py +59 -38
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +37 -9
- nucliadb/reader/api/v1/learning_config.py +33 -14
- nucliadb/reader/api/v1/resource.py +61 -9
- nucliadb/reader/api/v1/services.py +18 -14
- nucliadb/reader/app.py +3 -1
- nucliadb/reader/reader/notifications.py +1 -2
- nucliadb/search/api/v1/__init__.py +3 -0
- nucliadb/search/api/v1/ask.py +3 -4
- nucliadb/search/api/v1/augment.py +585 -0
- nucliadb/search/api/v1/catalog.py +15 -19
- nucliadb/search/api/v1/find.py +16 -22
- nucliadb/search/api/v1/hydrate.py +328 -0
- nucliadb/search/api/v1/knowledgebox.py +1 -2
- nucliadb/search/api/v1/predict_proxy.py +1 -2
- nucliadb/search/api/v1/resource/ask.py +28 -8
- nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
- nucliadb/search/api/v1/resource/search.py +9 -11
- nucliadb/search/api/v1/retrieve.py +130 -0
- nucliadb/search/api/v1/search.py +28 -32
- nucliadb/search/api/v1/suggest.py +11 -14
- nucliadb/search/api/v1/summarize.py +1 -2
- nucliadb/search/api/v1/utils.py +2 -2
- nucliadb/search/app.py +3 -2
- nucliadb/search/augmentor/__init__.py +21 -0
- nucliadb/search/augmentor/augmentor.py +232 -0
- nucliadb/search/augmentor/fields.py +704 -0
- nucliadb/search/augmentor/metrics.py +24 -0
- nucliadb/search/augmentor/paragraphs.py +334 -0
- nucliadb/search/augmentor/resources.py +238 -0
- nucliadb/search/augmentor/utils.py +33 -0
- nucliadb/search/lifecycle.py +3 -1
- nucliadb/search/predict.py +33 -19
- nucliadb/search/predict_models.py +8 -9
- nucliadb/search/requesters/utils.py +11 -10
- nucliadb/search/search/cache.py +19 -42
- nucliadb/search/search/chat/ask.py +131 -59
- nucliadb/search/search/chat/exceptions.py +3 -5
- nucliadb/search/search/chat/fetcher.py +201 -0
- nucliadb/search/search/chat/images.py +6 -4
- nucliadb/search/search/chat/old_prompt.py +1375 -0
- nucliadb/search/search/chat/parser.py +510 -0
- nucliadb/search/search/chat/prompt.py +563 -615
- nucliadb/search/search/chat/query.py +453 -32
- nucliadb/search/search/chat/rpc.py +85 -0
- nucliadb/search/search/fetch.py +3 -4
- nucliadb/search/search/filters.py +8 -11
- nucliadb/search/search/find.py +33 -31
- nucliadb/search/search/find_merge.py +124 -331
- nucliadb/search/search/graph_strategy.py +14 -12
- nucliadb/search/search/hydrator/__init__.py +49 -0
- nucliadb/search/search/hydrator/fields.py +217 -0
- nucliadb/search/search/hydrator/images.py +130 -0
- nucliadb/search/search/hydrator/paragraphs.py +323 -0
- nucliadb/search/search/hydrator/resources.py +60 -0
- nucliadb/search/search/ingestion_agents.py +5 -5
- nucliadb/search/search/merge.py +90 -94
- nucliadb/search/search/metrics.py +24 -7
- nucliadb/search/search/paragraphs.py +7 -9
- nucliadb/search/search/predict_proxy.py +44 -18
- nucliadb/search/search/query.py +14 -86
- nucliadb/search/search/query_parser/fetcher.py +51 -82
- nucliadb/search/search/query_parser/models.py +19 -48
- nucliadb/search/search/query_parser/old_filters.py +20 -19
- nucliadb/search/search/query_parser/parsers/ask.py +5 -6
- nucliadb/search/search/query_parser/parsers/catalog.py +7 -11
- nucliadb/search/search/query_parser/parsers/common.py +21 -13
- nucliadb/search/search/query_parser/parsers/find.py +6 -29
- nucliadb/search/search/query_parser/parsers/graph.py +18 -28
- nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
- nucliadb/search/search/query_parser/parsers/search.py +15 -56
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
- nucliadb/search/search/rank_fusion.py +18 -13
- nucliadb/search/search/rerankers.py +6 -7
- nucliadb/search/search/retrieval.py +300 -0
- nucliadb/search/search/summarize.py +5 -6
- nucliadb/search/search/utils.py +3 -4
- nucliadb/search/settings.py +1 -2
- nucliadb/standalone/api_router.py +1 -1
- nucliadb/standalone/app.py +4 -3
- nucliadb/standalone/auth.py +5 -6
- nucliadb/standalone/lifecycle.py +2 -2
- nucliadb/standalone/run.py +5 -4
- nucliadb/standalone/settings.py +5 -6
- nucliadb/standalone/versions.py +3 -4
- nucliadb/tasks/consumer.py +13 -8
- nucliadb/tasks/models.py +2 -1
- nucliadb/tasks/producer.py +3 -3
- nucliadb/tasks/retries.py +8 -7
- nucliadb/train/api/utils.py +1 -3
- nucliadb/train/api/v1/shards.py +1 -2
- nucliadb/train/api/v1/trainset.py +1 -2
- nucliadb/train/app.py +1 -1
- nucliadb/train/generator.py +4 -4
- nucliadb/train/generators/field_classifier.py +2 -2
- nucliadb/train/generators/field_streaming.py +6 -6
- nucliadb/train/generators/image_classifier.py +2 -2
- nucliadb/train/generators/paragraph_classifier.py +2 -2
- nucliadb/train/generators/paragraph_streaming.py +2 -2
- nucliadb/train/generators/question_answer_streaming.py +2 -2
- nucliadb/train/generators/sentence_classifier.py +4 -10
- nucliadb/train/generators/token_classifier.py +3 -2
- nucliadb/train/generators/utils.py +6 -5
- nucliadb/train/nodes.py +3 -3
- nucliadb/train/resource.py +6 -8
- nucliadb/train/settings.py +3 -4
- nucliadb/train/types.py +11 -11
- nucliadb/train/upload.py +3 -2
- nucliadb/train/uploader.py +1 -2
- nucliadb/train/utils.py +1 -2
- nucliadb/writer/api/v1/export_import.py +4 -1
- nucliadb/writer/api/v1/field.py +15 -14
- nucliadb/writer/api/v1/knowledgebox.py +18 -56
- nucliadb/writer/api/v1/learning_config.py +5 -4
- nucliadb/writer/api/v1/resource.py +9 -20
- nucliadb/writer/api/v1/services.py +10 -132
- nucliadb/writer/api/v1/upload.py +73 -72
- nucliadb/writer/app.py +8 -2
- nucliadb/writer/resource/basic.py +12 -15
- nucliadb/writer/resource/field.py +43 -5
- nucliadb/writer/resource/origin.py +7 -0
- nucliadb/writer/settings.py +2 -3
- nucliadb/writer/tus/__init__.py +2 -3
- nucliadb/writer/tus/azure.py +5 -7
- nucliadb/writer/tus/dm.py +3 -3
- nucliadb/writer/tus/exceptions.py +3 -4
- nucliadb/writer/tus/gcs.py +15 -22
- nucliadb/writer/tus/s3.py +2 -3
- nucliadb/writer/tus/storage.py +3 -3
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +10 -11
- nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
- nucliadb/common/datamanagers/entities.py +0 -139
- nucliadb/common/external_index_providers/pinecone.py +0 -894
- nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
- nucliadb/search/search/hydrator.py +0 -197
- nucliadb-6.7.2.post4874.dist-info/RECORD +0 -383
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
|
@@ -17,14 +17,15 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
+
from collections.abc import AsyncGenerator, Callable, Coroutine, Sequence
|
|
20
21
|
from datetime import datetime
|
|
21
22
|
from functools import partial
|
|
22
|
-
from typing import Any
|
|
23
|
+
from typing import Any
|
|
23
24
|
from uuid import uuid4
|
|
24
25
|
|
|
25
26
|
from grpc import StatusCode
|
|
26
27
|
from grpc.aio import AioRpcError
|
|
27
|
-
from nidx_protos import noderesources_pb2
|
|
28
|
+
from nidx_protos import nidx_pb2, noderesources_pb2
|
|
28
29
|
|
|
29
30
|
from nucliadb.common import datamanagers
|
|
30
31
|
from nucliadb.common.cluster.exceptions import ShardNotFound
|
|
@@ -36,7 +37,6 @@ from nucliadb.common.datamanagers.resources import (
|
|
|
36
37
|
KB_RESOURCE_SLUG_BASE,
|
|
37
38
|
)
|
|
38
39
|
from nucliadb.common.external_index_providers.base import VectorsetExternalIndex
|
|
39
|
-
from nucliadb.common.external_index_providers.pinecone import PineconeIndexManager
|
|
40
40
|
from nucliadb.common.maindb.driver import Driver, Transaction
|
|
41
41
|
from nucliadb.common.maindb.pg import PGTransaction
|
|
42
42
|
from nucliadb.common.nidx import get_nidx_api_client
|
|
@@ -53,7 +53,6 @@ from nucliadb.migrator.utils import get_latest_version
|
|
|
53
53
|
from nucliadb_protos import knowledgebox_pb2, writer_pb2
|
|
54
54
|
from nucliadb_protos.knowledgebox_pb2 import (
|
|
55
55
|
CreateExternalIndexProviderMetadata,
|
|
56
|
-
ExternalIndexProviderType,
|
|
57
56
|
KnowledgeBoxConfig,
|
|
58
57
|
SemanticModelMetadata,
|
|
59
58
|
StoredExternalIndexProviderMetadata,
|
|
@@ -90,7 +89,7 @@ class KnowledgeBox:
|
|
|
90
89
|
self.txn = txn
|
|
91
90
|
self.storage = storage
|
|
92
91
|
self.kbid = kbid
|
|
93
|
-
self._config: Optional[KnowledgeBoxConfig] = None
|
|
92
|
+
self._config: KnowledgeBoxConfig | None = None
|
|
94
93
|
|
|
95
94
|
@staticmethod
|
|
96
95
|
def new_unique_kbid() -> str:
|
|
@@ -110,6 +109,7 @@ class KnowledgeBox:
|
|
|
110
109
|
external_index_provider: CreateExternalIndexProviderMetadata = CreateExternalIndexProviderMetadata(),
|
|
111
110
|
hidden_resources_enabled: bool = False,
|
|
112
111
|
hidden_resources_hide_on_creation: bool = False,
|
|
112
|
+
prewarm_enabled: bool = False,
|
|
113
113
|
) -> tuple[str, str]:
|
|
114
114
|
"""Creates a new knowledge box and return its id and slug."""
|
|
115
115
|
|
|
@@ -196,6 +196,7 @@ class KnowledgeBox:
|
|
|
196
196
|
migration_version=get_latest_version(),
|
|
197
197
|
hidden_resources_enabled=hidden_resources_enabled,
|
|
198
198
|
hidden_resources_hide_on_creation=hidden_resources_hide_on_creation,
|
|
199
|
+
prewarm_enabled=prewarm_enabled,
|
|
199
200
|
)
|
|
200
201
|
config.external_index_provider.CopyFrom(stored_external_index_provider)
|
|
201
202
|
await datamanagers.kb.set_config(txn, kbid=kbid, config=config)
|
|
@@ -222,7 +223,7 @@ class KnowledgeBox:
|
|
|
222
223
|
shard_manager = get_shard_manager()
|
|
223
224
|
# XXX creating a shard is a slow IO operation that requires a write
|
|
224
225
|
# txn to be open!
|
|
225
|
-
await shard_manager.create_shard_by_kbid(txn, kbid)
|
|
226
|
+
await shard_manager.create_shard_by_kbid(txn, kbid, prewarm_enabled=prewarm_enabled)
|
|
226
227
|
# shards don't need a rollback as they will be eventually purged
|
|
227
228
|
|
|
228
229
|
await txn.commit()
|
|
@@ -245,39 +246,86 @@ class KnowledgeBox:
|
|
|
245
246
|
@classmethod
|
|
246
247
|
async def update(
|
|
247
248
|
cls,
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
249
|
+
driver: Driver,
|
|
250
|
+
kbid: str,
|
|
251
|
+
*,
|
|
252
|
+
slug: str | None = None,
|
|
253
|
+
title: str | None = None,
|
|
254
|
+
description: str | None = None,
|
|
255
|
+
migration_version: int | None = None,
|
|
256
|
+
external_index_provider: StoredExternalIndexProviderMetadata | None = None,
|
|
257
|
+
hidden_resources_enabled: bool | None = None,
|
|
258
|
+
hidden_resources_hide_on_creation: bool | None = None,
|
|
259
|
+
prewarm_enabled: bool | None = None,
|
|
252
260
|
) -> str:
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
else:
|
|
266
|
-
exist.slug = slug
|
|
261
|
+
async with driver.rw_transaction() as txn:
|
|
262
|
+
stored = await datamanagers.kb.get_config(txn, kbid=kbid, for_update=True)
|
|
263
|
+
if not stored:
|
|
264
|
+
raise datamanagers.exceptions.KnowledgeBoxNotFound()
|
|
265
|
+
|
|
266
|
+
if slug:
|
|
267
|
+
await txn.delete(datamanagers.kb.KB_SLUGS.format(slug=stored.slug))
|
|
268
|
+
await txn.set(
|
|
269
|
+
datamanagers.kb.KB_SLUGS.format(slug=slug),
|
|
270
|
+
kbid.encode(),
|
|
271
|
+
)
|
|
272
|
+
stored.slug = slug
|
|
267
273
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
274
|
+
if title is not None:
|
|
275
|
+
stored.title = title
|
|
276
|
+
if description is not None:
|
|
277
|
+
stored.description = description
|
|
272
278
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
279
|
+
if migration_version is not None:
|
|
280
|
+
stored.migration_version = migration_version
|
|
281
|
+
|
|
282
|
+
if external_index_provider is not None:
|
|
283
|
+
stored.external_index_provider.MergeFrom(external_index_provider)
|
|
284
|
+
|
|
285
|
+
if hidden_resources_enabled is not None:
|
|
286
|
+
stored.hidden_resources_enabled = hidden_resources_enabled
|
|
287
|
+
if hidden_resources_hide_on_creation is not None:
|
|
288
|
+
stored.hidden_resources_hide_on_creation = hidden_resources_hide_on_creation
|
|
289
|
+
|
|
290
|
+
update_nidx_prewarm = None
|
|
291
|
+
if prewarm_enabled is not None:
|
|
292
|
+
if stored.prewarm_enabled != prewarm_enabled:
|
|
293
|
+
update_nidx_prewarm = prewarm_enabled
|
|
294
|
+
stored.prewarm_enabled = prewarm_enabled
|
|
295
|
+
|
|
296
|
+
if stored.hidden_resources_hide_on_creation and not stored.hidden_resources_enabled:
|
|
297
|
+
raise KnowledgeBoxCreationError(
|
|
298
|
+
"Cannot hide new resources if the hidden resources feature is disabled"
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
await datamanagers.kb.set_config(txn, kbid=kbid, config=stored)
|
|
302
|
+
|
|
303
|
+
await txn.commit()
|
|
304
|
+
|
|
305
|
+
if update_nidx_prewarm is not None:
|
|
306
|
+
await cls.configure_shards(driver, kbid, prewarm=update_nidx_prewarm)
|
|
277
307
|
|
|
278
|
-
|
|
308
|
+
return kbid
|
|
309
|
+
|
|
310
|
+
@classmethod
|
|
311
|
+
async def configure_shards(cls, driver: Driver, kbid: str, *, prewarm: bool):
|
|
312
|
+
shards_obj = await datamanagers.atomic.cluster.get_kb_shards(kbid=kbid)
|
|
313
|
+
if shards_obj is None:
|
|
314
|
+
logger.warning(f"Shards not found for KB while updating pre-warm flag", extra={"kbid": kbid})
|
|
315
|
+
return
|
|
279
316
|
|
|
280
|
-
|
|
317
|
+
nidx_shard_ids = [shard.nidx_shard_id for shard in shards_obj.shards]
|
|
318
|
+
|
|
319
|
+
nidx_api = get_nidx_api_client()
|
|
320
|
+
if nidx_api is not None and len(nidx_shard_ids) > 0:
|
|
321
|
+
configs = [
|
|
322
|
+
nidx_pb2.ShardConfig(
|
|
323
|
+
shard_id=shard_id,
|
|
324
|
+
prewarm_enabled=prewarm,
|
|
325
|
+
)
|
|
326
|
+
for shard_id in nidx_shard_ids
|
|
327
|
+
]
|
|
328
|
+
await nidx_api.ConfigureShards(nidx_pb2.ShardsConfig(configs=configs))
|
|
281
329
|
|
|
282
330
|
@classmethod
|
|
283
331
|
async def delete(cls, driver: Driver, kbid: str):
|
|
@@ -381,7 +429,7 @@ class KnowledgeBox:
|
|
|
381
429
|
await txn.delete_by_prefix(prefix)
|
|
382
430
|
await txn.commit()
|
|
383
431
|
|
|
384
|
-
async def get_resource_shard(self, shard_id: str) -> Optional[writer_pb2.ShardObject]:
|
|
432
|
+
async def get_resource_shard(self, shard_id: str) -> writer_pb2.ShardObject | None:
|
|
385
433
|
async with datamanagers.with_ro_transaction() as txn:
|
|
386
434
|
pb = await datamanagers.cluster.get_kb_shards(txn, kbid=self.kbid)
|
|
387
435
|
if pb is None:
|
|
@@ -392,18 +440,8 @@ class KnowledgeBox:
|
|
|
392
440
|
return shard
|
|
393
441
|
return None
|
|
394
442
|
|
|
395
|
-
async def get(self, uuid: str) -> Optional[Resource]:
|
|
396
|
-
|
|
397
|
-
if basic is None:
|
|
398
|
-
return None
|
|
399
|
-
return Resource(
|
|
400
|
-
txn=self.txn,
|
|
401
|
-
storage=self.storage,
|
|
402
|
-
kb=self,
|
|
403
|
-
uuid=uuid,
|
|
404
|
-
basic=basic,
|
|
405
|
-
disable_vectors=False,
|
|
406
|
-
)
|
|
443
|
+
async def get(self, uuid: str) -> Resource | None:
|
|
444
|
+
return await Resource.get(self.txn, self.kbid, uuid)
|
|
407
445
|
|
|
408
446
|
async def maindb_delete_resource(self, uuid: str):
|
|
409
447
|
basic = await datamanagers.resources.get_basic(self.txn, kbid=self.kbid, rid=uuid)
|
|
@@ -432,7 +470,7 @@ class KnowledgeBox:
|
|
|
432
470
|
with processor_observer({"type": "delete_resource_storage"}):
|
|
433
471
|
await self.storage_delete_resource(uuid)
|
|
434
472
|
|
|
435
|
-
async def get_resource_uuid_by_slug(self, slug: str) -> Optional[str]:
|
|
473
|
+
async def get_resource_uuid_by_slug(self, slug: str) -> str | None:
|
|
436
474
|
return await datamanagers.resources.get_resource_uuid_from_slug(
|
|
437
475
|
self.txn, kbid=self.kbid, slug=slug
|
|
438
476
|
)
|
|
@@ -449,7 +487,7 @@ class KnowledgeBox:
|
|
|
449
487
|
key_ok = True
|
|
450
488
|
return slug
|
|
451
489
|
|
|
452
|
-
async def add_resource(self, uuid: str, slug: str, basic: Optional[Basic] = None) -> Resource:
|
|
490
|
+
async def add_resource(self, uuid: str, slug: str, basic: Basic | None = None) -> Resource:
|
|
453
491
|
if basic is None:
|
|
454
492
|
basic = Basic()
|
|
455
493
|
if slug == "":
|
|
@@ -461,7 +499,7 @@ class KnowledgeBox:
|
|
|
461
499
|
return Resource(
|
|
462
500
|
storage=self.storage,
|
|
463
501
|
txn=self.txn,
|
|
464
|
-
|
|
502
|
+
kbid=self.kbid,
|
|
465
503
|
uuid=uuid,
|
|
466
504
|
basic=basic,
|
|
467
505
|
disable_vectors=False,
|
|
@@ -476,7 +514,7 @@ class KnowledgeBox:
|
|
|
476
514
|
yield Resource(
|
|
477
515
|
self.txn,
|
|
478
516
|
self.storage,
|
|
479
|
-
self,
|
|
517
|
+
self.kbid,
|
|
480
518
|
uuid,
|
|
481
519
|
disable_vectors=False,
|
|
482
520
|
)
|
|
@@ -535,10 +573,7 @@ class KnowledgeBox:
|
|
|
535
573
|
request: CreateExternalIndexProviderMetadata,
|
|
536
574
|
indexes: list[VectorsetExternalIndex],
|
|
537
575
|
) -> StoredExternalIndexProviderMetadata:
|
|
538
|
-
|
|
539
|
-
return StoredExternalIndexProviderMetadata(type=request.type)
|
|
540
|
-
# Only pinecone is supported for now
|
|
541
|
-
return await PineconeIndexManager.create_indexes(kbid, request, indexes)
|
|
576
|
+
return StoredExternalIndexProviderMetadata(type=request.type)
|
|
542
577
|
|
|
543
578
|
@classmethod
|
|
544
579
|
async def _maybe_delete_external_indexes(
|
|
@@ -546,10 +581,7 @@ class KnowledgeBox:
|
|
|
546
581
|
kbid: str,
|
|
547
582
|
stored: StoredExternalIndexProviderMetadata,
|
|
548
583
|
) -> None:
|
|
549
|
-
|
|
550
|
-
return
|
|
551
|
-
# Only pinecone is supported for now
|
|
552
|
-
await PineconeIndexManager.delete_indexes(kbid, stored)
|
|
584
|
+
return
|
|
553
585
|
|
|
554
586
|
|
|
555
587
|
def chunker(seq: Sequence, size: int):
|
|
@@ -18,7 +18,6 @@
|
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
20
|
from nucliadb.common.maindb.driver import Driver
|
|
21
|
-
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
|
|
22
21
|
from nucliadb.ingest.orm.resource import Resource
|
|
23
22
|
from nucliadb_protos import audit_pb2, writer_pb2
|
|
24
23
|
from nucliadb_protos.resources_pb2 import FieldType
|
|
@@ -35,8 +34,7 @@ async def collect_audit_fields(
|
|
|
35
34
|
|
|
36
35
|
audit_storage_fields: list[audit_pb2.AuditField] = []
|
|
37
36
|
async with driver.ro_transaction() as txn:
|
|
38
|
-
|
|
39
|
-
resource = Resource(txn, storage, kb, message.uuid)
|
|
37
|
+
resource = Resource(txn, storage, message.kbid, message.uuid)
|
|
40
38
|
field_keys = await resource.get_fields_ids()
|
|
41
39
|
|
|
42
40
|
for field_id, field_type in iterate_auditable_fields(field_keys, message):
|
|
@@ -20,7 +20,6 @@
|
|
|
20
20
|
|
|
21
21
|
import logging
|
|
22
22
|
from dataclasses import dataclass, field
|
|
23
|
-
from typing import Optional
|
|
24
23
|
|
|
25
24
|
from nucliadb.ingest.orm.resource import Resource
|
|
26
25
|
from nucliadb.ingest.processing import ProcessingEngine
|
|
@@ -94,7 +93,7 @@ def _generate_processing_payload_for_fields(
|
|
|
94
93
|
rid: str,
|
|
95
94
|
fields: GeneratedFields,
|
|
96
95
|
bm: writer_pb2.BrokerMessage,
|
|
97
|
-
) -> Optional[PushPayload]:
|
|
96
|
+
) -> PushPayload | None:
|
|
98
97
|
partitioning = get_partitioning()
|
|
99
98
|
partition = partitioning.generate_partition(kbid, rid)
|
|
100
99
|
|