nucliadb 6.7.2.post4874__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0023_backfill_pg_catalog.py +8 -4
- migrations/0028_extracted_vectors_reference.py +1 -1
- migrations/0029_backfill_field_status.py +3 -4
- migrations/0032_remove_old_relations.py +2 -3
- migrations/0038_backfill_catalog_field_labels.py +8 -4
- migrations/0039_backfill_converation_splits_metadata.py +106 -0
- migrations/0040_migrate_search_configurations.py +79 -0
- migrations/0041_reindex_conversations.py +137 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
- migrations/pg/0012_catalog_statistics_undo.py +26 -0
- nucliadb/backups/create.py +2 -15
- nucliadb/backups/restore.py +4 -15
- nucliadb/backups/tasks.py +4 -1
- nucliadb/common/back_pressure/cache.py +2 -3
- nucliadb/common/back_pressure/materializer.py +7 -13
- nucliadb/common/back_pressure/settings.py +6 -6
- nucliadb/common/back_pressure/utils.py +1 -0
- nucliadb/common/cache.py +9 -9
- nucliadb/common/catalog/__init__.py +79 -0
- nucliadb/common/catalog/dummy.py +36 -0
- nucliadb/common/catalog/interface.py +85 -0
- nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +330 -232
- nucliadb/common/catalog/utils.py +56 -0
- nucliadb/common/cluster/manager.py +8 -23
- nucliadb/common/cluster/rebalance.py +484 -112
- nucliadb/common/cluster/rollover.py +36 -9
- nucliadb/common/cluster/settings.py +4 -9
- nucliadb/common/cluster/utils.py +34 -8
- nucliadb/common/context/__init__.py +7 -8
- nucliadb/common/context/fastapi.py +1 -2
- nucliadb/common/datamanagers/__init__.py +2 -4
- nucliadb/common/datamanagers/atomic.py +9 -2
- nucliadb/common/datamanagers/cluster.py +1 -2
- nucliadb/common/datamanagers/fields.py +3 -4
- nucliadb/common/datamanagers/kb.py +6 -6
- nucliadb/common/datamanagers/labels.py +2 -3
- nucliadb/common/datamanagers/resources.py +10 -33
- nucliadb/common/datamanagers/rollover.py +5 -7
- nucliadb/common/datamanagers/search_configurations.py +1 -2
- nucliadb/common/datamanagers/synonyms.py +1 -2
- nucliadb/common/datamanagers/utils.py +4 -4
- nucliadb/common/datamanagers/vectorsets.py +4 -4
- nucliadb/common/external_index_providers/base.py +32 -5
- nucliadb/common/external_index_providers/manager.py +5 -34
- nucliadb/common/external_index_providers/settings.py +1 -27
- nucliadb/common/filter_expression.py +129 -41
- nucliadb/common/http_clients/exceptions.py +8 -0
- nucliadb/common/http_clients/processing.py +16 -23
- nucliadb/common/http_clients/utils.py +3 -0
- nucliadb/common/ids.py +82 -58
- nucliadb/common/locking.py +1 -2
- nucliadb/common/maindb/driver.py +9 -8
- nucliadb/common/maindb/local.py +5 -5
- nucliadb/common/maindb/pg.py +9 -8
- nucliadb/common/nidx.py +22 -5
- nucliadb/common/vector_index_config.py +1 -1
- nucliadb/export_import/datamanager.py +4 -3
- nucliadb/export_import/exporter.py +11 -19
- nucliadb/export_import/importer.py +13 -6
- nucliadb/export_import/tasks.py +2 -0
- nucliadb/export_import/utils.py +6 -18
- nucliadb/health.py +2 -2
- nucliadb/ingest/app.py +8 -8
- nucliadb/ingest/consumer/consumer.py +8 -10
- nucliadb/ingest/consumer/pull.py +10 -8
- nucliadb/ingest/consumer/service.py +5 -30
- nucliadb/ingest/consumer/shard_creator.py +16 -5
- nucliadb/ingest/consumer/utils.py +1 -1
- nucliadb/ingest/fields/base.py +37 -49
- nucliadb/ingest/fields/conversation.py +55 -9
- nucliadb/ingest/fields/exceptions.py +1 -2
- nucliadb/ingest/fields/file.py +22 -8
- nucliadb/ingest/fields/link.py +7 -7
- nucliadb/ingest/fields/text.py +2 -3
- nucliadb/ingest/orm/brain_v2.py +89 -57
- nucliadb/ingest/orm/broker_message.py +2 -4
- nucliadb/ingest/orm/entities.py +10 -209
- nucliadb/ingest/orm/index_message.py +128 -113
- nucliadb/ingest/orm/knowledgebox.py +91 -59
- nucliadb/ingest/orm/processor/auditing.py +1 -3
- nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
- nucliadb/ingest/orm/processor/processor.py +98 -153
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
- nucliadb/ingest/orm/resource.py +82 -71
- nucliadb/ingest/orm/utils.py +1 -1
- nucliadb/ingest/partitions.py +12 -1
- nucliadb/ingest/processing.py +17 -17
- nucliadb/ingest/serialize.py +202 -145
- nucliadb/ingest/service/writer.py +15 -114
- nucliadb/ingest/settings.py +36 -15
- nucliadb/ingest/utils.py +1 -2
- nucliadb/learning_proxy.py +23 -26
- nucliadb/metrics_exporter.py +20 -6
- nucliadb/middleware/__init__.py +82 -1
- nucliadb/migrator/datamanager.py +4 -11
- nucliadb/migrator/migrator.py +1 -2
- nucliadb/migrator/models.py +1 -2
- nucliadb/migrator/settings.py +1 -2
- nucliadb/models/internal/augment.py +614 -0
- nucliadb/models/internal/processing.py +19 -19
- nucliadb/openapi.py +2 -2
- nucliadb/purge/__init__.py +3 -8
- nucliadb/purge/orphan_shards.py +1 -2
- nucliadb/reader/__init__.py +5 -0
- nucliadb/reader/api/models.py +6 -13
- nucliadb/reader/api/v1/download.py +59 -38
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +37 -9
- nucliadb/reader/api/v1/learning_config.py +33 -14
- nucliadb/reader/api/v1/resource.py +61 -9
- nucliadb/reader/api/v1/services.py +18 -14
- nucliadb/reader/app.py +3 -1
- nucliadb/reader/reader/notifications.py +1 -2
- nucliadb/search/api/v1/__init__.py +3 -0
- nucliadb/search/api/v1/ask.py +3 -4
- nucliadb/search/api/v1/augment.py +585 -0
- nucliadb/search/api/v1/catalog.py +15 -19
- nucliadb/search/api/v1/find.py +16 -22
- nucliadb/search/api/v1/hydrate.py +328 -0
- nucliadb/search/api/v1/knowledgebox.py +1 -2
- nucliadb/search/api/v1/predict_proxy.py +1 -2
- nucliadb/search/api/v1/resource/ask.py +28 -8
- nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
- nucliadb/search/api/v1/resource/search.py +9 -11
- nucliadb/search/api/v1/retrieve.py +130 -0
- nucliadb/search/api/v1/search.py +28 -32
- nucliadb/search/api/v1/suggest.py +11 -14
- nucliadb/search/api/v1/summarize.py +1 -2
- nucliadb/search/api/v1/utils.py +2 -2
- nucliadb/search/app.py +3 -2
- nucliadb/search/augmentor/__init__.py +21 -0
- nucliadb/search/augmentor/augmentor.py +232 -0
- nucliadb/search/augmentor/fields.py +704 -0
- nucliadb/search/augmentor/metrics.py +24 -0
- nucliadb/search/augmentor/paragraphs.py +334 -0
- nucliadb/search/augmentor/resources.py +238 -0
- nucliadb/search/augmentor/utils.py +33 -0
- nucliadb/search/lifecycle.py +3 -1
- nucliadb/search/predict.py +33 -19
- nucliadb/search/predict_models.py +8 -9
- nucliadb/search/requesters/utils.py +11 -10
- nucliadb/search/search/cache.py +19 -42
- nucliadb/search/search/chat/ask.py +131 -59
- nucliadb/search/search/chat/exceptions.py +3 -5
- nucliadb/search/search/chat/fetcher.py +201 -0
- nucliadb/search/search/chat/images.py +6 -4
- nucliadb/search/search/chat/old_prompt.py +1375 -0
- nucliadb/search/search/chat/parser.py +510 -0
- nucliadb/search/search/chat/prompt.py +563 -615
- nucliadb/search/search/chat/query.py +453 -32
- nucliadb/search/search/chat/rpc.py +85 -0
- nucliadb/search/search/fetch.py +3 -4
- nucliadb/search/search/filters.py +8 -11
- nucliadb/search/search/find.py +33 -31
- nucliadb/search/search/find_merge.py +124 -331
- nucliadb/search/search/graph_strategy.py +14 -12
- nucliadb/search/search/hydrator/__init__.py +49 -0
- nucliadb/search/search/hydrator/fields.py +217 -0
- nucliadb/search/search/hydrator/images.py +130 -0
- nucliadb/search/search/hydrator/paragraphs.py +323 -0
- nucliadb/search/search/hydrator/resources.py +60 -0
- nucliadb/search/search/ingestion_agents.py +5 -5
- nucliadb/search/search/merge.py +90 -94
- nucliadb/search/search/metrics.py +24 -7
- nucliadb/search/search/paragraphs.py +7 -9
- nucliadb/search/search/predict_proxy.py +44 -18
- nucliadb/search/search/query.py +14 -86
- nucliadb/search/search/query_parser/fetcher.py +51 -82
- nucliadb/search/search/query_parser/models.py +19 -48
- nucliadb/search/search/query_parser/old_filters.py +20 -19
- nucliadb/search/search/query_parser/parsers/ask.py +5 -6
- nucliadb/search/search/query_parser/parsers/catalog.py +7 -11
- nucliadb/search/search/query_parser/parsers/common.py +21 -13
- nucliadb/search/search/query_parser/parsers/find.py +6 -29
- nucliadb/search/search/query_parser/parsers/graph.py +18 -28
- nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
- nucliadb/search/search/query_parser/parsers/search.py +15 -56
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
- nucliadb/search/search/rank_fusion.py +18 -13
- nucliadb/search/search/rerankers.py +6 -7
- nucliadb/search/search/retrieval.py +300 -0
- nucliadb/search/search/summarize.py +5 -6
- nucliadb/search/search/utils.py +3 -4
- nucliadb/search/settings.py +1 -2
- nucliadb/standalone/api_router.py +1 -1
- nucliadb/standalone/app.py +4 -3
- nucliadb/standalone/auth.py +5 -6
- nucliadb/standalone/lifecycle.py +2 -2
- nucliadb/standalone/run.py +5 -4
- nucliadb/standalone/settings.py +5 -6
- nucliadb/standalone/versions.py +3 -4
- nucliadb/tasks/consumer.py +13 -8
- nucliadb/tasks/models.py +2 -1
- nucliadb/tasks/producer.py +3 -3
- nucliadb/tasks/retries.py +8 -7
- nucliadb/train/api/utils.py +1 -3
- nucliadb/train/api/v1/shards.py +1 -2
- nucliadb/train/api/v1/trainset.py +1 -2
- nucliadb/train/app.py +1 -1
- nucliadb/train/generator.py +4 -4
- nucliadb/train/generators/field_classifier.py +2 -2
- nucliadb/train/generators/field_streaming.py +6 -6
- nucliadb/train/generators/image_classifier.py +2 -2
- nucliadb/train/generators/paragraph_classifier.py +2 -2
- nucliadb/train/generators/paragraph_streaming.py +2 -2
- nucliadb/train/generators/question_answer_streaming.py +2 -2
- nucliadb/train/generators/sentence_classifier.py +4 -10
- nucliadb/train/generators/token_classifier.py +3 -2
- nucliadb/train/generators/utils.py +6 -5
- nucliadb/train/nodes.py +3 -3
- nucliadb/train/resource.py +6 -8
- nucliadb/train/settings.py +3 -4
- nucliadb/train/types.py +11 -11
- nucliadb/train/upload.py +3 -2
- nucliadb/train/uploader.py +1 -2
- nucliadb/train/utils.py +1 -2
- nucliadb/writer/api/v1/export_import.py +4 -1
- nucliadb/writer/api/v1/field.py +15 -14
- nucliadb/writer/api/v1/knowledgebox.py +18 -56
- nucliadb/writer/api/v1/learning_config.py +5 -4
- nucliadb/writer/api/v1/resource.py +9 -20
- nucliadb/writer/api/v1/services.py +10 -132
- nucliadb/writer/api/v1/upload.py +73 -72
- nucliadb/writer/app.py +8 -2
- nucliadb/writer/resource/basic.py +12 -15
- nucliadb/writer/resource/field.py +43 -5
- nucliadb/writer/resource/origin.py +7 -0
- nucliadb/writer/settings.py +2 -3
- nucliadb/writer/tus/__init__.py +2 -3
- nucliadb/writer/tus/azure.py +5 -7
- nucliadb/writer/tus/dm.py +3 -3
- nucliadb/writer/tus/exceptions.py +3 -4
- nucliadb/writer/tus/gcs.py +15 -22
- nucliadb/writer/tus/s3.py +2 -3
- nucliadb/writer/tus/storage.py +3 -3
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +10 -11
- nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
- nucliadb/common/datamanagers/entities.py +0 -139
- nucliadb/common/external_index_providers/pinecone.py +0 -894
- nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
- nucliadb/search/search/hydrator.py +0 -197
- nucliadb-6.7.2.post4874.dist-info/RECORD +0 -383
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
|
@@ -21,7 +21,6 @@ import argparse
|
|
|
21
21
|
import asyncio
|
|
22
22
|
import logging
|
|
23
23
|
from datetime import datetime
|
|
24
|
-
from typing import Optional
|
|
25
24
|
|
|
26
25
|
from nidx_protos.nodewriter_pb2 import (
|
|
27
26
|
NewShardRequest,
|
|
@@ -34,10 +33,12 @@ from nucliadb.common.external_index_providers.base import ExternalIndexManager
|
|
|
34
33
|
from nucliadb.common.external_index_providers.manager import (
|
|
35
34
|
get_external_index_manager,
|
|
36
35
|
)
|
|
36
|
+
from nucliadb.common.maindb.utils import get_driver
|
|
37
37
|
from nucliadb.common.nidx import get_nidx_api_client
|
|
38
38
|
from nucliadb.common.vector_index_config import nucliadb_index_config_to_nidx
|
|
39
|
+
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
|
|
39
40
|
from nucliadb.migrator.settings import settings
|
|
40
|
-
from nucliadb_protos import
|
|
41
|
+
from nucliadb_protos import writer_pb2
|
|
41
42
|
from nucliadb_telemetry import errors
|
|
42
43
|
|
|
43
44
|
from .utils import (
|
|
@@ -45,6 +46,7 @@ from .utils import (
|
|
|
45
46
|
get_resource,
|
|
46
47
|
get_rollover_resource_index_message,
|
|
47
48
|
index_resource_to_shard,
|
|
49
|
+
wait_for_nidx,
|
|
48
50
|
)
|
|
49
51
|
|
|
50
52
|
logger = logging.getLogger(__name__)
|
|
@@ -59,7 +61,7 @@ class UnexpectedRolloverError(Exception):
|
|
|
59
61
|
async def create_rollover_index(
|
|
60
62
|
app_context: ApplicationContext,
|
|
61
63
|
kbid: str,
|
|
62
|
-
external:
|
|
64
|
+
external: ExternalIndexManager | None = None,
|
|
63
65
|
) -> None:
|
|
64
66
|
"""
|
|
65
67
|
Creates a new index for a knowledgebox in the index node cluster (and to the external index provider if configured).
|
|
@@ -148,7 +150,6 @@ async def create_rollover_shards(
|
|
|
148
150
|
|
|
149
151
|
req = NewShardRequest(
|
|
150
152
|
kbid=kbid,
|
|
151
|
-
release_channel=utils_pb2.ReleaseChannel.STABLE,
|
|
152
153
|
vectorsets_configs=vectorsets,
|
|
153
154
|
)
|
|
154
155
|
|
|
@@ -172,7 +173,7 @@ async def create_rollover_shards(
|
|
|
172
173
|
return kb_shards
|
|
173
174
|
|
|
174
175
|
|
|
175
|
-
def _get_shard(shards: writer_pb2.Shards, shard_id: str) ->
|
|
176
|
+
def _get_shard(shards: writer_pb2.Shards, shard_id: str) -> writer_pb2.ShardObject | None:
|
|
176
177
|
for shard in shards.shards:
|
|
177
178
|
if shard_id == shard.shard:
|
|
178
179
|
return shard
|
|
@@ -220,7 +221,7 @@ def _to_ts(dt: datetime) -> int:
|
|
|
220
221
|
|
|
221
222
|
|
|
222
223
|
async def index_to_rollover_index(
|
|
223
|
-
app_context: ApplicationContext, kbid: str, external:
|
|
224
|
+
app_context: ApplicationContext, kbid: str, external: ExternalIndexManager | None = None
|
|
224
225
|
) -> None:
|
|
225
226
|
"""
|
|
226
227
|
Indexes all data in a kb in rollover indexes. This happens before the cutover.
|
|
@@ -254,6 +255,7 @@ async def index_to_rollover_index(
|
|
|
254
255
|
for rid in resource_ids
|
|
255
256
|
]
|
|
256
257
|
await asyncio.gather(*batch)
|
|
258
|
+
await wait_for_indexing_to_catch_up(app_context)
|
|
257
259
|
|
|
258
260
|
async with datamanagers.with_transaction() as txn:
|
|
259
261
|
state.resources_indexed = True
|
|
@@ -262,12 +264,28 @@ async def index_to_rollover_index(
|
|
|
262
264
|
await txn.commit()
|
|
263
265
|
|
|
264
266
|
|
|
267
|
+
async def wait_for_indexing_to_catch_up(app_context: ApplicationContext):
|
|
268
|
+
try:
|
|
269
|
+
app_context.nats_manager
|
|
270
|
+
except AssertionError:
|
|
271
|
+
logger.warning("Nats manager not initialized. Cannot wait for indexing to catch up")
|
|
272
|
+
return
|
|
273
|
+
max_pending = 1000
|
|
274
|
+
while True:
|
|
275
|
+
try:
|
|
276
|
+
await wait_for_nidx(app_context.nats_manager, max_wait_seconds=60, max_pending=max_pending)
|
|
277
|
+
return
|
|
278
|
+
except asyncio.TimeoutError:
|
|
279
|
+
logger.warning(f"Nidx is behind more than {max_pending} messages. Throttling rollover.")
|
|
280
|
+
await asyncio.sleep(30)
|
|
281
|
+
|
|
282
|
+
|
|
265
283
|
async def _index_resource_to_rollover_index(
|
|
266
284
|
app_context: ApplicationContext,
|
|
267
285
|
rollover_shards: writer_pb2.Shards,
|
|
268
286
|
kbid: str,
|
|
269
287
|
resource_id: str,
|
|
270
|
-
external:
|
|
288
|
+
external: ExternalIndexManager | None = None,
|
|
271
289
|
) -> None:
|
|
272
290
|
async with resource_index_semaphore:
|
|
273
291
|
async with datamanagers.with_transaction() as txn:
|
|
@@ -321,7 +339,7 @@ async def _index_resource_to_rollover_index(
|
|
|
321
339
|
|
|
322
340
|
|
|
323
341
|
async def cutover_index(
|
|
324
|
-
app_context: ApplicationContext, kbid: str, external:
|
|
342
|
+
app_context: ApplicationContext, kbid: str, external: ExternalIndexManager | None = None
|
|
325
343
|
) -> None:
|
|
326
344
|
"""
|
|
327
345
|
Swaps our the current active index for a knowledgebox.
|
|
@@ -415,9 +433,18 @@ async def cutover_shards(app_context: ApplicationContext, kbid: str) -> None:
|
|
|
415
433
|
|
|
416
434
|
await txn.commit()
|
|
417
435
|
|
|
436
|
+
# For KBs with pre-warm enabled, we must configure the new shards. There may
|
|
437
|
+
# be some small delay between this call and the shards being actually
|
|
438
|
+
# prewarmed, but rollovers are quite unusual and we prefer this rather than
|
|
439
|
+
# prewarming old and new shards at the same time
|
|
440
|
+
kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
|
|
441
|
+
if kb_config is not None and kb_config.prewarm_enabled:
|
|
442
|
+
driver = get_driver()
|
|
443
|
+
await KnowledgeBox.configure_shards(driver, kbid, prewarm=True)
|
|
444
|
+
|
|
418
445
|
|
|
419
446
|
async def validate_indexed_data(
|
|
420
|
-
app_context: ApplicationContext, kbid: str, external:
|
|
447
|
+
app_context: ApplicationContext, kbid: str, external: ExternalIndexManager | None = None
|
|
421
448
|
) -> list[str]:
|
|
422
449
|
"""
|
|
423
450
|
Goes through all the resources in a knowledgebox and validates it
|
|
@@ -18,7 +18,6 @@
|
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
20
|
import enum
|
|
21
|
-
from typing import Optional
|
|
22
21
|
|
|
23
22
|
from pydantic import Field
|
|
24
23
|
from pydantic_settings import BaseSettings
|
|
@@ -42,7 +41,7 @@ class Settings(BaseSettings):
|
|
|
42
41
|
description="Maximum number of paragraphs to target per shard",
|
|
43
42
|
)
|
|
44
43
|
max_resource_paragraphs: int = Field(
|
|
45
|
-
default=
|
|
44
|
+
default=300_000,
|
|
46
45
|
title="Max paragraphs per resource",
|
|
47
46
|
description="Maximum number of paragraphs allowed on a single resource",
|
|
48
47
|
)
|
|
@@ -52,13 +51,9 @@ class Settings(BaseSettings):
|
|
|
52
51
|
description="Maximum number of entity labels (/e/) per field that are indexed (excess is not indexed)",
|
|
53
52
|
)
|
|
54
53
|
|
|
55
|
-
nidx_api_address:
|
|
56
|
-
nidx_searcher_address:
|
|
57
|
-
|
|
58
|
-
)
|
|
59
|
-
nidx_indexer_address: Optional[str] = Field(
|
|
60
|
-
default=None, description="NIDX gRPC indexer API address"
|
|
61
|
-
)
|
|
54
|
+
nidx_api_address: str | None = Field(default=None, description="NIDX gRPC API address")
|
|
55
|
+
nidx_searcher_address: str | None = Field(default=None, description="NIDX gRPC searcher API address")
|
|
56
|
+
nidx_indexer_address: str | None = Field(default=None, description="NIDX gRPC indexer API address")
|
|
62
57
|
|
|
63
58
|
|
|
64
59
|
settings = Settings()
|
nucliadb/common/cluster/utils.py
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
import asyncio
|
|
20
20
|
import logging
|
|
21
|
-
from typing import TYPE_CHECKING
|
|
21
|
+
from typing import TYPE_CHECKING
|
|
22
22
|
|
|
23
23
|
import backoff
|
|
24
24
|
from nidx_protos import nodereader_pb2
|
|
@@ -32,6 +32,7 @@ from nucliadb.common.cluster.settings import settings
|
|
|
32
32
|
from nucliadb.ingest.orm import index_message
|
|
33
33
|
from nucliadb.ingest.orm.resource import Resource
|
|
34
34
|
from nucliadb_protos import writer_pb2
|
|
35
|
+
from nucliadb_utils.nats import NatsConnectionManager
|
|
35
36
|
from nucliadb_utils.utilities import Utility, clean_utility, get_utility, set_utility
|
|
36
37
|
|
|
37
38
|
if TYPE_CHECKING: # pragma: no cover
|
|
@@ -45,13 +46,13 @@ logger = logging.getLogger(__name__)
|
|
|
45
46
|
_lock = asyncio.Lock()
|
|
46
47
|
|
|
47
48
|
|
|
48
|
-
async def setup_cluster() ->
|
|
49
|
+
async def setup_cluster() -> KBShardManager | StandaloneKBShardManager:
|
|
49
50
|
async with _lock:
|
|
50
51
|
if get_utility(Utility.SHARD_MANAGER) is not None:
|
|
51
52
|
# already setup
|
|
52
53
|
return get_utility(Utility.SHARD_MANAGER)
|
|
53
54
|
|
|
54
|
-
mng:
|
|
55
|
+
mng: KBShardManager | StandaloneKBShardManager
|
|
55
56
|
if settings.standalone_mode:
|
|
56
57
|
mng = StandaloneKBShardManager()
|
|
57
58
|
else:
|
|
@@ -69,17 +70,17 @@ def get_shard_manager() -> KBShardManager:
|
|
|
69
70
|
return get_utility(Utility.SHARD_MANAGER) # type: ignore
|
|
70
71
|
|
|
71
72
|
|
|
72
|
-
async def get_resource(kbid: str, resource_id: str) ->
|
|
73
|
+
async def get_resource(kbid: str, resource_id: str) -> Resource | None:
|
|
73
74
|
async with datamanagers.with_ro_transaction() as txn:
|
|
74
|
-
return await
|
|
75
|
+
return await Resource.get(txn, kbid=kbid, rid=resource_id)
|
|
75
76
|
|
|
76
77
|
|
|
77
78
|
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=8)
|
|
78
79
|
async def get_rollover_resource_index_message(
|
|
79
80
|
kbid: str, resource_id: str
|
|
80
|
-
) ->
|
|
81
|
+
) -> nodereader_pb2.Resource | None:
|
|
81
82
|
async with datamanagers.with_ro_transaction() as txn:
|
|
82
|
-
resource = await
|
|
83
|
+
resource = await Resource.get(txn, kbid=kbid, rid=resource_id)
|
|
83
84
|
if resource is None:
|
|
84
85
|
logger.warning(
|
|
85
86
|
"Resource not found while indexing, skipping",
|
|
@@ -97,7 +98,7 @@ async def index_resource_to_shard(
|
|
|
97
98
|
kbid: str,
|
|
98
99
|
resource_id: str,
|
|
99
100
|
shard: writer_pb2.ShardObject,
|
|
100
|
-
resource_index_message:
|
|
101
|
+
resource_index_message: nodereader_pb2.Resource | None = None,
|
|
101
102
|
) -> None:
|
|
102
103
|
logger.info("Indexing resource", extra={"kbid": kbid, "resource_id": resource_id})
|
|
103
104
|
sm = app_context.shard_manager
|
|
@@ -125,3 +126,28 @@ async def delete_resource_from_shard(
|
|
|
125
126
|
partition = partitioning.generate_partition(kbid, resource_id)
|
|
126
127
|
|
|
127
128
|
await sm.delete_resource(shard, resource_id, 0, str(partition), kbid)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
async def get_nats_consumer_pending_messages(
|
|
132
|
+
nats_manager: NatsConnectionManager, *, stream: str, consumer: str
|
|
133
|
+
) -> int:
|
|
134
|
+
# get raw js client
|
|
135
|
+
js = nats_manager.js
|
|
136
|
+
consumer_info = await js.consumer_info(stream, consumer)
|
|
137
|
+
return consumer_info.num_pending
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
async def wait_for_nidx(
|
|
141
|
+
nats_manager: NatsConnectionManager,
|
|
142
|
+
max_pending: int,
|
|
143
|
+
poll_interval_seconds: int = 5,
|
|
144
|
+
max_wait_seconds: int = 60,
|
|
145
|
+
):
|
|
146
|
+
async with asyncio.timeout(max_wait_seconds): # type: ignore
|
|
147
|
+
while True:
|
|
148
|
+
pending = await get_nats_consumer_pending_messages(
|
|
149
|
+
nats_manager, stream="nidx", consumer="nidx"
|
|
150
|
+
)
|
|
151
|
+
if pending < max_pending:
|
|
152
|
+
return
|
|
153
|
+
await asyncio.sleep(poll_interval_seconds)
|
|
@@ -18,7 +18,6 @@
|
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
20
|
import asyncio
|
|
21
|
-
from typing import Optional
|
|
22
21
|
|
|
23
22
|
from nucliadb.common.cluster.manager import KBShardManager
|
|
24
23
|
from nucliadb.common.cluster.settings import in_standalone_mode
|
|
@@ -58,13 +57,13 @@ class ApplicationContext:
|
|
|
58
57
|
self.service_name = service_name
|
|
59
58
|
self._initialized: bool = False
|
|
60
59
|
self._lock = asyncio.Lock()
|
|
61
|
-
self._kv_driver:
|
|
62
|
-
self._blob_storage:
|
|
63
|
-
self._shard_manager:
|
|
64
|
-
self._partitioning:
|
|
65
|
-
self._nats_manager:
|
|
66
|
-
self._transaction:
|
|
67
|
-
self._nidx:
|
|
60
|
+
self._kv_driver: Driver | None = None
|
|
61
|
+
self._blob_storage: Storage | None = None
|
|
62
|
+
self._shard_manager: KBShardManager | None = None
|
|
63
|
+
self._partitioning: PartitionUtility | None = None
|
|
64
|
+
self._nats_manager: NatsConnectionManager | None = None
|
|
65
|
+
self._transaction: TransactionUtility | None = None
|
|
66
|
+
self._nidx: NidxUtility | None = None
|
|
68
67
|
self.enabled_kv_driver = kv_driver
|
|
69
68
|
self.enabled_blob_storage = blob_storage
|
|
70
69
|
self.enabled_shard_manager = shard_manager
|
|
@@ -19,7 +19,6 @@
|
|
|
19
19
|
#
|
|
20
20
|
|
|
21
21
|
from contextlib import asynccontextmanager
|
|
22
|
-
from typing import Optional
|
|
23
22
|
|
|
24
23
|
from fastapi import FastAPI
|
|
25
24
|
from starlette.routing import Mount
|
|
@@ -28,7 +27,7 @@ from nucliadb.common.context import ApplicationContext
|
|
|
28
27
|
|
|
29
28
|
|
|
30
29
|
@asynccontextmanager
|
|
31
|
-
async def inject_app_context(app: FastAPI, context:
|
|
30
|
+
async def inject_app_context(app: FastAPI, context: ApplicationContext | None = None):
|
|
32
31
|
if context is None:
|
|
33
32
|
context = ApplicationContext()
|
|
34
33
|
|
|
@@ -31,7 +31,6 @@
|
|
|
31
31
|
from . import (
|
|
32
32
|
atomic,
|
|
33
33
|
cluster,
|
|
34
|
-
entities,
|
|
35
34
|
exceptions,
|
|
36
35
|
fields,
|
|
37
36
|
kb,
|
|
@@ -47,7 +46,6 @@ from .utils import with_ro_transaction, with_rw_transaction, with_transaction
|
|
|
47
46
|
__all__ = (
|
|
48
47
|
"atomic",
|
|
49
48
|
"cluster",
|
|
50
|
-
"entities",
|
|
51
49
|
"exceptions",
|
|
52
50
|
"fields",
|
|
53
51
|
"kb",
|
|
@@ -57,7 +55,7 @@ __all__ = (
|
|
|
57
55
|
"search_configurations",
|
|
58
56
|
"synonyms",
|
|
59
57
|
"vectorsets",
|
|
60
|
-
"with_transaction",
|
|
61
|
-
"with_rw_transaction",
|
|
62
58
|
"with_ro_transaction",
|
|
59
|
+
"with_rw_transaction",
|
|
60
|
+
"with_transaction",
|
|
63
61
|
)
|
|
@@ -35,13 +35,15 @@ it's transaction
|
|
|
35
35
|
|
|
36
36
|
"""
|
|
37
37
|
|
|
38
|
+
from collections.abc import Awaitable, Callable
|
|
38
39
|
from functools import wraps
|
|
39
|
-
from typing import
|
|
40
|
+
from typing import Concatenate, TypeVar
|
|
40
41
|
|
|
41
|
-
from typing_extensions import
|
|
42
|
+
from typing_extensions import ParamSpec
|
|
42
43
|
|
|
43
44
|
from nucliadb.common.maindb.driver import Transaction
|
|
44
45
|
|
|
46
|
+
from . import cluster as cluster_dm
|
|
45
47
|
from . import kb as kb_dm
|
|
46
48
|
from . import labels as labels_dm
|
|
47
49
|
from . import resources as resources_dm
|
|
@@ -73,6 +75,10 @@ def rw_txn_wrap(fun: Callable[Concatenate[Transaction, P], Awaitable[T]]) -> Cal
|
|
|
73
75
|
return wrapper
|
|
74
76
|
|
|
75
77
|
|
|
78
|
+
class cluster:
|
|
79
|
+
get_kb_shards = ro_txn_wrap(cluster_dm.get_kb_shards)
|
|
80
|
+
|
|
81
|
+
|
|
76
82
|
class kb:
|
|
77
83
|
exists_kb = ro_txn_wrap(kb_dm.exists_kb)
|
|
78
84
|
get_config = ro_txn_wrap(kb_dm.get_config)
|
|
@@ -83,6 +89,7 @@ class resources:
|
|
|
83
89
|
get_resource_uuid_from_slug = ro_txn_wrap(resources_dm.get_resource_uuid_from_slug)
|
|
84
90
|
resource_exists = ro_txn_wrap(resources_dm.resource_exists)
|
|
85
91
|
slug_exists = ro_txn_wrap(resources_dm.slug_exists)
|
|
92
|
+
get_all_field_ids = ro_txn_wrap(resources_dm.get_all_field_ids)
|
|
86
93
|
|
|
87
94
|
|
|
88
95
|
class labelset:
|
|
@@ -18,7 +18,6 @@
|
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
20
|
import logging
|
|
21
|
-
from typing import Optional
|
|
22
21
|
|
|
23
22
|
from nucliadb.common.maindb.driver import Transaction
|
|
24
23
|
from nucliadb_protos import writer_pb2
|
|
@@ -33,7 +32,7 @@ KB_SHARDS = "/kbs/{kbid}/shards"
|
|
|
33
32
|
|
|
34
33
|
async def get_kb_shards(
|
|
35
34
|
txn: Transaction, *, kbid: str, for_update: bool = False
|
|
36
|
-
) ->
|
|
35
|
+
) -> writer_pb2.Shards | None:
|
|
37
36
|
key = KB_SHARDS.format(kbid=kbid)
|
|
38
37
|
return await get_kv_pb(txn, key, writer_pb2.Shards, for_update=for_update)
|
|
39
38
|
|
|
@@ -18,7 +18,6 @@
|
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
20
|
|
|
21
|
-
from typing import Optional
|
|
22
21
|
|
|
23
22
|
from google.protobuf.message import Message
|
|
24
23
|
|
|
@@ -34,7 +33,7 @@ KB_RESOURCE_FIELD_STATUS = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}/status"
|
|
|
34
33
|
|
|
35
34
|
async def get_raw(
|
|
36
35
|
txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str
|
|
37
|
-
) ->
|
|
36
|
+
) -> bytes | None:
|
|
38
37
|
key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
|
39
38
|
return await txn.get(key)
|
|
40
39
|
|
|
@@ -62,7 +61,7 @@ async def delete(txn: Transaction, *, kbid: str, rid: str, field_type: str, fiel
|
|
|
62
61
|
|
|
63
62
|
async def get_error(
|
|
64
63
|
txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str
|
|
65
|
-
) ->
|
|
64
|
+
) -> writer_pb2.Error | None:
|
|
66
65
|
key = KB_RESOURCE_FIELD_ERROR.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
|
67
66
|
return await get_kv_pb(txn, key, writer_pb2.Error)
|
|
68
67
|
|
|
@@ -85,7 +84,7 @@ async def set_error(
|
|
|
85
84
|
|
|
86
85
|
async def get_status(
|
|
87
86
|
txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str
|
|
88
|
-
) ->
|
|
87
|
+
) -> writer_pb2.FieldStatus | None:
|
|
89
88
|
key = KB_RESOURCE_FIELD_STATUS.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
|
90
89
|
return await get_kv_pb(txn, key, writer_pb2.FieldStatus)
|
|
91
90
|
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
20
|
import logging
|
|
21
|
-
from
|
|
21
|
+
from collections.abc import AsyncIterator
|
|
22
22
|
|
|
23
23
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
|
24
24
|
from nucliadb.common.maindb.driver import Transaction
|
|
@@ -47,7 +47,7 @@ async def exists_kb(txn: Transaction, *, kbid: str) -> bool:
|
|
|
47
47
|
return await get_config(txn, kbid=kbid, for_update=False) is not None
|
|
48
48
|
|
|
49
49
|
|
|
50
|
-
async def get_kb_uuid(txn: Transaction, *, slug: str) ->
|
|
50
|
+
async def get_kb_uuid(txn: Transaction, *, slug: str) -> str | None:
|
|
51
51
|
uuid = await txn.get(KB_SLUGS.format(slug=slug), for_update=False)
|
|
52
52
|
if uuid is not None:
|
|
53
53
|
return uuid.decode()
|
|
@@ -67,7 +67,7 @@ async def delete_kb_slug(txn: Transaction, *, slug: str):
|
|
|
67
67
|
|
|
68
68
|
async def get_config(
|
|
69
69
|
txn: Transaction, *, kbid: str, for_update: bool = False
|
|
70
|
-
) ->
|
|
70
|
+
) -> knowledgebox_pb2.KnowledgeBoxConfig | None:
|
|
71
71
|
key = KB_UUID.format(kbid=kbid)
|
|
72
72
|
payload = await txn.get(key, for_update=for_update)
|
|
73
73
|
if payload is None:
|
|
@@ -105,8 +105,8 @@ async def get_matryoshka_vector_dimension(
|
|
|
105
105
|
txn: Transaction,
|
|
106
106
|
*,
|
|
107
107
|
kbid: str,
|
|
108
|
-
vectorset_id:
|
|
109
|
-
) ->
|
|
108
|
+
vectorset_id: str | None = None,
|
|
109
|
+
) -> int | None:
|
|
110
110
|
"""Return vector dimension for matryoshka models"""
|
|
111
111
|
from . import vectorsets
|
|
112
112
|
|
|
@@ -145,7 +145,7 @@ async def get_matryoshka_vector_dimension(
|
|
|
145
145
|
|
|
146
146
|
async def get_external_index_provider_metadata(
|
|
147
147
|
txn: Transaction, *, kbid: str
|
|
148
|
-
) ->
|
|
148
|
+
) -> knowledgebox_pb2.StoredExternalIndexProviderMetadata | None:
|
|
149
149
|
kb_config = await get_config(txn, kbid=kbid)
|
|
150
150
|
if kb_config is None:
|
|
151
151
|
return None
|
|
@@ -18,7 +18,6 @@
|
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
20
|
import logging
|
|
21
|
-
from typing import Optional
|
|
22
21
|
|
|
23
22
|
import orjson
|
|
24
23
|
|
|
@@ -50,7 +49,7 @@ async def get_labels(txn: Transaction, *, kbid: str) -> kb_pb2.Labels:
|
|
|
50
49
|
return labels
|
|
51
50
|
|
|
52
51
|
|
|
53
|
-
async def _get_labelset_ids(txn: Transaction, *, kbid: str) ->
|
|
52
|
+
async def _get_labelset_ids(txn: Transaction, *, kbid: str) -> list[str] | None:
|
|
54
53
|
key = KB_LABELSET_IDS.format(kbid=kbid)
|
|
55
54
|
data = await txn.get(key, for_update=True)
|
|
56
55
|
if not data:
|
|
@@ -84,7 +83,7 @@ async def _set_labelset_ids(txn: Transaction, *, kbid: str, labelsets: list[str]
|
|
|
84
83
|
await txn.set(key, data)
|
|
85
84
|
|
|
86
85
|
|
|
87
|
-
async def get_labelset(txn: Transaction, *, kbid: str, labelset_id: str) ->
|
|
86
|
+
async def get_labelset(txn: Transaction, *, kbid: str, labelset_id: str) -> kb_pb2.LabelSet | None:
|
|
88
87
|
labelset_key = KB_LABELSET.format(kbid=kbid, id=labelset_id)
|
|
89
88
|
payload = await txn.get(labelset_key)
|
|
90
89
|
if payload:
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
from
|
|
20
|
+
from collections.abc import AsyncGenerator
|
|
21
21
|
|
|
22
22
|
import backoff
|
|
23
23
|
|
|
@@ -28,14 +28,9 @@ from nucliadb.common.maindb.exceptions import ConflictError, NotFoundError
|
|
|
28
28
|
# These should be refactored
|
|
29
29
|
from nucliadb.ingest.settings import settings as ingest_settings
|
|
30
30
|
from nucliadb_protos import resources_pb2
|
|
31
|
-
from nucliadb_utils.utilities import get_storage
|
|
32
31
|
|
|
33
32
|
from .utils import with_ro_transaction
|
|
34
33
|
|
|
35
|
-
if TYPE_CHECKING:
|
|
36
|
-
from nucliadb.ingest.orm.resource import Resource as ResourceORM
|
|
37
|
-
|
|
38
|
-
|
|
39
34
|
KB_RESOURCE_BASIC = "/kbs/{kbid}/r/{uuid}"
|
|
40
35
|
KB_RESOURCE_BASIC_FS = "/kbs/{kbid}/r/{uuid}/basic" # Only used on FS driver
|
|
41
36
|
KB_RESOURCE_ORIGIN = "/kbs/{kbid}/r/{uuid}/origin"
|
|
@@ -61,7 +56,7 @@ async def resource_exists(txn: Transaction, *, kbid: str, rid: str) -> bool:
|
|
|
61
56
|
# id and slug
|
|
62
57
|
|
|
63
58
|
|
|
64
|
-
async def get_resource_uuid_from_slug(txn: Transaction, *, kbid: str, slug: str) ->
|
|
59
|
+
async def get_resource_uuid_from_slug(txn: Transaction, *, kbid: str, slug: str) -> str | None:
|
|
65
60
|
encoded_uuid = await txn.get(KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug, for_update=False))
|
|
66
61
|
if not encoded_uuid:
|
|
67
62
|
return None
|
|
@@ -70,7 +65,7 @@ async def get_resource_uuid_from_slug(txn: Transaction, *, kbid: str, slug: str)
|
|
|
70
65
|
|
|
71
66
|
async def slug_exists(txn: Transaction, *, kbid: str, slug: str) -> bool:
|
|
72
67
|
key = KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug)
|
|
73
|
-
encoded_slug:
|
|
68
|
+
encoded_slug: bytes | None = await txn.get(key)
|
|
74
69
|
return encoded_slug not in (None, b"")
|
|
75
70
|
|
|
76
71
|
|
|
@@ -102,7 +97,7 @@ async def modify_slug(txn: Transaction, *, kbid: str, rid: str, new_slug: str) -
|
|
|
102
97
|
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
|
|
103
98
|
async def get_resource_shard_id(
|
|
104
99
|
txn: Transaction, *, kbid: str, rid: str, for_update: bool = False
|
|
105
|
-
) ->
|
|
100
|
+
) -> str | None:
|
|
106
101
|
key = KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid)
|
|
107
102
|
shard = await txn.get(key, for_update=for_update)
|
|
108
103
|
if shard is not None:
|
|
@@ -118,7 +113,7 @@ async def set_resource_shard_id(txn: Transaction, *, kbid: str, rid: str, shard:
|
|
|
118
113
|
# Basic
|
|
119
114
|
|
|
120
115
|
|
|
121
|
-
async def get_basic(txn: Transaction, *, kbid: str, rid: str) ->
|
|
116
|
+
async def get_basic(txn: Transaction, *, kbid: str, rid: str) -> resources_pb2.Basic | None:
|
|
122
117
|
raw = await get_basic_raw(txn, kbid=kbid, rid=rid)
|
|
123
118
|
if raw is None:
|
|
124
119
|
return None
|
|
@@ -127,7 +122,7 @@ async def get_basic(txn: Transaction, *, kbid: str, rid: str) -> Optional[resour
|
|
|
127
122
|
return basic
|
|
128
123
|
|
|
129
124
|
|
|
130
|
-
async def get_basic_raw(txn: Transaction, *, kbid: str, rid: str) ->
|
|
125
|
+
async def get_basic_raw(txn: Transaction, *, kbid: str, rid: str) -> bytes | None:
|
|
131
126
|
if ingest_settings.driver == "local":
|
|
132
127
|
raw_basic = await txn.get(KB_RESOURCE_BASIC_FS.format(kbid=kbid, uuid=rid))
|
|
133
128
|
else:
|
|
@@ -151,7 +146,7 @@ async def set_basic(txn: Transaction, *, kbid: str, rid: str, basic: resources_p
|
|
|
151
146
|
# Origin
|
|
152
147
|
|
|
153
148
|
|
|
154
|
-
async def get_origin(txn: Transaction, *, kbid: str, rid: str) ->
|
|
149
|
+
async def get_origin(txn: Transaction, *, kbid: str, rid: str) -> resources_pb2.Origin | None:
|
|
155
150
|
key = KB_RESOURCE_ORIGIN.format(kbid=kbid, uuid=rid)
|
|
156
151
|
return await get_kv_pb(txn, key, resources_pb2.Origin)
|
|
157
152
|
|
|
@@ -164,7 +159,7 @@ async def set_origin(txn: Transaction, *, kbid: str, rid: str, origin: resources
|
|
|
164
159
|
# Extra
|
|
165
160
|
|
|
166
161
|
|
|
167
|
-
async def get_extra(txn: Transaction, *, kbid: str, rid: str) ->
|
|
162
|
+
async def get_extra(txn: Transaction, *, kbid: str, rid: str) -> resources_pb2.Extra | None:
|
|
168
163
|
key = KB_RESOURCE_EXTRA.format(kbid=kbid, uuid=rid)
|
|
169
164
|
return await get_kv_pb(txn, key, resources_pb2.Extra)
|
|
170
165
|
|
|
@@ -177,7 +172,7 @@ async def set_extra(txn: Transaction, *, kbid: str, rid: str, extra: resources_p
|
|
|
177
172
|
# Security
|
|
178
173
|
|
|
179
174
|
|
|
180
|
-
async def get_security(txn: Transaction, *, kbid: str, rid: str) ->
|
|
175
|
+
async def get_security(txn: Transaction, *, kbid: str, rid: str) -> resources_pb2.Security | None:
|
|
181
176
|
key = KB_RESOURCE_SECURITY.format(kbid=kbid, uuid=rid)
|
|
182
177
|
return await get_kv_pb(txn, key, resources_pb2.Security)
|
|
183
178
|
|
|
@@ -265,7 +260,7 @@ async def set_number_of_resources(txn: Transaction, kbid: str, value: int) -> No
|
|
|
265
260
|
|
|
266
261
|
async def get_all_field_ids(
|
|
267
262
|
txn: Transaction, *, kbid: str, rid: str, for_update: bool = False
|
|
268
|
-
) ->
|
|
263
|
+
) -> resources_pb2.AllFieldIDs | None:
|
|
269
264
|
key = KB_RESOURCE_ALL_FIELDS.format(kbid=kbid, uuid=rid)
|
|
270
265
|
return await get_kv_pb(txn, key, resources_pb2.AllFieldIDs, for_update=for_update)
|
|
271
266
|
|
|
@@ -285,21 +280,3 @@ async def has_field(txn: Transaction, *, kbid: str, rid: str, field_id: resource
|
|
|
285
280
|
if field_id == resource_field_id:
|
|
286
281
|
return True
|
|
287
282
|
return False
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
# ORM mix (this functions shouldn't belong here)
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
|
|
294
|
-
async def get_resource(txn: Transaction, *, kbid: str, rid: str) -> Optional["ResourceORM"]:
|
|
295
|
-
"""
|
|
296
|
-
Not ideal to return Resource type here but refactoring would
|
|
297
|
-
require a lot of changes.
|
|
298
|
-
|
|
299
|
-
At least this isolated that dependency here.
|
|
300
|
-
"""
|
|
301
|
-
# prevent circulat imports -- this is not ideal that we have the ORM mix here.
|
|
302
|
-
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
|
|
303
|
-
|
|
304
|
-
kb_orm = KnowledgeBoxORM(txn, await get_storage(), kbid)
|
|
305
|
-
return await kb_orm.get(rid)
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
20
|
import logging
|
|
21
|
-
from
|
|
21
|
+
from collections.abc import AsyncGenerator
|
|
22
22
|
|
|
23
23
|
import orjson
|
|
24
24
|
from pydantic import BaseModel
|
|
@@ -56,7 +56,7 @@ class RolloverStateNotFoundError(Exception):
|
|
|
56
56
|
...
|
|
57
57
|
|
|
58
58
|
|
|
59
|
-
async def get_kb_rollover_shards(txn: Transaction, *, kbid: str) ->
|
|
59
|
+
async def get_kb_rollover_shards(txn: Transaction, *, kbid: str) -> writer_pb2.Shards | None:
|
|
60
60
|
key = KB_ROLLOVER_SHARDS.format(kbid=kbid)
|
|
61
61
|
return await get_kv_pb(txn, key, writer_pb2.Shards)
|
|
62
62
|
|
|
@@ -90,7 +90,7 @@ async def add_batch_to_index(txn: Transaction, *, kbid: str, batch: list[str]) -
|
|
|
90
90
|
await txn.set(key, b"")
|
|
91
91
|
|
|
92
92
|
|
|
93
|
-
async def get_to_index(txn: Transaction, *, kbid: str, count: int) ->
|
|
93
|
+
async def get_to_index(txn: Transaction, *, kbid: str, count: int) -> list[str] | None:
|
|
94
94
|
key = KB_ROLLOVER_RESOURCES_TO_INDEX.format(kbid=kbid, resource="")
|
|
95
95
|
found = [key async for key in txn.keys(key, count=count)]
|
|
96
96
|
if found:
|
|
@@ -118,9 +118,7 @@ async def add_indexed(
|
|
|
118
118
|
await txn.set(indexed, orjson.dumps(data))
|
|
119
119
|
|
|
120
120
|
|
|
121
|
-
async def get_indexed_data(
|
|
122
|
-
txn: Transaction, *, kbid: str, resource_id: str
|
|
123
|
-
) -> Optional[tuple[str, int]]:
|
|
121
|
+
async def get_indexed_data(txn: Transaction, *, kbid: str, resource_id: str) -> tuple[str, int] | None:
|
|
124
122
|
key = KB_ROLLOVER_RESOURCES_INDEXED.format(kbid=kbid, resource=resource_id)
|
|
125
123
|
val = await txn.get(key)
|
|
126
124
|
if val is not None:
|
|
@@ -213,7 +211,7 @@ async def update_kb_rollover_external_index_metadata(
|
|
|
213
211
|
|
|
214
212
|
async def get_kb_rollover_external_index_metadata(
|
|
215
213
|
txn: Transaction, *, kbid: str
|
|
216
|
-
) ->
|
|
214
|
+
) -> kb_pb2.StoredExternalIndexProviderMetadata | None:
|
|
217
215
|
key = KB_ROLLOVER_EXTERNAL_INDEX_METADATA.format(kbid=kbid)
|
|
218
216
|
val = await txn.get(key)
|
|
219
217
|
if not val:
|
|
@@ -18,7 +18,6 @@
|
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
20
|
import logging
|
|
21
|
-
from typing import Optional
|
|
22
21
|
|
|
23
22
|
from pydantic import TypeAdapter
|
|
24
23
|
|
|
@@ -31,7 +30,7 @@ KB_SEARCH_CONFIGURATION_PREFIX = "/kbs/{kbid}/search_configuration"
|
|
|
31
30
|
KB_SEARCH_CONFIGURATION = "/kbs/{kbid}/search_configuration/{name}"
|
|
32
31
|
|
|
33
32
|
|
|
34
|
-
async def get(txn: Transaction, *, kbid: str, name: str) ->
|
|
33
|
+
async def get(txn: Transaction, *, kbid: str, name: str) -> SearchConfiguration | None:
|
|
35
34
|
key = KB_SEARCH_CONFIGURATION.format(kbid=kbid, name=name)
|
|
36
35
|
data = await txn.get(key, for_update=True)
|
|
37
36
|
if not data:
|