nucliadb 6.7.2.post4874__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl
This diff compares the contents of two publicly available package versions as released to a supported registry, and is provided for informational purposes only.
- migrations/0023_backfill_pg_catalog.py +8 -4
- migrations/0028_extracted_vectors_reference.py +1 -1
- migrations/0029_backfill_field_status.py +3 -4
- migrations/0032_remove_old_relations.py +2 -3
- migrations/0038_backfill_catalog_field_labels.py +8 -4
- migrations/0039_backfill_converation_splits_metadata.py +106 -0
- migrations/0040_migrate_search_configurations.py +79 -0
- migrations/0041_reindex_conversations.py +137 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
- migrations/pg/0012_catalog_statistics_undo.py +26 -0
- nucliadb/backups/create.py +2 -15
- nucliadb/backups/restore.py +4 -15
- nucliadb/backups/tasks.py +4 -1
- nucliadb/common/back_pressure/cache.py +2 -3
- nucliadb/common/back_pressure/materializer.py +7 -13
- nucliadb/common/back_pressure/settings.py +6 -6
- nucliadb/common/back_pressure/utils.py +1 -0
- nucliadb/common/cache.py +9 -9
- nucliadb/common/catalog/__init__.py +79 -0
- nucliadb/common/catalog/dummy.py +36 -0
- nucliadb/common/catalog/interface.py +85 -0
- nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +330 -232
- nucliadb/common/catalog/utils.py +56 -0
- nucliadb/common/cluster/manager.py +8 -23
- nucliadb/common/cluster/rebalance.py +484 -112
- nucliadb/common/cluster/rollover.py +36 -9
- nucliadb/common/cluster/settings.py +4 -9
- nucliadb/common/cluster/utils.py +34 -8
- nucliadb/common/context/__init__.py +7 -8
- nucliadb/common/context/fastapi.py +1 -2
- nucliadb/common/datamanagers/__init__.py +2 -4
- nucliadb/common/datamanagers/atomic.py +9 -2
- nucliadb/common/datamanagers/cluster.py +1 -2
- nucliadb/common/datamanagers/fields.py +3 -4
- nucliadb/common/datamanagers/kb.py +6 -6
- nucliadb/common/datamanagers/labels.py +2 -3
- nucliadb/common/datamanagers/resources.py +10 -33
- nucliadb/common/datamanagers/rollover.py +5 -7
- nucliadb/common/datamanagers/search_configurations.py +1 -2
- nucliadb/common/datamanagers/synonyms.py +1 -2
- nucliadb/common/datamanagers/utils.py +4 -4
- nucliadb/common/datamanagers/vectorsets.py +4 -4
- nucliadb/common/external_index_providers/base.py +32 -5
- nucliadb/common/external_index_providers/manager.py +5 -34
- nucliadb/common/external_index_providers/settings.py +1 -27
- nucliadb/common/filter_expression.py +129 -41
- nucliadb/common/http_clients/exceptions.py +8 -0
- nucliadb/common/http_clients/processing.py +16 -23
- nucliadb/common/http_clients/utils.py +3 -0
- nucliadb/common/ids.py +82 -58
- nucliadb/common/locking.py +1 -2
- nucliadb/common/maindb/driver.py +9 -8
- nucliadb/common/maindb/local.py +5 -5
- nucliadb/common/maindb/pg.py +9 -8
- nucliadb/common/nidx.py +22 -5
- nucliadb/common/vector_index_config.py +1 -1
- nucliadb/export_import/datamanager.py +4 -3
- nucliadb/export_import/exporter.py +11 -19
- nucliadb/export_import/importer.py +13 -6
- nucliadb/export_import/tasks.py +2 -0
- nucliadb/export_import/utils.py +6 -18
- nucliadb/health.py +2 -2
- nucliadb/ingest/app.py +8 -8
- nucliadb/ingest/consumer/consumer.py +8 -10
- nucliadb/ingest/consumer/pull.py +10 -8
- nucliadb/ingest/consumer/service.py +5 -30
- nucliadb/ingest/consumer/shard_creator.py +16 -5
- nucliadb/ingest/consumer/utils.py +1 -1
- nucliadb/ingest/fields/base.py +37 -49
- nucliadb/ingest/fields/conversation.py +55 -9
- nucliadb/ingest/fields/exceptions.py +1 -2
- nucliadb/ingest/fields/file.py +22 -8
- nucliadb/ingest/fields/link.py +7 -7
- nucliadb/ingest/fields/text.py +2 -3
- nucliadb/ingest/orm/brain_v2.py +89 -57
- nucliadb/ingest/orm/broker_message.py +2 -4
- nucliadb/ingest/orm/entities.py +10 -209
- nucliadb/ingest/orm/index_message.py +128 -113
- nucliadb/ingest/orm/knowledgebox.py +91 -59
- nucliadb/ingest/orm/processor/auditing.py +1 -3
- nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
- nucliadb/ingest/orm/processor/processor.py +98 -153
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
- nucliadb/ingest/orm/resource.py +82 -71
- nucliadb/ingest/orm/utils.py +1 -1
- nucliadb/ingest/partitions.py +12 -1
- nucliadb/ingest/processing.py +17 -17
- nucliadb/ingest/serialize.py +202 -145
- nucliadb/ingest/service/writer.py +15 -114
- nucliadb/ingest/settings.py +36 -15
- nucliadb/ingest/utils.py +1 -2
- nucliadb/learning_proxy.py +23 -26
- nucliadb/metrics_exporter.py +20 -6
- nucliadb/middleware/__init__.py +82 -1
- nucliadb/migrator/datamanager.py +4 -11
- nucliadb/migrator/migrator.py +1 -2
- nucliadb/migrator/models.py +1 -2
- nucliadb/migrator/settings.py +1 -2
- nucliadb/models/internal/augment.py +614 -0
- nucliadb/models/internal/processing.py +19 -19
- nucliadb/openapi.py +2 -2
- nucliadb/purge/__init__.py +3 -8
- nucliadb/purge/orphan_shards.py +1 -2
- nucliadb/reader/__init__.py +5 -0
- nucliadb/reader/api/models.py +6 -13
- nucliadb/reader/api/v1/download.py +59 -38
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +37 -9
- nucliadb/reader/api/v1/learning_config.py +33 -14
- nucliadb/reader/api/v1/resource.py +61 -9
- nucliadb/reader/api/v1/services.py +18 -14
- nucliadb/reader/app.py +3 -1
- nucliadb/reader/reader/notifications.py +1 -2
- nucliadb/search/api/v1/__init__.py +3 -0
- nucliadb/search/api/v1/ask.py +3 -4
- nucliadb/search/api/v1/augment.py +585 -0
- nucliadb/search/api/v1/catalog.py +15 -19
- nucliadb/search/api/v1/find.py +16 -22
- nucliadb/search/api/v1/hydrate.py +328 -0
- nucliadb/search/api/v1/knowledgebox.py +1 -2
- nucliadb/search/api/v1/predict_proxy.py +1 -2
- nucliadb/search/api/v1/resource/ask.py +28 -8
- nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
- nucliadb/search/api/v1/resource/search.py +9 -11
- nucliadb/search/api/v1/retrieve.py +130 -0
- nucliadb/search/api/v1/search.py +28 -32
- nucliadb/search/api/v1/suggest.py +11 -14
- nucliadb/search/api/v1/summarize.py +1 -2
- nucliadb/search/api/v1/utils.py +2 -2
- nucliadb/search/app.py +3 -2
- nucliadb/search/augmentor/__init__.py +21 -0
- nucliadb/search/augmentor/augmentor.py +232 -0
- nucliadb/search/augmentor/fields.py +704 -0
- nucliadb/search/augmentor/metrics.py +24 -0
- nucliadb/search/augmentor/paragraphs.py +334 -0
- nucliadb/search/augmentor/resources.py +238 -0
- nucliadb/search/augmentor/utils.py +33 -0
- nucliadb/search/lifecycle.py +3 -1
- nucliadb/search/predict.py +33 -19
- nucliadb/search/predict_models.py +8 -9
- nucliadb/search/requesters/utils.py +11 -10
- nucliadb/search/search/cache.py +19 -42
- nucliadb/search/search/chat/ask.py +131 -59
- nucliadb/search/search/chat/exceptions.py +3 -5
- nucliadb/search/search/chat/fetcher.py +201 -0
- nucliadb/search/search/chat/images.py +6 -4
- nucliadb/search/search/chat/old_prompt.py +1375 -0
- nucliadb/search/search/chat/parser.py +510 -0
- nucliadb/search/search/chat/prompt.py +563 -615
- nucliadb/search/search/chat/query.py +453 -32
- nucliadb/search/search/chat/rpc.py +85 -0
- nucliadb/search/search/fetch.py +3 -4
- nucliadb/search/search/filters.py +8 -11
- nucliadb/search/search/find.py +33 -31
- nucliadb/search/search/find_merge.py +124 -331
- nucliadb/search/search/graph_strategy.py +14 -12
- nucliadb/search/search/hydrator/__init__.py +49 -0
- nucliadb/search/search/hydrator/fields.py +217 -0
- nucliadb/search/search/hydrator/images.py +130 -0
- nucliadb/search/search/hydrator/paragraphs.py +323 -0
- nucliadb/search/search/hydrator/resources.py +60 -0
- nucliadb/search/search/ingestion_agents.py +5 -5
- nucliadb/search/search/merge.py +90 -94
- nucliadb/search/search/metrics.py +24 -7
- nucliadb/search/search/paragraphs.py +7 -9
- nucliadb/search/search/predict_proxy.py +44 -18
- nucliadb/search/search/query.py +14 -86
- nucliadb/search/search/query_parser/fetcher.py +51 -82
- nucliadb/search/search/query_parser/models.py +19 -48
- nucliadb/search/search/query_parser/old_filters.py +20 -19
- nucliadb/search/search/query_parser/parsers/ask.py +5 -6
- nucliadb/search/search/query_parser/parsers/catalog.py +7 -11
- nucliadb/search/search/query_parser/parsers/common.py +21 -13
- nucliadb/search/search/query_parser/parsers/find.py +6 -29
- nucliadb/search/search/query_parser/parsers/graph.py +18 -28
- nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
- nucliadb/search/search/query_parser/parsers/search.py +15 -56
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
- nucliadb/search/search/rank_fusion.py +18 -13
- nucliadb/search/search/rerankers.py +6 -7
- nucliadb/search/search/retrieval.py +300 -0
- nucliadb/search/search/summarize.py +5 -6
- nucliadb/search/search/utils.py +3 -4
- nucliadb/search/settings.py +1 -2
- nucliadb/standalone/api_router.py +1 -1
- nucliadb/standalone/app.py +4 -3
- nucliadb/standalone/auth.py +5 -6
- nucliadb/standalone/lifecycle.py +2 -2
- nucliadb/standalone/run.py +5 -4
- nucliadb/standalone/settings.py +5 -6
- nucliadb/standalone/versions.py +3 -4
- nucliadb/tasks/consumer.py +13 -8
- nucliadb/tasks/models.py +2 -1
- nucliadb/tasks/producer.py +3 -3
- nucliadb/tasks/retries.py +8 -7
- nucliadb/train/api/utils.py +1 -3
- nucliadb/train/api/v1/shards.py +1 -2
- nucliadb/train/api/v1/trainset.py +1 -2
- nucliadb/train/app.py +1 -1
- nucliadb/train/generator.py +4 -4
- nucliadb/train/generators/field_classifier.py +2 -2
- nucliadb/train/generators/field_streaming.py +6 -6
- nucliadb/train/generators/image_classifier.py +2 -2
- nucliadb/train/generators/paragraph_classifier.py +2 -2
- nucliadb/train/generators/paragraph_streaming.py +2 -2
- nucliadb/train/generators/question_answer_streaming.py +2 -2
- nucliadb/train/generators/sentence_classifier.py +4 -10
- nucliadb/train/generators/token_classifier.py +3 -2
- nucliadb/train/generators/utils.py +6 -5
- nucliadb/train/nodes.py +3 -3
- nucliadb/train/resource.py +6 -8
- nucliadb/train/settings.py +3 -4
- nucliadb/train/types.py +11 -11
- nucliadb/train/upload.py +3 -2
- nucliadb/train/uploader.py +1 -2
- nucliadb/train/utils.py +1 -2
- nucliadb/writer/api/v1/export_import.py +4 -1
- nucliadb/writer/api/v1/field.py +15 -14
- nucliadb/writer/api/v1/knowledgebox.py +18 -56
- nucliadb/writer/api/v1/learning_config.py +5 -4
- nucliadb/writer/api/v1/resource.py +9 -20
- nucliadb/writer/api/v1/services.py +10 -132
- nucliadb/writer/api/v1/upload.py +73 -72
- nucliadb/writer/app.py +8 -2
- nucliadb/writer/resource/basic.py +12 -15
- nucliadb/writer/resource/field.py +43 -5
- nucliadb/writer/resource/origin.py +7 -0
- nucliadb/writer/settings.py +2 -3
- nucliadb/writer/tus/__init__.py +2 -3
- nucliadb/writer/tus/azure.py +5 -7
- nucliadb/writer/tus/dm.py +3 -3
- nucliadb/writer/tus/exceptions.py +3 -4
- nucliadb/writer/tus/gcs.py +15 -22
- nucliadb/writer/tus/s3.py +2 -3
- nucliadb/writer/tus/storage.py +3 -3
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +10 -11
- nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
- nucliadb/common/datamanagers/entities.py +0 -139
- nucliadb/common/external_index_providers/pinecone.py +0 -894
- nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
- nucliadb/search/search/hydrator.py +0 -197
- nucliadb-6.7.2.post4874.dist-info/RECORD +0 -383
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
nucliadb/export_import/utils.py
CHANGED

```diff
@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 import functools
-from
+from collections.abc import AsyncGenerator, AsyncIterator, Callable
 
 import backoff
 from google.protobuf.message import DecodeError as ProtobufDecodeError
@@ -35,6 +35,7 @@ from nucliadb.export_import.exceptions import (
 )
 from nucliadb.export_import.models import ExportedItemType, ExportItem, Metadata
 from nucliadb.ingest.orm.broker_message import generate_broker_message
+from nucliadb.ingest.orm.resource import Resource
 from nucliadb_models.configuration import SearchConfiguration
 from nucliadb_models.export_import import Status
 from nucliadb_protos import knowledgebox_pb2 as kb_pb2
@@ -171,14 +172,6 @@ async def import_binary(
 )
 
 
-async def set_entities_groups(
-    context: ApplicationContext, kbid: str, entities_groups: kb_pb2.EntitiesGroups
-) -> None:
-    async with datamanagers.with_transaction() as txn:
-        await datamanagers.entities.set_entities_groups(txn, kbid=kbid, entities_groups=entities_groups)
-        await txn.commit()
-
-
 async def set_synonyms(context: ApplicationContext, kbid: str, synonyms: kb_pb2.Synonyms) -> None:
     async with datamanagers.with_transaction() as txn:
         await datamanagers.synonyms.set(txn, kbid=kbid, synonyms=synonyms)
@@ -207,9 +200,9 @@ async def iter_kb_resource_uuids(context: ApplicationContext, kbid: str) -> Asyn
 
 async def get_broker_message(
     context: ApplicationContext, kbid: str, rid: str
-) ->
+) -> writer_pb2.BrokerMessage | None:
     async with datamanagers.with_ro_transaction() as txn:
-        resource = await
+        resource = await Resource.get(txn, kbid=kbid, rid=rid)
         if resource is None:
             return None
         resource.disable_vectors = False
@@ -284,11 +277,6 @@ async def download_binary(
     assert downloaded_bytes == cf.size, "Downloaded bytes do not match the expected size"
 
 
-async def get_entities(context: ApplicationContext, kbid: str) -> kb_pb2.EntitiesGroups:
-    async with datamanagers.with_ro_transaction() as txn:
-        return await datamanagers.entities.get_entities_groups(txn, kbid=kbid)
-
-
 async def get_labels(context: ApplicationContext, kbid: str) -> kb_pb2.Labels:
     async with datamanagers.with_ro_transaction() as txn:
         return await datamanagers.labels.get_labels(txn, kbid=kbid)
@@ -434,7 +422,7 @@ class ExportStreamReader:
 
     async def maybe_read_learning_config(
         self,
-    ) -> tuple[
+    ) -> tuple[learning_proxy.LearningConfiguration | None, bytes]:
         """
         Tries to read a learning config from the beginning of the stream.
         Returs the learning config if found. It also returns any leftover bytes that
@@ -533,7 +521,7 @@ class TaskRetryHandler:
 
 async def get_learning_config(
     kbid: str,
-) ->
+) -> learning_proxy.LearningConfiguration | None:
     return await learning_proxy.get_configuration(kbid)
 
 
```
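A pattern that recurs through this file (and the files below): `typing.Optional`/`Union` imports are dropped and annotations are rewritten as PEP 604 unions. A minimal sketch of the style, using a hypothetical helper rather than nucliadb code:

```python
# Hypothetical example of the annotation style this release migrates to.
# Before (pre-3.10 style):  from typing import Optional; def find(...) -> Optional[bytes]
def find(key: str, store: dict[str, bytes] | None = None) -> bytes | None:
    # PEP 604 unions (X | None) replace Optional[X] throughout the diff
    return (store or {}).get(key)
```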
nucliadb/health.py
CHANGED

```diff
@@ -19,7 +19,7 @@
 #
 import asyncio
 import logging
-from
+from collections.abc import Awaitable, Callable
 
 from grpc import aio
 from grpc_health.v1 import health, health_pb2, health_pb2_grpc
@@ -41,7 +41,7 @@ def nats_manager_healthy() -> bool:
 
 
 def pubsub_check() -> bool:
-    driver:
+    driver: PubSubDriver | None = get_utility(Utility.PUBSUB)
     if driver is None:
         return True
     if isinstance(driver, NatsPubsub):
```
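The `pubsub_check` hunk keeps the check's shape and only retypes the utility lookup. A simplified sketch of the pattern, assuming a plain dict in place of nucliadb's utility registry and a local stand-in for `NatsPubsub`:

```python
# Simplified stand-ins: UTILITIES mimics nucliadb's utility registry and
# NatsPubsub the concrete driver; an unconfigured pubsub counts as healthy.
UTILITIES: dict[str, object] = {}

class NatsPubsub:
    def healthy(self) -> bool:
        return True

def pubsub_check() -> bool:
    driver = UTILITIES.get("pubsub")  # PubSubDriver | None in the real code
    if driver is None:
        return True  # nothing configured, nothing to check
    if isinstance(driver, NatsPubsub):
        return driver.healthy()
    return True
```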
nucliadb/ingest/app.py
CHANGED

```diff
@@ -19,7 +19,7 @@
 #
 import asyncio
 import importlib.metadata
-from
+from collections.abc import Awaitable, Callable
 
 from nucliadb import health
 from nucliadb.backups.tasks import initialize_consumers as initialize_backup_consumers
@@ -96,7 +96,7 @@ async def initialize_grpc(): # pragma: no cover
     finalizers = await initialize()
     grpc_finalizer = await start_grpc(SERVICE_NAME)
 
-    return [grpc_finalizer
+    return [grpc_finalizer, *finalizers]
 
 
 async def initialize_pull_workers() -> list[Callable[[], Awaitable[None]]]:
@@ -114,14 +114,14 @@ async def main_consumer(): # pragma: no cover
 
     ingest_consumers = await consumer_service.start_ingest_consumers(SERVICE_NAME)
 
-    await run_until_exit([grpc_health_finalizer, ingest_consumers, metrics_server.shutdown
+    await run_until_exit([grpc_health_finalizer, ingest_consumers, metrics_server.shutdown, *finalizers])
 
 
 async def main_orm_grpc(): # pragma: no cover
     finalizers = await initialize()
     grpc_finalizer = await start_grpc(SERVICE_NAME)
     metrics_server = await serve_metrics()
-    await run_until_exit([grpc_finalizer, metrics_server.shutdown
+    await run_until_exit([grpc_finalizer, metrics_server.shutdown, *finalizers])
 
 
 async def main_ingest_processed_consumer(): # pragma: no cover
@@ -134,7 +134,7 @@ async def main_ingest_processed_consumer(): # pragma: no cover
     consumer = await consumer_service.start_ingest_processed_consumer_v2(SERVICE_NAME)
 
     await run_until_exit(
-        [grpc_health_finalizer, consumer, metrics_server.shutdown, stop_processing_engine
+        [grpc_health_finalizer, consumer, metrics_server.shutdown, stop_processing_engine, *finalizers]
     )
 
 
@@ -158,8 +158,8 @@ async def main_subscriber_workers(): # pragma: no cover
     backup_consumers_finalizers = await initialize_backup_consumers(context)
 
     await run_until_exit(
-
-
+        [
+            *backup_consumers_finalizers,
             imports_consumer.finalize,
             exports_consumer.finalize,
             stop_ingest_utility,
@@ -169,8 +169,8 @@ async def main_subscriber_workers(): # pragma: no cover
             grpc_health_finalizer,
             metrics_server.shutdown,
             context.finalize,
+            *finalizers,
         ]
-        + finalizers
     )
 
 
```
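Every `app.py` hunk makes the same mechanical change: finalizer lists are built with iterable unpacking (`[a, *rest]`) instead of list concatenation (`[a] + rest`), which also lets items follow the spread, as in `main_subscriber_workers`. A self-contained sketch of the equivalence:

```python
# Both spellings build the same flat list of finalizers.
async def fin_a() -> None: ...
async def fin_b() -> None: ...

finalizers = [fin_b]
old_style = [fin_a] + finalizers   # removed form
new_style = [fin_a, *finalizers]   # form adopted in this release
assert old_style == new_style
```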
nucliadb/ingest/consumer/consumer.py
CHANGED

```diff
@@ -20,12 +20,10 @@
 import asyncio
 import logging
 import time
-from typing import Optional, Union
 
 import backoff
 import nats
 import nats.js.api
-import nats.js.errors
 from nats.aio.client import Msg
 from nats.js import JetStreamContext
 
@@ -74,8 +72,8 @@ class IngestConsumer:
         partition: str,
         storage: Storage,
         nats_connection_manager: NatsConnectionManager,
-        pubsub:
-        lock:
+        pubsub: PubSubDriver | None = None,
+        lock: asyncio.Lock | asyncio.Semaphore | None = None,
     ):
         self.driver = driver
         self.partition = partition
@@ -85,9 +83,9 @@ class IngestConsumer:
 
         self.lock = lock or asyncio.Lock()
         self.processor = Processor(driver, storage, pubsub, partition)
-        self.subscription:
+        self.subscription: JetStreamContext.PullSubscription | None = None
 
-    async def ack_message(self, msg: Msg, kbid:
+    async def ack_message(self, msg: Msg, kbid: str | None = None):
         await msg.ack()
 
     async def initialize(self):
@@ -162,7 +160,7 @@ class IngestConsumer:
     async def subscription_worker(self, msg: Msg):
         context.clear_context()
 
-        kbid:
+        kbid: str | None = None
         subject = msg.subject
         reply = msg.reply
         seqid = int(reply.split(".")[5])
@@ -238,7 +236,7 @@ class IngestConsumer:
                 logger.info(
                     f"An error happend while processing a message from {message_source}. "
                     f"A copy of the message has been stored on {self.processor.storage.deadletter_bucket}. "
-                    f"Check sentry for more details: {
+                    f"Check sentry for more details: {e!s}"
                 )
                 await self.ack_message(msg, kbid)
                 logger.info("Message acked because of deadletter", extra={"seqid": seqid})
@@ -250,7 +248,7 @@ class IngestConsumer:
                 logger.info(
                     f"An error happend while processing a message from {message_source}. "
                     f"This message has been dropped and won't be retried again"
-                    f"Check sentry for more details: {
+                    f"Check sentry for more details: {e!s}"
                 )
                 await self.ack_message(msg, kbid)
                 logger.info("Message acked because of drop", extra={"seqid": seqid})
@@ -260,7 +258,7 @@ class IngestConsumer:
             logger.exception(
                 f"An error happend while processing a message from {message_source}. "
                 "Message has not been ACKd and will be retried. "
-                f"Check sentry for more details: {
+                f"Check sentry for more details: {e!s}"
             )
             await msg.nak()
             logger.info("Message nacked because of unhandled error", extra={"seqid": seqid})
```
nucliadb/ingest/consumer/pull.py
CHANGED

```diff
@@ -21,7 +21,6 @@ import asyncio
 import base64
 import time
 from contextlib import contextmanager
-from typing import Optional
 
 from aiohttp.client_exceptions import ClientConnectorError
 from opentelemetry import trace
@@ -31,10 +30,10 @@ from opentelemetry.trace import (
     Link,
 )
 
+from nucliadb.common.http_clients.exceptions import ServiceUnavailableException
 from nucliadb.common.http_clients.processing import (
     ProcessingHTTPClient,
     ProcessingPullMessageProgressUpdater,
-    get_nua_api_id,
 )
 from nucliadb.common.maindb.driver import Driver
 from nucliadb.ingest import SERVICE_NAME, logger, logger_activity
@@ -95,7 +94,7 @@ class PullV2Worker:
         driver: Driver,
         storage: Storage,
         pull_time_error_backoff: int,
-        pubsub:
+        pubsub: PubSubDriver | None = None,
         pull_time_empty_backoff: float = 5.0,
         pull_api_timeout: int = 60,
     ):
@@ -141,12 +140,9 @@ class PullV2Worker:
         data = None
         if nuclia_settings.nuclia_service_account is not None:
             headers["X-STF-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"
-
-        try:
-            get_nua_api_id()
-        except Exception as exc:
+        if nuclia_settings.nuclia_service_account is None:
             logger.exception("Could not read NUA API Key. Can not start pull worker")
-            raise ReallyStopPulling()
+            raise ReallyStopPulling()
 
         ack_tokens = []
         async with ProcessingHTTPClient() as processing_http_client:
@@ -209,6 +205,12 @@ class PullV2Worker:
                         payload_length = len(base64.b64decode(data.payload))
                         logger.error(f"Message too big for transaction: {payload_length}")
                     raise e
+
+                except ServiceUnavailableException as ex:
+                    logger.warning(f"Processing api is unavailable, will retry shortly: {ex}")
+                    await processing_http_client.reset_session()
+                    await asyncio.sleep(self.pull_time_error_backoff)
+
                 except Exception:
                     logger.exception("Unhandled error pulling messages from processing")
                     await asyncio.sleep(self.pull_time_error_backoff)
```
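The new `except ServiceUnavailableException` branch turns a processing-API outage into a soft retry: warn, reset the HTTP session, and sleep for the error backoff instead of letting the pull loop crash. A runnable sketch of that handling, with local stand-ins for the exception and `ProcessingHTTPClient`:

```python
import asyncio
import logging

logger = logging.getLogger(__name__)

class ServiceUnavailableException(Exception):
    """Stand-in for nucliadb.common.http_clients.exceptions.ServiceUnavailableException."""

class Client:
    """Stand-in for ProcessingHTTPClient."""
    async def reset_session(self) -> None: ...
    async def pull(self) -> None:
        raise ServiceUnavailableException("processing returned 503")

async def pull_once(client: Client, backoff_s: float) -> None:
    try:
        await client.pull()
    except ServiceUnavailableException as ex:
        # mirrors the new handler: warn, reset the session, back off, retry later
        logger.warning(f"Processing api is unavailable, will retry shortly: {ex}")
        await client.reset_session()
        await asyncio.sleep(backoff_s)

asyncio.run(pull_once(Client(), 0.01))
```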
nucliadb/ingest/consumer/service.py
CHANGED

```diff
@@ -19,24 +19,21 @@
 #
 import asyncio
 import sys
+from collections.abc import Awaitable, Callable
 from functools import partial
-from typing import Awaitable, Callable, Optional
 
-from nucliadb.common.back_pressure.materializer import BackPressureMaterializer
-from nucliadb.common.back_pressure.settings import settings as back_pressure_settings
 from nucliadb.common.maindb.utils import setup_driver
 from nucliadb.ingest import SERVICE_NAME, logger
 from nucliadb.ingest.consumer.consumer import IngestConsumer
 from nucliadb.ingest.consumer.pull import PullV2Worker
 from nucliadb.ingest.settings import settings
 from nucliadb_utils.exceptions import ConfigurationError
-from nucliadb_utils.settings import
+from nucliadb_utils.settings import transaction_settings
 from nucliadb_utils.utilities import (
     get_audit,
     get_nats_manager,
     get_pubsub,
     get_storage,
-    start_nats_manager,
 )
 
 from .auditing import IndexAuditHandler, ResourceWritesAuditHandler
@@ -57,29 +54,8 @@ async def _exit_tasks(tasks: list[asyncio.Task]) -> None:
     await asyncio.gather(*tasks, return_exceptions=True)
 
 
-async def start_back_pressure() -> BackPressureMaterializer:
-    logger.info("Starting back pressure materializer")
-    nats_manager = await start_nats_manager(
-        SERVICE_NAME,
-        indexing_settings.index_jetstream_servers,
-        indexing_settings.index_jetstream_auth,
-    )
-    back_pressure = BackPressureMaterializer(
-        nats_manager,
-        indexing_check_interval=back_pressure_settings.indexing_check_interval,
-        ingest_check_interval=back_pressure_settings.ingest_check_interval,
-    )
-    await back_pressure.start()
-    return back_pressure
-
-
-async def stop_back_pressure(materializer: BackPressureMaterializer) -> None:
-    await materializer.stop()
-    await materializer.nats_manager.finalize()
-
-
 async def start_ingest_consumers(
-    service_name:
+    service_name: str | None = None,
 ) -> Callable[[], Awaitable[None]]:
     if transaction_settings.transaction_local:
         raise ConfigurationError("Can not start ingest consumers in local mode")
@@ -115,7 +91,7 @@ async def start_ingest_consumers(
 
 
 async def start_ingest_processed_consumer_v2(
-    service_name:
+    service_name: str | None = None,
 ) -> Callable[[], Awaitable[None]]:
     """
     This is not meant to be deployed with a stateful set like the other consumers.
@@ -164,9 +140,8 @@ async def start_shard_creator() -> Callable[[], Awaitable[None]]:
     driver = await setup_driver()
     pubsub = await get_pubsub()
     assert pubsub is not None, "Pubsub is not configured"
-    storage = await get_storage(service_name=SERVICE_NAME)
 
-    shard_creator = ShardCreatorHandler(driver=driver,
+    shard_creator = ShardCreatorHandler(driver=driver, pubsub=pubsub)
     await shard_creator.initialize()
 
     return shard_creator.finalize
```
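With the back-pressure helpers gone, the consumer starters keep a single contract: each `start_*` coroutine wires up its consumers and returns an async finalizer for the caller to await at shutdown. A stripped-down sketch of that shape (names local to this example):

```python
import asyncio
from collections.abc import Awaitable, Callable

async def start_ingest_consumers(service_name: str | None = None) -> Callable[[], Awaitable[None]]:
    # ... consumers would be created and subscribed here ...
    async def finalize() -> None:
        ...  # drain subscriptions and close connections on shutdown
    return finalize

async def main() -> None:
    finalize = await start_ingest_consumers()
    await finalize()

asyncio.run(main())
```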
nucliadb/ingest/consumer/shard_creator.py
CHANGED

```diff
@@ -25,14 +25,14 @@ from typing import Any
 
 from nidx_protos import nodereader_pb2, noderesources_pb2
 
-from nucliadb.common import locking
+from nucliadb.common import datamanagers, locking
+from nucliadb.common.cluster.settings import settings
 from nucliadb.common.cluster.utils import get_shard_manager
 from nucliadb.common.maindb.driver import Driver
 from nucliadb.common.nidx import get_nidx_api_client
 from nucliadb_protos import writer_pb2
 from nucliadb_utils import const
 from nucliadb_utils.cache.pubsub import PubSubDriver
-from nucliadb_utils.storages.storage import Storage
 
 from . import metrics
 from .utils import DelayedTaskHandler
@@ -52,12 +52,10 @@ class ShardCreatorHandler:
         self,
         *,
         driver: Driver,
-        storage: Storage,
         pubsub: PubSubDriver,
         check_delay: float = 10.0,
     ):
         self.driver = driver
-        self.storage = storage
         self.pubsub = pubsub
         self.shard_manager = get_shard_manager()
         self.task_handler = DelayedTaskHandler(check_delay)
@@ -111,4 +109,17 @@ class ShardCreatorHandler:
                 shard_id=noderesources_pb2.ShardId(id=current_shard.nidx_shard_id)
             )  # type: ignore
         )
-
+
+        if not should_create_new_shard(shard.paragraphs):
+            return
+
+        logger.info({"message": "Adding shard", "kbid": kbid})
+        async with datamanagers.with_rw_transaction() as txn:
+            kb_config = await datamanagers.kb.get_config(txn, kbid=kbid)
+            prewarm = kb_config is not None and kb_config.prewarm_enabled
+            await self.shard_manager.create_shard_by_kbid(txn, kbid, prewarm_enabled=prewarm)
+            await txn.commit()
+
+
+def should_create_new_shard(num_paragraphs: int) -> bool:
+    return num_paragraphs > settings.max_shard_paragraphs
```
nucliadb/ingest/fields/base.py
CHANGED

```diff
@@ -24,11 +24,12 @@ import enum
 import logging
 from collections import defaultdict
 from datetime import datetime
-from typing import TYPE_CHECKING, Any, Generic,
+from typing import TYPE_CHECKING, Any, Generic, TypeVar
 
 from google.protobuf.message import DecodeError, Message
 
 from nucliadb.common import datamanagers
+from nucliadb.common.ids import FieldId
 from nucliadb.ingest.fields.exceptions import InvalidFieldClass, InvalidPBClass
 from nucliadb_protos.knowledgebox_pb2 import VectorSetConfig
 from nucliadb_protos.resources_pb2 import (
@@ -46,10 +47,8 @@ from nucliadb_protos.resources_pb2 import (
 )
 from nucliadb_protos.utils_pb2 import ExtractedText, VectorObject
 from nucliadb_protos.writer_pb2 import Error, FieldStatus
-from nucliadb_utils import const
 from nucliadb_utils.storages.exceptions import CouldNotCopyNotFound
 from nucliadb_utils.storages.storage import Storage, StorageField
-from nucliadb_utils.utilities import has_feature
 
 logger = logging.getLogger(__name__)
 
@@ -76,27 +75,27 @@ PbType = TypeVar("PbType", bound=Message)
 
 
 class Field(Generic[PbType]):
-    pbklass:
+    pbklass: type[PbType]
     type: str = "x"
-    value:
-    extracted_text:
-    extracted_vectors: dict[
-    computed_metadata:
-    large_computed_metadata:
-    question_answers:
+    value: Any | None
+    extracted_text: ExtractedText | None
+    extracted_vectors: dict[str | None, VectorObject]
+    computed_metadata: FieldComputedMetadata | None
+    large_computed_metadata: LargeComputedMetadata | None
+    question_answers: FieldQuestionAnswers | None
 
     def __init__(
         self,
         id: str,
         resource: Resource,
-        pb:
-        value:
+        pb: Any | None = None,
+        value: Any | None = None,
     ):
         if self.pbklass is None:
             raise InvalidFieldClass()
 
         self.value = None
-        self.extracted_text:
+        self.extracted_text: ExtractedText | None = None
         self.extracted_vectors = {}
         self.computed_metadata = None
         self.large_computed_metadata = None
@@ -119,12 +118,20 @@ class Field(Generic[PbType]):
 
     @property
     def kbid(self) -> str:
-        return self.resource.
+        return self.resource.kbid
 
     @property
     def uuid(self) -> str:
         return self.resource.uuid
 
+    @property
+    def field_id(self) -> FieldId:
+        return FieldId(
+            rid=self.resource.uuid,
+            type=self.type,
+            key=self.id,
+        )
+
     @property
     def storage(self) -> Storage:
         return self.resource.storage
@@ -152,7 +159,7 @@ class Field(Generic[PbType]):
 
         return self.storage.file_extracted(self.kbid, self.uuid, self.type, self.id, key)
 
-    async def db_get_value(self) ->
+    async def db_get_value(self) -> PbType | None:
         if self.value is None:
             payload = await datamanagers.fields.get_raw(
                 self.resource.txn,
@@ -215,21 +222,6 @@ class Field(Generic[PbType]):
     ) -> None:
         # Try delete vectors
         sf = self._get_extracted_vectors_storage_field(vectorset, storage_key_kind)
-
-        if has_feature(const.Features.DEBUG_MISSING_VECTORS):
-            # This is a very chatty log. It is just a temporary hint while debugging an issue.
-            logger.info(
-                "Deleting vectors from storage",
-                extra={
-                    "kbid": self.kbid,
-                    "rid": self.resource.uuid,
-                    "field": f"{self.type}/{self.id}",
-                    "vectorset": vectorset,
-                    "storage_key_kind": storage_key_kind,
-                    "key": sf.key,
-                    "bucket": sf.bucket,
-                },
-            )
         try:
             await self.storage.delete_upload(sf.key, sf.bucket)
         except KeyError:
@@ -242,7 +234,7 @@ class Field(Generic[PbType]):
         except KeyError:
             pass
 
-    async def get_error(self) ->
+    async def get_error(self) -> Error | None:
         return await datamanagers.fields.get_error(
             self.resource.txn,
             kbid=self.kbid,
@@ -261,7 +253,7 @@ class Field(Generic[PbType]):
             error=error,
         )
 
-    async def get_status(self) ->
+    async def get_status(self) -> FieldStatus | None:
         return await datamanagers.fields.get_status(
             self.resource.txn,
             kbid=self.kbid,
@@ -280,7 +272,7 @@ class Field(Generic[PbType]):
             status=status,
         )
 
-    async def get_question_answers(self, force=False) ->
+    async def get_question_answers(self, force=False) -> FieldQuestionAnswers | None:
         if self.question_answers is None or force:
             sf = self.get_storage_field(FieldTypes.QUESTION_ANSWERS)
             try:
@@ -297,9 +289,7 @@ class Field(Generic[PbType]):
     async def set_question_answers(self, payload: FieldQuestionAnswerWrapper) -> None:
         if self.type in SUBFIELDFIELDS:
             try:
-                actual_payload:
-                    force=True
-                )
+                actual_payload: FieldQuestionAnswers | None = await self.get_question_answers(force=True)
             except KeyError:
                 actual_payload = None
         else:
@@ -332,7 +322,7 @@ class Field(Generic[PbType]):
         self.question_answers = actual_payload
 
     async def set_extracted_text(self, payload: ExtractedTextWrapper) -> None:
-        actual_payload:
+        actual_payload: ExtractedText | None = None
         if self.type in SUBFIELDFIELDS:
             # Try to get the previously extracted text protobuf if it exists so we can merge it with the new splits
             # coming from the processing payload.
@@ -383,7 +373,7 @@ class Field(Generic[PbType]):
         await self.storage.upload_pb(sf, actual_payload)
         self.extracted_text = actual_payload
 
-    async def get_extracted_text(self, force=False) ->
+    async def get_extracted_text(self, force=False) -> ExtractedText | None:
         if self.extracted_text is None or force:
             async with self.locks["extracted_text"]:
                 # Value could have been fetched while waiting for the lock
@@ -399,10 +389,10 @@ class Field(Generic[PbType]):
         payload: ExtractedVectorsWrapper,
         vectorset: str,
         storage_key_kind: VectorSetConfig.StorageKeyKind.ValueType,
-    ) ->
+    ) -> VectorObject | None:
         if self.type in SUBFIELDFIELDS:
             try:
-                actual_payload:
+                actual_payload: VectorObject | None = await self.get_vectors(
                     vectorset=vectorset,
                     storage_key_kind=storage_key_kind,
                     force=True,
@@ -413,7 +403,7 @@ class Field(Generic[PbType]):
             actual_payload = None
 
         sf = self._get_extracted_vectors_storage_field(vectorset, storage_key_kind)
-        vo:
+        vo: VectorObject | None = None
         if actual_payload is None:
             # Its first extracted vectors
             if payload.HasField("file"):
@@ -465,7 +455,7 @@ class Field(Generic[PbType]):
         vectorset: str,
         storage_key_kind: VectorSetConfig.StorageKeyKind.ValueType,
         force: bool = False,
-    ) ->
+    ) -> VectorObject | None:
         if self.extracted_vectors.get(vectorset, None) is None or force:
             sf = self._get_extracted_vectors_storage_field(vectorset, storage_key_kind)
             payload = await self.storage.download_pb(sf, VectorObject)
@@ -476,9 +466,7 @@ class Field(Generic[PbType]):
     async def set_field_metadata(self, payload: FieldComputedMetadataWrapper) -> FieldComputedMetadata:
         if self.type in SUBFIELDFIELDS:
             try:
-                actual_payload:
-                    force=True
-                )
+                actual_payload: FieldComputedMetadata | None = await self.get_field_metadata(force=True)
             except KeyError:
                 actual_payload = None
         else:
@@ -521,7 +509,7 @@ class Field(Generic[PbType]):
 
         return self.computed_metadata
 
-    async def get_field_metadata(self, force: bool = False) ->
+    async def get_field_metadata(self, force: bool = False) -> FieldComputedMetadata | None:
         if self.computed_metadata is None or force:
             async with self.locks["field_metadata"]:
                 # Value could have been fetched while waiting for the lock
@@ -535,7 +523,7 @@ class Field(Generic[PbType]):
     async def set_large_field_metadata(self, payload: LargeComputedMetadataWrapper):
         if self.type in SUBFIELDFIELDS:
             try:
-                actual_payload:
+                actual_payload: LargeComputedMetadata | None = await self.get_large_field_metadata(
                     force=True
                 )
             except KeyError:
@@ -545,7 +533,7 @@ class Field(Generic[PbType]):
 
         sf = self.get_storage_field(FieldTypes.FIELD_LARGE_METADATA)
 
-        new_payload:
+        new_payload: LargeComputedMetadata | None = None
         if payload.HasField("file"):
             new_payload = LargeComputedMetadata()
             data = await self.storage.downloadbytescf(payload.file)
@@ -572,7 +560,7 @@ class Field(Generic[PbType]):
 
         return self.large_computed_metadata
 
-    async def get_large_field_metadata(self, force: bool = False) ->
+    async def get_large_field_metadata(self, force: bool = False) -> LargeComputedMetadata | None:
         if self.large_computed_metadata is None or force:
             sf = self.get_storage_field(FieldTypes.FIELD_LARGE_METADATA)
             payload = await self.storage.download_pb(
```