nucliadb 6.9.1.post5192__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0023_backfill_pg_catalog.py +2 -2
- migrations/0029_backfill_field_status.py +3 -4
- migrations/0032_remove_old_relations.py +2 -3
- migrations/0038_backfill_catalog_field_labels.py +2 -2
- migrations/0039_backfill_converation_splits_metadata.py +2 -2
- migrations/0041_reindex_conversations.py +137 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
- migrations/pg/0012_catalog_statistics_undo.py +26 -0
- nucliadb/backups/create.py +2 -15
- nucliadb/backups/restore.py +4 -15
- nucliadb/backups/tasks.py +4 -1
- nucliadb/common/back_pressure/cache.py +2 -3
- nucliadb/common/back_pressure/materializer.py +7 -13
- nucliadb/common/back_pressure/settings.py +6 -6
- nucliadb/common/back_pressure/utils.py +1 -0
- nucliadb/common/cache.py +9 -9
- nucliadb/common/catalog/interface.py +12 -12
- nucliadb/common/catalog/pg.py +41 -29
- nucliadb/common/catalog/utils.py +3 -3
- nucliadb/common/cluster/manager.py +5 -4
- nucliadb/common/cluster/rebalance.py +483 -114
- nucliadb/common/cluster/rollover.py +25 -9
- nucliadb/common/cluster/settings.py +3 -8
- nucliadb/common/cluster/utils.py +34 -8
- nucliadb/common/context/__init__.py +7 -8
- nucliadb/common/context/fastapi.py +1 -2
- nucliadb/common/datamanagers/__init__.py +2 -4
- nucliadb/common/datamanagers/atomic.py +4 -2
- nucliadb/common/datamanagers/cluster.py +1 -2
- nucliadb/common/datamanagers/fields.py +3 -4
- nucliadb/common/datamanagers/kb.py +6 -6
- nucliadb/common/datamanagers/labels.py +2 -3
- nucliadb/common/datamanagers/resources.py +10 -33
- nucliadb/common/datamanagers/rollover.py +5 -7
- nucliadb/common/datamanagers/search_configurations.py +1 -2
- nucliadb/common/datamanagers/synonyms.py +1 -2
- nucliadb/common/datamanagers/utils.py +4 -4
- nucliadb/common/datamanagers/vectorsets.py +4 -4
- nucliadb/common/external_index_providers/base.py +32 -5
- nucliadb/common/external_index_providers/manager.py +4 -5
- nucliadb/common/filter_expression.py +128 -40
- nucliadb/common/http_clients/processing.py +12 -23
- nucliadb/common/ids.py +6 -4
- nucliadb/common/locking.py +1 -2
- nucliadb/common/maindb/driver.py +9 -8
- nucliadb/common/maindb/local.py +5 -5
- nucliadb/common/maindb/pg.py +9 -8
- nucliadb/common/nidx.py +3 -4
- nucliadb/export_import/datamanager.py +4 -3
- nucliadb/export_import/exporter.py +11 -19
- nucliadb/export_import/importer.py +13 -6
- nucliadb/export_import/tasks.py +2 -0
- nucliadb/export_import/utils.py +6 -18
- nucliadb/health.py +2 -2
- nucliadb/ingest/app.py +8 -8
- nucliadb/ingest/consumer/consumer.py +8 -10
- nucliadb/ingest/consumer/pull.py +3 -8
- nucliadb/ingest/consumer/service.py +3 -3
- nucliadb/ingest/consumer/utils.py +1 -1
- nucliadb/ingest/fields/base.py +28 -49
- nucliadb/ingest/fields/conversation.py +12 -12
- nucliadb/ingest/fields/exceptions.py +1 -2
- nucliadb/ingest/fields/file.py +22 -8
- nucliadb/ingest/fields/link.py +7 -7
- nucliadb/ingest/fields/text.py +2 -3
- nucliadb/ingest/orm/brain_v2.py +78 -64
- nucliadb/ingest/orm/broker_message.py +2 -4
- nucliadb/ingest/orm/entities.py +10 -209
- nucliadb/ingest/orm/index_message.py +4 -4
- nucliadb/ingest/orm/knowledgebox.py +18 -27
- nucliadb/ingest/orm/processor/auditing.py +1 -3
- nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
- nucliadb/ingest/orm/processor/processor.py +27 -27
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
- nucliadb/ingest/orm/resource.py +72 -70
- nucliadb/ingest/orm/utils.py +1 -1
- nucliadb/ingest/processing.py +17 -17
- nucliadb/ingest/serialize.py +202 -145
- nucliadb/ingest/service/writer.py +3 -109
- nucliadb/ingest/settings.py +3 -4
- nucliadb/ingest/utils.py +1 -2
- nucliadb/learning_proxy.py +11 -11
- nucliadb/metrics_exporter.py +5 -4
- nucliadb/middleware/__init__.py +82 -1
- nucliadb/migrator/datamanager.py +3 -4
- nucliadb/migrator/migrator.py +1 -2
- nucliadb/migrator/models.py +1 -2
- nucliadb/migrator/settings.py +1 -2
- nucliadb/models/internal/augment.py +614 -0
- nucliadb/models/internal/processing.py +19 -19
- nucliadb/openapi.py +2 -2
- nucliadb/purge/__init__.py +3 -8
- nucliadb/purge/orphan_shards.py +1 -2
- nucliadb/reader/__init__.py +5 -0
- nucliadb/reader/api/models.py +6 -13
- nucliadb/reader/api/v1/download.py +59 -38
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/learning_config.py +24 -4
- nucliadb/reader/api/v1/resource.py +61 -9
- nucliadb/reader/api/v1/services.py +18 -14
- nucliadb/reader/app.py +3 -1
- nucliadb/reader/reader/notifications.py +1 -2
- nucliadb/search/api/v1/__init__.py +2 -0
- nucliadb/search/api/v1/ask.py +3 -4
- nucliadb/search/api/v1/augment.py +585 -0
- nucliadb/search/api/v1/catalog.py +11 -15
- nucliadb/search/api/v1/find.py +16 -22
- nucliadb/search/api/v1/hydrate.py +25 -25
- nucliadb/search/api/v1/knowledgebox.py +1 -2
- nucliadb/search/api/v1/predict_proxy.py +1 -2
- nucliadb/search/api/v1/resource/ask.py +7 -7
- nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
- nucliadb/search/api/v1/resource/search.py +9 -11
- nucliadb/search/api/v1/retrieve.py +130 -0
- nucliadb/search/api/v1/search.py +28 -32
- nucliadb/search/api/v1/suggest.py +11 -14
- nucliadb/search/api/v1/summarize.py +1 -2
- nucliadb/search/api/v1/utils.py +2 -2
- nucliadb/search/app.py +3 -2
- nucliadb/search/augmentor/__init__.py +21 -0
- nucliadb/search/augmentor/augmentor.py +232 -0
- nucliadb/search/augmentor/fields.py +704 -0
- nucliadb/search/augmentor/metrics.py +24 -0
- nucliadb/search/augmentor/paragraphs.py +334 -0
- nucliadb/search/augmentor/resources.py +238 -0
- nucliadb/search/augmentor/utils.py +33 -0
- nucliadb/search/lifecycle.py +3 -1
- nucliadb/search/predict.py +24 -17
- nucliadb/search/predict_models.py +8 -9
- nucliadb/search/requesters/utils.py +11 -10
- nucliadb/search/search/cache.py +19 -23
- nucliadb/search/search/chat/ask.py +88 -59
- nucliadb/search/search/chat/exceptions.py +3 -5
- nucliadb/search/search/chat/fetcher.py +201 -0
- nucliadb/search/search/chat/images.py +6 -4
- nucliadb/search/search/chat/old_prompt.py +1375 -0
- nucliadb/search/search/chat/parser.py +510 -0
- nucliadb/search/search/chat/prompt.py +563 -615
- nucliadb/search/search/chat/query.py +449 -36
- nucliadb/search/search/chat/rpc.py +85 -0
- nucliadb/search/search/fetch.py +3 -4
- nucliadb/search/search/filters.py +8 -11
- nucliadb/search/search/find.py +33 -31
- nucliadb/search/search/find_merge.py +124 -331
- nucliadb/search/search/graph_strategy.py +14 -12
- nucliadb/search/search/hydrator/__init__.py +3 -152
- nucliadb/search/search/hydrator/fields.py +92 -50
- nucliadb/search/search/hydrator/images.py +7 -7
- nucliadb/search/search/hydrator/paragraphs.py +42 -26
- nucliadb/search/search/hydrator/resources.py +20 -16
- nucliadb/search/search/ingestion_agents.py +5 -5
- nucliadb/search/search/merge.py +90 -94
- nucliadb/search/search/metrics.py +10 -9
- nucliadb/search/search/paragraphs.py +7 -9
- nucliadb/search/search/predict_proxy.py +13 -9
- nucliadb/search/search/query.py +14 -86
- nucliadb/search/search/query_parser/fetcher.py +51 -82
- nucliadb/search/search/query_parser/models.py +19 -20
- nucliadb/search/search/query_parser/old_filters.py +20 -19
- nucliadb/search/search/query_parser/parsers/ask.py +4 -5
- nucliadb/search/search/query_parser/parsers/catalog.py +5 -6
- nucliadb/search/search/query_parser/parsers/common.py +5 -6
- nucliadb/search/search/query_parser/parsers/find.py +6 -26
- nucliadb/search/search/query_parser/parsers/graph.py +13 -23
- nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
- nucliadb/search/search/query_parser/parsers/search.py +15 -53
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
- nucliadb/search/search/rank_fusion.py +18 -13
- nucliadb/search/search/rerankers.py +5 -6
- nucliadb/search/search/retrieval.py +300 -0
- nucliadb/search/search/summarize.py +5 -6
- nucliadb/search/search/utils.py +3 -4
- nucliadb/search/settings.py +1 -2
- nucliadb/standalone/api_router.py +1 -1
- nucliadb/standalone/app.py +4 -3
- nucliadb/standalone/auth.py +5 -6
- nucliadb/standalone/lifecycle.py +2 -2
- nucliadb/standalone/run.py +2 -4
- nucliadb/standalone/settings.py +5 -6
- nucliadb/standalone/versions.py +3 -4
- nucliadb/tasks/consumer.py +13 -8
- nucliadb/tasks/models.py +2 -1
- nucliadb/tasks/producer.py +3 -3
- nucliadb/tasks/retries.py +8 -7
- nucliadb/train/api/utils.py +1 -3
- nucliadb/train/api/v1/shards.py +1 -2
- nucliadb/train/api/v1/trainset.py +1 -2
- nucliadb/train/app.py +1 -1
- nucliadb/train/generator.py +4 -4
- nucliadb/train/generators/field_classifier.py +2 -2
- nucliadb/train/generators/field_streaming.py +6 -6
- nucliadb/train/generators/image_classifier.py +2 -2
- nucliadb/train/generators/paragraph_classifier.py +2 -2
- nucliadb/train/generators/paragraph_streaming.py +2 -2
- nucliadb/train/generators/question_answer_streaming.py +2 -2
- nucliadb/train/generators/sentence_classifier.py +2 -2
- nucliadb/train/generators/token_classifier.py +3 -2
- nucliadb/train/generators/utils.py +6 -5
- nucliadb/train/nodes.py +3 -3
- nucliadb/train/resource.py +6 -8
- nucliadb/train/settings.py +3 -4
- nucliadb/train/types.py +11 -11
- nucliadb/train/upload.py +3 -2
- nucliadb/train/uploader.py +1 -2
- nucliadb/train/utils.py +1 -2
- nucliadb/writer/api/v1/export_import.py +4 -1
- nucliadb/writer/api/v1/field.py +7 -11
- nucliadb/writer/api/v1/knowledgebox.py +3 -4
- nucliadb/writer/api/v1/resource.py +9 -20
- nucliadb/writer/api/v1/services.py +10 -132
- nucliadb/writer/api/v1/upload.py +73 -72
- nucliadb/writer/app.py +8 -2
- nucliadb/writer/resource/basic.py +12 -15
- nucliadb/writer/resource/field.py +7 -5
- nucliadb/writer/resource/origin.py +7 -0
- nucliadb/writer/settings.py +2 -3
- nucliadb/writer/tus/__init__.py +2 -3
- nucliadb/writer/tus/azure.py +1 -3
- nucliadb/writer/tus/dm.py +3 -3
- nucliadb/writer/tus/exceptions.py +3 -4
- nucliadb/writer/tus/gcs.py +5 -6
- nucliadb/writer/tus/s3.py +2 -3
- nucliadb/writer/tus/storage.py +3 -3
- {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +9 -10
- nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
- nucliadb/common/datamanagers/entities.py +0 -139
- nucliadb-6.9.1.post5192.dist-info/RECORD +0 -392
- {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
- {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
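Most of the small per-file deltas below follow one recurring pattern: typing.Optional[X] annotations are rewritten as PEP 604 "X | None" unions and the now-unused "from typing import Optional" imports are dropped. A minimal before/after illustration of the pattern (simplified, not copied verbatim from any single file):

    # 6.9.1-style annotation
    from typing import Optional

    async def get_last_seqid_old(driver: object, worker: str) -> Optional[int]:
        return None

    # 6.10.0-style annotation: built-in union syntax, no Optional import needed
    async def get_last_seqid_new(driver: object, worker: str) -> int | None:
        return None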
@@ -17,9 +17,10 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
+from collections.abc import AsyncGenerator, Callable, Coroutine, Sequence
 from datetime import datetime
 from functools import partial
-from typing import Any
+from typing import Any
 from uuid import uuid4

 from grpc import StatusCode
@@ -88,7 +89,7 @@ class KnowledgeBox:
         self.txn = txn
         self.storage = storage
         self.kbid = kbid
-        self._config:
+        self._config: KnowledgeBoxConfig | None = None

     @staticmethod
     def new_unique_kbid() -> str:
@@ -248,14 +249,14 @@ class KnowledgeBox:
         driver: Driver,
         kbid: str,
         *,
-        slug:
-        title:
-        description:
-        migration_version:
-        external_index_provider:
-        hidden_resources_enabled:
-        hidden_resources_hide_on_creation:
-        prewarm_enabled:
+        slug: str | None = None,
+        title: str | None = None,
+        description: str | None = None,
+        migration_version: int | None = None,
+        external_index_provider: StoredExternalIndexProviderMetadata | None = None,
+        hidden_resources_enabled: bool | None = None,
+        hidden_resources_hide_on_creation: bool | None = None,
+        prewarm_enabled: bool | None = None,
     ) -> str:
         async with driver.rw_transaction() as txn:
             stored = await datamanagers.kb.get_config(txn, kbid=kbid, for_update=True)
@@ -428,7 +429,7 @@ class KnowledgeBox:
             await txn.delete_by_prefix(prefix)
             await txn.commit()

-    async def get_resource_shard(self, shard_id: str) ->
+    async def get_resource_shard(self, shard_id: str) -> writer_pb2.ShardObject | None:
         async with datamanagers.with_ro_transaction() as txn:
             pb = await datamanagers.cluster.get_kb_shards(txn, kbid=self.kbid)
             if pb is None:
@@ -439,18 +440,8 @@ class KnowledgeBox:
                 return shard
         return None

-    async def get(self, uuid: str) ->
-
-        if basic is None:
-            return None
-        return Resource(
-            txn=self.txn,
-            storage=self.storage,
-            kb=self,
-            uuid=uuid,
-            basic=basic,
-            disable_vectors=False,
-        )
+    async def get(self, uuid: str) -> Resource | None:
+        return await Resource.get(self.txn, self.kbid, uuid)

     async def maindb_delete_resource(self, uuid: str):
         basic = await datamanagers.resources.get_basic(self.txn, kbid=self.kbid, rid=uuid)
@@ -479,7 +470,7 @@ class KnowledgeBox:
         with processor_observer({"type": "delete_resource_storage"}):
             await self.storage_delete_resource(uuid)

-    async def get_resource_uuid_by_slug(self, slug: str) ->
+    async def get_resource_uuid_by_slug(self, slug: str) -> str | None:
         return await datamanagers.resources.get_resource_uuid_from_slug(
             self.txn, kbid=self.kbid, slug=slug
         )
@@ -496,7 +487,7 @@ class KnowledgeBox:
                 key_ok = True
         return slug

-    async def add_resource(self, uuid: str, slug: str, basic:
+    async def add_resource(self, uuid: str, slug: str, basic: Basic | None = None) -> Resource:
         if basic is None:
             basic = Basic()
         if slug == "":
@@ -508,7 +499,7 @@ class KnowledgeBox:
         return Resource(
             storage=self.storage,
             txn=self.txn,
-
+            kbid=self.kbid,
             uuid=uuid,
             basic=basic,
             disable_vectors=False,
@@ -523,7 +514,7 @@ class KnowledgeBox:
             yield Resource(
                 self.txn,
                 self.storage,
-                self,
+                self.kbid,
                 uuid,
                 disable_vectors=False,
             )

nucliadb/ingest/orm/processor/auditing.py
CHANGED

@@ -18,7 +18,6 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 from nucliadb.common.maindb.driver import Driver
-from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
 from nucliadb.ingest.orm.resource import Resource
 from nucliadb_protos import audit_pb2, writer_pb2
 from nucliadb_protos.resources_pb2 import FieldType
@@ -35,8 +34,7 @@ async def collect_audit_fields(

     audit_storage_fields: list[audit_pb2.AuditField] = []
     async with driver.ro_transaction() as txn:
-
-        resource = Resource(txn, storage, kb, message.uuid)
+        resource = Resource(txn, storage, message.kbid, message.uuid)
         field_keys = await resource.get_fields_ids()

         for field_id, field_type in iterate_auditable_fields(field_keys, message):

nucliadb/ingest/orm/processor/data_augmentation.py
CHANGED

@@ -20,7 +20,6 @@

 import logging
 from dataclasses import dataclass, field
-from typing import Optional

 from nucliadb.ingest.orm.resource import Resource
 from nucliadb.ingest.processing import ProcessingEngine
@@ -94,7 +93,7 @@ def _generate_processing_payload_for_fields(
     rid: str,
     fields: GeneratedFields,
     bm: writer_pb2.BrokerMessage,
-) ->
+) -> PushPayload | None:
     partitioning = get_partitioning()
     partition = partitioning.generate_partition(kbid, rid)


nucliadb/ingest/orm/processor/processor.py
CHANGED

@@ -19,7 +19,6 @@
 #
 import asyncio
 import logging
-from typing import Optional

 import aiohttp.client_exceptions
 import nats.errors
@@ -145,8 +144,8 @@ class Processor:
         self,
         driver: Driver,
         storage: Storage,
-        pubsub:
-        partition:
+        pubsub: PubSubDriver | None = None,
+        partition: str | None = None,
     ):
         self.driver = driver
         self.storage = storage
@@ -158,7 +157,7 @@ class Processor:
         self,
         message: writer_pb2.BrokerMessage,
         seqid: int,
-        partition:
+        partition: str | None = None,
         transaction_check: bool = True,
     ) -> None:
         partition = partition if self.partition is None else self.partition
@@ -285,7 +284,7 @@ class Processor:
         kb = KnowledgeBox(txn, self.storage, kbid)
         uuid = await self.get_resource_uuid(kb, message)

-        resource:
+        resource: Resource | None = None
         handled_exception = None
         created = False

@@ -446,26 +445,27 @@ class Processor:
         # a resource was move to another shard while it was being indexed
         shard_id = await datamanagers.resources.get_resource_shard_id(txn, kbid=kbid, rid=uuid)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        shard = None
+        if shard_id is not None:
+            # Resource already has a shard assigned
+            shard = await kb.get_resource_shard(shard_id)
+            if shard is None:
+                raise AttributeError("Shard not available")
+        else:
+            # It's a new resource, get KB's current active shard to place new resource on
+            shard = await self.index_node_shard_manager.get_current_active_shard(txn, kbid)
+            if shard is None:
+                # No current shard available, create a new one
+                async with locking.distributed_lock(locking.NEW_SHARD_LOCK.format(kbid=kbid)):
+                    kb_config = await datamanagers.kb.get_config(txn, kbid=kbid)
+                    prewarm = kb_config is not None and kb_config.prewarm_enabled
+                    shard = await self.index_node_shard_manager.create_shard_by_kbid(
+                        txn, kbid, prewarm_enabled=prewarm
+                    )
+            await datamanagers.resources.set_resource_shard_id(
+                txn, kbid=kbid, rid=uuid, shard=shard.shard
             )
-
-                txn, kbid=kbid, rid=uuid, shard=shard.shard
-            )
-            return shard
+        return shard

     @processor_observer.wrap({"type": "index_resource"})
     async def index_resource(
@@ -674,7 +674,7 @@ class Processor:
             await self.pubsub.publish(channel, payload)

     async def _mark_resource_error(
-        self, kb: KnowledgeBox, resource:
+        self, kb: KnowledgeBox, resource: Resource | None, partition: str, seqid: int
     ) -> None:
         """
         Unhandled error processing, try to mark resource as error
@@ -695,8 +695,8 @@ class Processor:
     # XXX: Why are these utility functions here?
     async def get_kb_obj(
         self, txn: Transaction, kbid: knowledgebox_pb2.KnowledgeBoxID
-    ) ->
-        uuid:
+    ) -> KnowledgeBox | None:
+        uuid: str | None = kbid.uuid
         if uuid == "":
             uuid = await datamanagers.kb.get_kb_uuid(txn, slug=kbid.slug)

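The rewritten block in the @@ -446,26 +445,27 @@ hunk above changes how a shard is chosen for a resource: reuse the shard already recorded for the resource, otherwise fall back to the KB's current active shard, and only create a new shard, now honoring the KB's prewarm_enabled setting, while holding a distributed lock. A simplified sketch of that decision flow; shard_manager, resources_dm, kb_dm and lock are placeholders standing in for the real nucliadb managers, not actual package APIs:

    async def get_or_assign_shard(txn, kbid: str, rid: str, shard_manager, resources_dm, kb_dm, lock):
        # Shard already recorded for this resource: it must still exist
        shard_id = await resources_dm.get_resource_shard_id(txn, kbid=kbid, rid=rid)
        if shard_id is not None:
            shard = await shard_manager.get_resource_shard(shard_id)
            if shard is None:
                raise AttributeError("Shard not available")
            return shard
        # New resource: place it on the KB's active shard, creating one under a lock if needed
        shard = await shard_manager.get_current_active_shard(txn, kbid)
        if shard is None:
            async with lock:
                kb_config = await kb_dm.get_config(txn, kbid=kbid)
                prewarm = kb_config is not None and kb_config.prewarm_enabled
                shard = await shard_manager.create_shard_by_kbid(txn, kbid, prewarm_enabled=prewarm)
        await resources_dm.set_resource_shard_id(txn, kbid=kbid, rid=rid, shard=shard.shard)
        return shard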

nucliadb/ingest/orm/processor/sequence_manager.py
CHANGED

@@ -17,14 +17,13 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from typing import Optional

 from nucliadb.common.maindb.driver import Driver, Transaction

 TXNID = "/internal/worker/{worker}"


-async def get_last_seqid(driver: Driver, worker: str) ->
+async def get_last_seqid(driver: Driver, worker: str) -> int | None:
     """
     Get last stored sequence id for a worker.

nucliadb/ingest/orm/resource.py
CHANGED
@@ -22,8 +22,9 @@ from __future__ import annotations
 import asyncio
 import logging
 from collections import defaultdict
+from collections.abc import Sequence
 from concurrent.futures import ThreadPoolExecutor
-from typing import
+from typing import Any

 from nucliadb.common import datamanagers
 from nucliadb.common.datamanagers.resources import KB_RESOURCE_SLUG
@@ -68,13 +69,11 @@ from nucliadb_protos.resources_pb2 import Origin as PBOrigin
 from nucliadb_protos.resources_pb2 import Relations as PBRelations
 from nucliadb_protos.writer_pb2 import BrokerMessage
 from nucliadb_utils.storages.storage import Storage
-
-if TYPE_CHECKING: # pragma: no cover
-    from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
+from nucliadb_utils.utilities import get_storage

 logger = logging.getLogger(__name__)

-KB_FIELDS: dict[int,
+KB_FIELDS: dict[int, type] = {
     FieldType.TEXT: Text,
     FieldType.FILE: File,
     FieldType.LINK: Link,
@@ -104,40 +103,55 @@ class Resource:
         self,
         txn: Transaction,
         storage: Storage,
-
+        kbid: str,
         uuid: str,
-        basic:
+        basic: PBBasic | None = None,
         disable_vectors: bool = True,
     ):
         self.fields: dict[tuple[FieldType.ValueType, str], Field] = {}
         self.conversations: dict[int, PBConversation] = {}
-        self.relations:
-        self.all_fields_keys:
-        self.origin:
-        self.extra:
-        self.security:
+        self.relations: PBRelations | None = None
+        self.all_fields_keys: list[tuple[FieldType.ValueType, str]] | None = None
+        self.origin: PBOrigin | None = None
+        self.extra: PBExtra | None = None
+        self.security: utils_pb2.Security | None = None
         self.modified: bool = False
         self._modified_extracted_text: list[FieldID] = []

         self.txn = txn
         self.storage = storage
-        self.
+        self.kbid = kbid
         self.uuid = uuid
         self.basic = basic
         self.disable_vectors = disable_vectors
-        self._previous_status:
-        self.user_relations:
+        self._previous_status: Metadata.Status.ValueType | None = None
+        self.user_relations: PBRelations | None = None
         self.locks: dict[str, asyncio.Lock] = defaultdict(asyncio.Lock)

+    @classmethod
+    async def get(cls, txn: Transaction, kbid: str, rid: str) -> Resource | None:
+        basic = await datamanagers.resources.get_basic(txn, kbid=kbid, rid=rid)
+        if basic is None:
+            return None
+        storage = await get_storage()
+        return cls(
+            txn=txn,
+            storage=storage,
+            kbid=kbid,
+            uuid=rid,
+            basic=basic,
+            disable_vectors=False,
+        )
+
     async def set_slug(self):
         basic = await self.get_basic()
-        new_key = KB_RESOURCE_SLUG.format(kbid=self.
+        new_key = KB_RESOURCE_SLUG.format(kbid=self.kbid, slug=basic.slug)
         await self.txn.set(new_key, self.uuid.encode())

     # Basic
     async def get_basic(self) -> PBBasic:
         if self.basic is None:
-            basic = await datamanagers.resources.get_basic(self.txn, kbid=self.
+            basic = await datamanagers.resources.get_basic(self.txn, kbid=self.kbid, rid=self.uuid)
             self.basic = basic if basic is not None else PBBasic()
         return self.basic

@@ -159,7 +173,7 @@ class Resource:
     async def set_basic(
         self,
         payload: PBBasic,
-        deleted_fields:
+        deleted_fields: list[FieldID] | None = None,
     ):
         await self.get_basic()

@@ -212,49 +226,43 @@ class Resource:
         if deleted_fields is not None and len(deleted_fields) > 0:
             delete_basic_computedmetadata_classifications(self.basic, deleted_fields=deleted_fields)

-        await datamanagers.resources.set_basic(
-            self.txn, kbid=self.kb.kbid, rid=self.uuid, basic=self.basic
-        )
+        await datamanagers.resources.set_basic(self.txn, kbid=self.kbid, rid=self.uuid, basic=self.basic)
         self.modified = True

     # Origin
-    async def get_origin(self) ->
+    async def get_origin(self) -> PBOrigin | None:
         if self.origin is None:
-            origin = await datamanagers.resources.get_origin(self.txn, kbid=self.
+            origin = await datamanagers.resources.get_origin(self.txn, kbid=self.kbid, rid=self.uuid)
             self.origin = origin
         return self.origin

     async def set_origin(self, payload: PBOrigin):
-        await datamanagers.resources.set_origin(
-            self.txn, kbid=self.kb.kbid, rid=self.uuid, origin=payload
-        )
+        await datamanagers.resources.set_origin(self.txn, kbid=self.kbid, rid=self.uuid, origin=payload)
         self.modified = True
         self.origin = payload

     # Extra
-    async def get_extra(self) ->
+    async def get_extra(self) -> PBExtra | None:
         if self.extra is None:
-            extra = await datamanagers.resources.get_extra(self.txn, kbid=self.
+            extra = await datamanagers.resources.get_extra(self.txn, kbid=self.kbid, rid=self.uuid)
             self.extra = extra
         return self.extra

     async def set_extra(self, payload: PBExtra):
-        await datamanagers.resources.set_extra(self.txn, kbid=self.
+        await datamanagers.resources.set_extra(self.txn, kbid=self.kbid, rid=self.uuid, extra=payload)
         self.modified = True
         self.extra = payload

     # Security
-    async def get_security(self) ->
+    async def get_security(self) -> utils_pb2.Security | None:
         if self.security is None:
-            security = await datamanagers.resources.get_security(
-                self.txn, kbid=self.kb.kbid, rid=self.uuid
-            )
+            security = await datamanagers.resources.get_security(self.txn, kbid=self.kbid, rid=self.uuid)
             self.security = security
         return self.security

     async def set_security(self, payload: utils_pb2.Security) -> None:
         await datamanagers.resources.set_security(
-            self.txn, kbid=self.
+            self.txn, kbid=self.kbid, rid=self.uuid, security=payload
         )
         self.modified = True
         self.security = payload
@@ -262,7 +270,7 @@ class Resource:
     # Relations
     async def get_user_relations(self) -> PBRelations:
         if self.user_relations is None:
-            sf = self.storage.user_relations(self.
+            sf = self.storage.user_relations(self.kbid, self.uuid)
             relations = await self.storage.download_pb(sf, PBRelations)
             if relations is None:
                 # Key not found = no relations
@@ -272,7 +280,7 @@ class Resource:
         return self.user_relations

     async def set_user_relations(self, payload: PBRelations):
-        sf = self.storage.user_relations(self.
+        sf = self.storage.user_relations(self.kbid, self.uuid)
         await self.storage.upload_pb(sf, payload)
         self.modified = True
         self.user_relations = payload
@@ -366,22 +374,22 @@ class Resource:
         # REVIEW: are we sure we don't want to actually check this?
         return (type, field) in self.fields

-    async def get_all_field_ids(self, *, for_update: bool) ->
+    async def get_all_field_ids(self, *, for_update: bool) -> PBAllFieldIDs | None:
         return await datamanagers.resources.get_all_field_ids(
-            self.txn, kbid=self.
+            self.txn, kbid=self.kbid, rid=self.uuid, for_update=for_update
         )

     async def set_all_field_ids(self, all_fields: PBAllFieldIDs):
         return await datamanagers.resources.set_all_field_ids(
-            self.txn, kbid=self.
+            self.txn, kbid=self.kbid, rid=self.uuid, allfields=all_fields
         )

     async def update_all_field_ids(
         self,
         *,
-        updated:
-        deleted:
-        errors:
+        updated: list[FieldID] | None = None,
+        deleted: list[FieldID] | None = None,
+        errors: list[writer_pb2.Error] | None = None,
     ):
         needs_update = False
         all_fields = await self.get_all_field_ids(for_update=True)
@@ -460,7 +468,7 @@ class Resource:

         # If this message comes from the processor (not a DA worker), we clear all previous errors
         # TODO: When generated_by is populated with DA tasks by processor, remove only related errors
-        from_processor = any(
+        from_processor = any(x.WhichOneof("generator") == "processor" for x in message.generated_by)

         for (field_type, field), errors in errors_by_field.items():
             field_obj = await self.get_field(field, field_type, load=False)
@@ -480,7 +488,7 @@ class Resource:
         # We infer the status for processor messages
         if message.source == BrokerMessage.MessageSource.PROCESSOR:
             if any(
-
+                e.source_error.severity == writer_pb2.Error.Severity.ERROR for e in status.errors
             ):
                 status.status = writer_pb2.FieldStatus.Status.ERROR
             else:
@@ -510,25 +518,21 @@ class Resource:
             return

         field_statuses = await datamanagers.fields.get_statuses(
-            self.txn, kbid=self.
+            self.txn, kbid=self.kbid, rid=self.uuid, fields=field_ids.fields
         )

         # If any field is processing -> PENDING
-        if any(
+        if any(f.status == writer_pb2.FieldStatus.Status.PENDING for f in field_statuses):
             self.basic.metadata.status = PBMetadata.Status.PENDING
         # If we have any non-DA error -> ERROR
         elif any(
-
-
-
-
-
-                and e.source_error.code != writer_pb2.Error.ErrorCode.DATAAUGMENTATION
-                for e in f.errors
-            )
-            )
-            for f in field_statuses
+            f.status == writer_pb2.FieldStatus.Status.ERROR
+            and any(
+                e.source_error.severity == writer_pb2.Error.Severity.ERROR
+                and e.source_error.code != writer_pb2.Error.ErrorCode.DATAAUGMENTATION
+                for e in f.errors
             )
+            for f in field_statuses
         ):
             self.basic.metadata.status = PBMetadata.Status.ERROR
         # Otherwise (everything processed or we only have DA errors) -> PROCESSED
@@ -651,7 +655,7 @@ class Resource:
             FieldType.LINK,
             load=False,
         )
-        maybe_update_basic_thumbnail(self.basic, link_extracted_data.link_thumbnail, self.
+        maybe_update_basic_thumbnail(self.basic, link_extracted_data.link_thumbnail, self.kbid)

         await field_link.set_link_extracted_data(link_extracted_data)

@@ -678,7 +682,7 @@ class Resource:
             return
         logger.info(
             "Updating resource title from link extracted data",
-            extra={"kbid": self.
+            extra={"kbid": self.kbid, "field": link_extracted_data.field, "rid": self.uuid},
         )
         title = link_extracted_data.title
         await self.update_resource_title(title)
@@ -720,7 +724,7 @@ class Resource:
         # uri can change after extraction
         await field_file.set_file_extracted_data(file_extracted_data)
         maybe_update_basic_icon(self.basic, file_extracted_data.icon)
-        maybe_update_basic_thumbnail(self.basic, file_extracted_data.file_thumbnail, self.
+        maybe_update_basic_thumbnail(self.basic, file_extracted_data.file_thumbnail, self.kbid)
         self.modified = True

     async def _should_update_resource_title_from_file_metadata(self) -> bool:
@@ -742,7 +746,7 @@ class Resource:
         filenames = set()
         for (field_type, _), field_obj in fields.items():
             if field_type == FieldType.FILE:
-                field_value:
+                field_value: FieldFile | None = await field_obj.get_value()
                 if field_value is not None:
                     if field_value.file.filename not in ("", None):
                         filenames.add(field_value.file.filename)
@@ -767,7 +771,7 @@ class Resource:
         fid = FieldId.from_pb(rid=self.uuid, field_type=FieldType.FILE, key=fed.field)
         logger.info(
             "Updating resource title from file extracted data",
-            extra={"kbid": self.
+            extra={"kbid": self.kbid, "field": fid.full(), "new_title": fed.title},
         )
         await self.update_resource_title(fed.title)
         await self.unmark_title_for_reset()
@@ -785,9 +789,7 @@ class Resource:
         )
         await field_obj.set_field_metadata(field_metadata)

-        maybe_update_basic_thumbnail(
-            self.basic, field_metadata.metadata.metadata.thumbnail, self.kb.kbid
-        )
+        maybe_update_basic_thumbnail(self.basic, field_metadata.metadata.metadata.thumbnail, self.kbid)

         update_basic_computedmetadata_classifications(self.basic, field_metadata)
         self.modified = True
@@ -799,7 +801,7 @@ class Resource:
         await self.get_fields(force=True)
         vectorsets = {
             vectorset_id: vs
-            async for vectorset_id, vs in datamanagers.vectorsets.iter(self.txn, kbid=self.
+            async for vectorset_id, vs in datamanagers.vectorsets.iter(self.txn, kbid=self.kbid)
         }

         for field_vectors in fields_vectors:
@@ -808,13 +810,13 @@ class Resource:
                 assert len(vectorsets) == 1, (
                     "Invalid broker message, can't ingest vectors from unknown vectorset to KB with multiple vectorsets"
                 )
-                vectorset =
+                vectorset = next(iter(vectorsets.values()))

             else:
                 if field_vectors.vectorset_id not in vectorsets:
                     logger.warning(
                         "Dropping extracted vectors for unknown vectorset",
-                        extra={"kbid": self.
+                        extra={"kbid": self.kbid, "vectorset": field_vectors.vectorset_id},
                     )
                     continue

@@ -925,7 +927,7 @@ def maybe_update_basic_summary(basic: PBBasic, summary_text: str) -> bool:
     return True


-def maybe_update_basic_icon(basic: PBBasic, mimetype:
+def maybe_update_basic_icon(basic: PBBasic, mimetype: str | None) -> bool:
     if basic.icon not in (None, "", "application/octet-stream", GENERIC_MIME_TYPE):
         # Icon already set or detected
         return False
@@ -944,7 +946,7 @@ def maybe_update_basic_icon(basic: PBBasic, mimetype: Optional[str]) -> bool:
     return True


-def maybe_update_basic_thumbnail(basic: PBBasic, thumbnail:
+def maybe_update_basic_thumbnail(basic: PBBasic, thumbnail: CloudFile | None, kbid: str) -> bool:
     if basic.thumbnail or thumbnail is None:
         return False
     basic.thumbnail = CloudLink.format_reader_download_uri(thumbnail.uri)
@@ -981,7 +983,7 @@ def update_basic_languages(basic: Basic, languages: list[str]) -> bool:
     return updated


-def get_text_field_mimetype(bm: BrokerMessage) ->
+def get_text_field_mimetype(bm: BrokerMessage) -> str | None:
     if len(bm.texts) == 0:
         return None
     text_format = next(iter(bm.texts.values())).format

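The new Resource.get classmethod introduced above centralizes the lookup that KnowledgeBox.get previously inlined: fetch the resource's basic metadata, return None if it does not exist, and otherwise build a Resource bound to the kbid string instead of a KnowledgeBox instance. A small usage sketch under the assumption that a maindb driver is already initialized (fetch_resource itself is illustrative, not part of the package):

    from __future__ import annotations

    from nucliadb.ingest.orm.resource import Resource


    async def fetch_resource(driver, kbid: str, rid: str) -> Resource | None:
        # ro_transaction() mirrors how other call sites in this diff open read-only transactions
        async with driver.ro_transaction() as txn:
            resource = await Resource.get(txn, kbid, rid)
            if resource is None:
                return None  # no basic metadata stored for this resource id
            # basic is preloaded and vectors are enabled (disable_vectors=False)
            return resource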
nucliadb/ingest/orm/utils.py
CHANGED

@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

 import urllib.parse
-from
+from collections.abc import Sequence

 from nucliadb.models.internal.processing import PushPayload, PushTextFormat, Text
 from nucliadb_protos.resources_pb2 import (