nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -17,47 +17,191 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from typing import AsyncGenerator, Optional
|
20
|
+
from typing import TYPE_CHECKING, AsyncGenerator, Optional
|
21
21
|
|
22
22
|
import backoff
|
23
|
-
from nucliadb_protos.resources_pb2 import Basic
|
24
23
|
|
24
|
+
from nucliadb.common.datamanagers.utils import get_kv_pb
|
25
25
|
from nucliadb.common.maindb.driver import Transaction
|
26
26
|
from nucliadb.common.maindb.exceptions import ConflictError, NotFoundError
|
27
27
|
|
28
28
|
# These should be refactored
|
29
|
-
from nucliadb.ingest.
|
30
|
-
from
|
31
|
-
from nucliadb.ingest.orm.utils import get_basic, set_basic
|
32
|
-
from nucliadb_protos import noderesources_pb2, writer_pb2
|
29
|
+
from nucliadb.ingest.settings import settings as ingest_settings
|
30
|
+
from nucliadb_protos import resources_pb2
|
33
31
|
from nucliadb_utils.utilities import get_storage
|
34
32
|
|
35
|
-
from .utils import
|
33
|
+
from .utils import with_ro_transaction
|
36
34
|
|
35
|
+
if TYPE_CHECKING:
|
36
|
+
from nucliadb.ingest.orm.resource import Resource as ResourceORM
|
37
|
+
|
38
|
+
|
39
|
+
KB_RESOURCE_BASIC = "/kbs/{kbid}/r/{uuid}"
|
40
|
+
KB_RESOURCE_BASIC_FS = "/kbs/{kbid}/r/{uuid}/basic" # Only used on FS driver
|
41
|
+
KB_RESOURCE_ORIGIN = "/kbs/{kbid}/r/{uuid}/origin"
|
42
|
+
KB_RESOURCE_EXTRA = "/kbs/{kbid}/r/{uuid}/extra"
|
43
|
+
KB_RESOURCE_SECURITY = "/kbs/{kbid}/r/{uuid}/security"
|
44
|
+
KB_RESOURCE_RELATIONS = "/kbs/{kbid}/r/{uuid}/relations"
|
45
|
+
|
46
|
+
KB_RESOURCE_SLUG_BASE = "/kbs/{kbid}/s/"
|
47
|
+
KB_RESOURCE_SLUG = f"{KB_RESOURCE_SLUG_BASE}{{slug}}"
|
48
|
+
|
49
|
+
KB_RESOURCE_FIELDS = "/kbs/{kbid}/r/{uuid}/f/"
|
50
|
+
|
51
|
+
KB_RESOURCE_ALL_FIELDS = "/kbs/{kbid}/r/{uuid}/allfields"
|
37
52
|
KB_MATERIALIZED_RESOURCES_COUNT = "/kbs/{kbid}/materialized/resources/count"
|
53
|
+
|
38
54
|
KB_RESOURCE_SHARD = "/kbs/{kbid}/r/{uuid}/shard"
|
39
55
|
|
40
56
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
async def _iter_resource_slugs(*, kbid: str) -> AsyncGenerator[str, None]:
|
45
|
-
async with with_transaction() as txn:
|
46
|
-
async for key in txn.keys(
|
47
|
-
match=KB_RESOURCE_SLUG_BASE.format(kbid=kbid), count=-1
|
48
|
-
):
|
49
|
-
yield key.split("/")[-1]
|
57
|
+
async def resource_exists(txn: Transaction, *, kbid: str, rid: str) -> bool:
|
58
|
+
basic = await get_basic_raw(txn, kbid=kbid, rid=rid)
|
59
|
+
return basic is not None
|
50
60
|
|
51
61
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
async def
|
56
|
-
|
57
|
-
|
58
|
-
|
62
|
+
# id and slug
|
63
|
+
|
64
|
+
|
65
|
+
async def get_resource_uuid_from_slug(txn: Transaction, *, kbid: str, slug: str) -> Optional[str]:
|
66
|
+
encoded_uuid = await txn.get(KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug, for_update=False))
|
67
|
+
if not encoded_uuid:
|
68
|
+
return None
|
69
|
+
return encoded_uuid.decode()
|
70
|
+
|
71
|
+
|
72
|
+
async def slug_exists(txn: Transaction, *, kbid: str, slug: str) -> bool:
|
73
|
+
key = KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug)
|
74
|
+
encoded_slug: Optional[bytes] = await txn.get(key)
|
75
|
+
return encoded_slug not in (None, b"")
|
76
|
+
|
77
|
+
|
78
|
+
async def modify_slug(txn: Transaction, *, kbid: str, rid: str, new_slug: str) -> str:
|
79
|
+
basic = await get_basic(txn, kbid=kbid, rid=rid)
|
80
|
+
if basic is None:
|
81
|
+
raise NotFoundError()
|
82
|
+
old_slug = basic.slug
|
83
|
+
|
84
|
+
uuid_for_new_slug = await get_resource_uuid_from_slug(txn, kbid=kbid, slug=new_slug)
|
85
|
+
if uuid_for_new_slug is not None:
|
86
|
+
if uuid_for_new_slug == rid:
|
87
|
+
# Nothing to change
|
88
|
+
return old_slug
|
89
|
+
else:
|
90
|
+
raise ConflictError(f"Slug {new_slug} already exists")
|
91
|
+
key = KB_RESOURCE_SLUG.format(kbid=kbid, slug=old_slug)
|
92
|
+
await txn.delete(key)
|
93
|
+
key = KB_RESOURCE_SLUG.format(kbid=kbid, slug=new_slug)
|
94
|
+
await txn.set(key, rid.encode())
|
95
|
+
basic.slug = new_slug
|
96
|
+
await set_basic(txn, kbid=kbid, rid=rid, basic=basic)
|
97
|
+
return old_slug
|
98
|
+
|
99
|
+
|
100
|
+
# resource-shard
|
101
|
+
|
102
|
+
|
103
|
+
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
|
104
|
+
async def get_resource_shard_id(
|
105
|
+
txn: Transaction, *, kbid: str, rid: str, for_update: bool = False
|
106
|
+
) -> Optional[str]:
|
107
|
+
shard = await txn.get(KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid, for_update=for_update))
|
108
|
+
if shard is not None:
|
109
|
+
return shard.decode()
|
110
|
+
else:
|
111
|
+
return None
|
112
|
+
|
113
|
+
|
114
|
+
async def set_resource_shard_id(txn: Transaction, *, kbid: str, rid: str, shard: str):
|
115
|
+
await txn.set(KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid), shard.encode())
|
116
|
+
|
117
|
+
|
118
|
+
# Basic
|
119
|
+
|
120
|
+
|
121
|
+
async def get_basic(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Basic]:
|
122
|
+
raw = await get_basic_raw(txn, kbid=kbid, rid=rid)
|
123
|
+
if raw is None:
|
124
|
+
return None
|
125
|
+
basic = resources_pb2.Basic()
|
126
|
+
basic.ParseFromString(raw)
|
127
|
+
return basic
|
128
|
+
|
129
|
+
|
130
|
+
async def get_basic_raw(txn: Transaction, *, kbid: str, rid: str) -> Optional[bytes]:
|
131
|
+
if ingest_settings.driver == "local":
|
132
|
+
raw_basic = await txn.get(KB_RESOURCE_BASIC_FS.format(kbid=kbid, uuid=rid))
|
133
|
+
else:
|
134
|
+
raw_basic = await txn.get(KB_RESOURCE_BASIC.format(kbid=kbid, uuid=rid))
|
135
|
+
return raw_basic
|
136
|
+
|
137
|
+
|
138
|
+
async def set_basic(txn: Transaction, *, kbid: str, rid: str, basic: resources_pb2.Basic):
|
139
|
+
if ingest_settings.driver == "local":
|
140
|
+
await txn.set(
|
141
|
+
KB_RESOURCE_BASIC_FS.format(kbid=kbid, uuid=rid),
|
142
|
+
basic.SerializeToString(),
|
59
143
|
)
|
60
|
-
|
144
|
+
else:
|
145
|
+
await txn.set(
|
146
|
+
KB_RESOURCE_BASIC.format(kbid=kbid, uuid=rid),
|
147
|
+
basic.SerializeToString(),
|
148
|
+
)
|
149
|
+
|
150
|
+
|
151
|
+
# Origin
|
152
|
+
|
153
|
+
|
154
|
+
async def get_origin(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Origin]:
|
155
|
+
key = KB_RESOURCE_ORIGIN.format(kbid=kbid, uuid=rid)
|
156
|
+
return await get_kv_pb(txn, key, resources_pb2.Origin)
|
157
|
+
|
158
|
+
|
159
|
+
async def set_origin(txn: Transaction, *, kbid: str, rid: str, origin: resources_pb2.Origin):
|
160
|
+
key = KB_RESOURCE_ORIGIN.format(kbid=kbid, uuid=rid)
|
161
|
+
await txn.set(key, origin.SerializeToString())
|
162
|
+
|
163
|
+
|
164
|
+
# Extra
|
165
|
+
|
166
|
+
|
167
|
+
async def get_extra(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Extra]:
|
168
|
+
key = KB_RESOURCE_EXTRA.format(kbid=kbid, uuid=rid)
|
169
|
+
return await get_kv_pb(txn, key, resources_pb2.Extra)
|
170
|
+
|
171
|
+
|
172
|
+
async def set_extra(txn: Transaction, *, kbid: str, rid: str, extra: resources_pb2.Extra):
|
173
|
+
key = KB_RESOURCE_EXTRA.format(kbid=kbid, uuid=rid)
|
174
|
+
await txn.set(key, extra.SerializeToString())
|
175
|
+
|
176
|
+
|
177
|
+
# Security
|
178
|
+
|
179
|
+
|
180
|
+
async def get_security(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Security]:
|
181
|
+
key = KB_RESOURCE_SECURITY.format(kbid=kbid, uuid=rid)
|
182
|
+
return await get_kv_pb(txn, key, resources_pb2.Security)
|
183
|
+
|
184
|
+
|
185
|
+
async def set_security(txn: Transaction, *, kbid: str, rid: str, security: resources_pb2.Security):
|
186
|
+
key = KB_RESOURCE_SECURITY.format(kbid=kbid, uuid=rid)
|
187
|
+
await txn.set(key, security.SerializeToString())
|
188
|
+
|
189
|
+
|
190
|
+
# Relations
|
191
|
+
|
192
|
+
|
193
|
+
async def get_relations(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Relations]:
|
194
|
+
key = KB_RESOURCE_RELATIONS.format(kbid=kbid, uuid=rid)
|
195
|
+
return await get_kv_pb(txn, key, resources_pb2.Relations)
|
196
|
+
|
197
|
+
|
198
|
+
async def set_relations(txn: Transaction, *, kbid: str, rid: str, relations: resources_pb2.Relations):
|
199
|
+
key = KB_RESOURCE_RELATIONS.format(kbid=kbid, uuid=rid)
|
200
|
+
await txn.set(key, relations.SerializeToString())
|
201
|
+
|
202
|
+
|
203
|
+
# KB resource ids (this functions use internal transactions, breaking the
|
204
|
+
# datamanager contract. We should rethink them at some point)
|
61
205
|
|
62
206
|
|
63
207
|
async def iterate_resource_ids(*, kbid: str) -> AsyncGenerator[str, None]:
|
@@ -80,52 +224,21 @@ async def iterate_resource_ids(*, kbid: str) -> AsyncGenerator[str, None]:
|
|
80
224
|
yield rid
|
81
225
|
|
82
226
|
|
83
|
-
@backoff.on_exception(
|
84
|
-
|
85
|
-
)
|
86
|
-
async
|
87
|
-
|
88
|
-
) -> Optional[str]:
|
89
|
-
shard = await txn.get(KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid))
|
90
|
-
if shard is not None:
|
91
|
-
return shard.decode()
|
92
|
-
else:
|
93
|
-
return None
|
94
|
-
|
95
|
-
|
96
|
-
@backoff.on_exception(
|
97
|
-
backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
|
98
|
-
)
|
99
|
-
async def get_resource(
|
100
|
-
txn: Transaction, *, kbid: str, rid: str
|
101
|
-
) -> Optional[ResourceORM]:
|
102
|
-
"""
|
103
|
-
Not ideal to return Resource type here but refactoring would
|
104
|
-
require a lot of changes.
|
105
|
-
|
106
|
-
At least this isolated that dependency here.
|
107
|
-
"""
|
108
|
-
# prevent circulat imports -- this is not ideal that we have the ORM mix here.
|
109
|
-
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
|
227
|
+
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
|
228
|
+
async def _iter_resource_slugs(*, kbid: str) -> AsyncGenerator[str, None]:
|
229
|
+
async with with_ro_transaction() as txn:
|
230
|
+
async for key in txn.keys(match=KB_RESOURCE_SLUG_BASE.format(kbid=kbid)):
|
231
|
+
yield key.split("/")[-1]
|
110
232
|
|
111
|
-
kb_orm = KnowledgeBoxORM(txn, await get_storage(), kbid)
|
112
|
-
return await kb_orm.get(rid)
|
113
233
|
|
234
|
+
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
|
235
|
+
async def _get_resource_ids_from_slugs(kbid: str, slugs: list[str]) -> list[str]:
|
236
|
+
async with with_ro_transaction() as txn:
|
237
|
+
rids = await txn.batch_get([KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug) for slug in slugs])
|
238
|
+
return [rid.decode() for rid in rids if rid is not None]
|
114
239
|
|
115
|
-
@backoff.on_exception(
|
116
|
-
backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
|
117
|
-
)
|
118
|
-
async def get_resource_index_message(
|
119
|
-
txn: Transaction, *, kbid: str, rid: str
|
120
|
-
) -> Optional[noderesources_pb2.Resource]:
|
121
|
-
# prevent circular imports -- this is not ideal that we have the ORM mix here.
|
122
|
-
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
|
123
240
|
|
124
|
-
|
125
|
-
res = await kb_orm.get(rid)
|
126
|
-
if res is None:
|
127
|
-
return None
|
128
|
-
return (await res.generate_index_message()).brain
|
241
|
+
# KB resource count (materialized key)
|
129
242
|
|
130
243
|
|
131
244
|
async def calculate_number_of_resources(txn: Transaction, *, kbid: str) -> int:
|
@@ -150,72 +263,56 @@ async def get_number_of_resources(txn: Transaction, *, kbid: str) -> int:
|
|
150
263
|
"""
|
151
264
|
Return cached number of resources in a knowledgebox.
|
152
265
|
"""
|
153
|
-
raw_value = await txn.get(KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid))
|
266
|
+
raw_value = await txn.get(KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid), for_update=False)
|
154
267
|
if raw_value is None:
|
155
268
|
return -1
|
156
269
|
return int(raw_value)
|
157
270
|
|
158
271
|
|
159
272
|
async def set_number_of_resources(txn: Transaction, kbid: str, value: int) -> None:
|
160
|
-
await txn.set(
|
161
|
-
KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid), str(value).encode()
|
162
|
-
)
|
273
|
+
await txn.set(KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid), str(value).encode())
|
163
274
|
|
164
275
|
|
165
|
-
|
166
|
-
txn: Transaction, *, kbid: str, rid: str
|
167
|
-
) -> Optional[writer_pb2.BrokerMessage]:
|
168
|
-
resource = await get_resource(txn, kbid=kbid, rid=rid)
|
169
|
-
if resource is None:
|
170
|
-
return None
|
276
|
+
# Fields (materialized key with all field ids)
|
171
277
|
|
172
|
-
resource.disable_vectors = False
|
173
|
-
resource.txn = txn
|
174
|
-
bm = await resource.generate_broker_message()
|
175
|
-
return bm
|
176
278
|
|
279
|
+
async def get_all_field_ids(
|
280
|
+
txn: Transaction, *, kbid: str, rid: str, for_update: bool = False
|
281
|
+
) -> Optional[resources_pb2.AllFieldIDs]:
|
282
|
+
key = KB_RESOURCE_ALL_FIELDS.format(kbid=kbid, uuid=rid)
|
283
|
+
return await get_kv_pb(txn, key, resources_pb2.AllFieldIDs, for_update=for_update)
|
177
284
|
|
178
|
-
async def get_resource_basic(
|
179
|
-
txn: Transaction, *, kbid: str, rid: str
|
180
|
-
) -> Optional[Basic]:
|
181
|
-
raw_basic = await get_basic(txn, kbid, rid)
|
182
|
-
if not raw_basic:
|
183
|
-
return None
|
184
|
-
basic = Basic()
|
185
|
-
basic.ParseFromString(raw_basic)
|
186
|
-
return basic
|
187
285
|
|
286
|
+
async def set_all_field_ids(
|
287
|
+
txn: Transaction, *, kbid: str, rid: str, allfields: resources_pb2.AllFieldIDs
|
288
|
+
):
|
289
|
+
key = KB_RESOURCE_ALL_FIELDS.format(kbid=kbid, uuid=rid)
|
290
|
+
await txn.set(key, allfields.SerializeToString())
|
188
291
|
|
189
|
-
async def get_resource_uuid_from_slug(
|
190
|
-
txn: Transaction, *, kbid: str, slug: str
|
191
|
-
) -> Optional[str]:
|
192
|
-
encoded_uuid = await txn.get(KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug))
|
193
|
-
if not encoded_uuid:
|
194
|
-
return None
|
195
|
-
return encoded_uuid.decode()
|
196
292
|
|
293
|
+
async def has_field(txn: Transaction, *, kbid: str, rid: str, field_id: resources_pb2.FieldID) -> bool:
|
294
|
+
fields = await get_all_field_ids(txn, kbid=kbid, rid=rid)
|
295
|
+
if fields is None:
|
296
|
+
return False
|
297
|
+
for resource_field_id in fields.fields:
|
298
|
+
if field_id == resource_field_id:
|
299
|
+
return True
|
300
|
+
return False
|
197
301
|
|
198
|
-
async def modify_slug(txn: Transaction, *, kbid: str, rid: str, new_slug: str) -> str:
|
199
|
-
basic = await get_resource_basic(txn, kbid=kbid, rid=rid)
|
200
|
-
if basic is None:
|
201
|
-
raise NotFoundError()
|
202
|
-
old_slug = basic.slug
|
203
302
|
|
204
|
-
|
205
|
-
if uuid_for_new_slug is not None:
|
206
|
-
if uuid_for_new_slug == rid:
|
207
|
-
# Nothing to change
|
208
|
-
return old_slug
|
209
|
-
else:
|
210
|
-
raise ConflictError(f"Slug {new_slug} already exists")
|
211
|
-
key = KB_RESOURCE_SLUG.format(kbid=kbid, slug=old_slug)
|
212
|
-
await txn.delete(key)
|
213
|
-
key = KB_RESOURCE_SLUG.format(kbid=kbid, slug=new_slug)
|
214
|
-
await txn.set(key, rid.encode())
|
215
|
-
basic.slug = new_slug
|
216
|
-
await set_basic(txn, kbid, rid, basic)
|
217
|
-
return old_slug
|
303
|
+
# ORM mix (these functions shouldn't belong here)
|
218
304
|
|
219
305
|
|
220
|
-
|
221
|
-
|
306
|
+
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
|
307
|
+
async def get_resource(txn: Transaction, *, kbid: str, rid: str) -> Optional["ResourceORM"]:
|
308
|
+
"""
|
309
|
+
Not ideal to return Resource type here but refactoring would
|
310
|
+
require a lot of changes.
|
311
|
+
|
312
|
+
At least this isolated that dependency here.
|
313
|
+
"""
|
314
|
+
# prevent circular imports -- this is not ideal that we have the ORM mix here.
|
315
|
+
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
|
316
|
+
|
317
|
+
kb_orm = KnowledgeBoxORM(txn, await get_storage(), kbid)
|
318
|
+
return await kb_orm.get(rid)
|
@@ -21,22 +21,42 @@ import logging
|
|
21
21
|
from typing import AsyncGenerator, Optional
|
22
22
|
|
23
23
|
import orjson
|
24
|
+
from pydantic import BaseModel
|
24
25
|
|
25
26
|
from nucliadb.common.maindb.driver import Transaction
|
27
|
+
from nucliadb_protos import knowledgebox_pb2 as kb_pb2
|
26
28
|
from nucliadb_protos import writer_pb2
|
27
29
|
|
28
|
-
from .utils import get_kv_pb,
|
30
|
+
from .utils import get_kv_pb, with_ro_transaction
|
29
31
|
|
30
32
|
logger = logging.getLogger(__name__)
|
31
33
|
|
34
|
+
KB_ROLLOVER_STATE = "/kbs/{kbid}/rollover/state"
|
32
35
|
KB_ROLLOVER_SHARDS = "/kbs/{kbid}/rollover/shards"
|
36
|
+
KB_ROLLOVER_EXTERNAL_INDEX_METADATA = "/kbs/{kbid}/rollover/external_index_metadata"
|
33
37
|
KB_ROLLOVER_RESOURCES_TO_INDEX = "/kbs/{kbid}/rollover/to-index/{resource}"
|
34
38
|
KB_ROLLOVER_RESOURCES_INDEXED = "/kbs/{kbid}/rollover/indexed/{resource}"
|
35
39
|
|
36
40
|
|
37
|
-
|
38
|
-
|
39
|
-
|
41
|
+
class RolloverState(BaseModel):
|
42
|
+
rollover_shards_created: bool = False
|
43
|
+
external_index_created: bool = False
|
44
|
+
resources_scheduled: bool = False
|
45
|
+
resources_indexed: bool = False
|
46
|
+
cutover_shards: bool = False
|
47
|
+
cutover_external_index: bool = False
|
48
|
+
resources_validated: bool = False
|
49
|
+
|
50
|
+
|
51
|
+
class RolloverStateNotFoundError(Exception):
|
52
|
+
"""
|
53
|
+
Raised when the rollover state is not found.
|
54
|
+
"""
|
55
|
+
|
56
|
+
...
|
57
|
+
|
58
|
+
|
59
|
+
async def get_kb_rollover_shards(txn: Transaction, *, kbid: str) -> Optional[writer_pb2.Shards]:
|
40
60
|
key = KB_ROLLOVER_SHARDS.format(kbid=kbid)
|
41
61
|
return await get_kv_pb(txn, key, writer_pb2.Shards)
|
42
62
|
|
@@ -90,7 +110,7 @@ async def add_indexed(
|
|
90
110
|
kbid: str,
|
91
111
|
resource_id: str,
|
92
112
|
shard_id: str,
|
93
|
-
modification_time: int
|
113
|
+
modification_time: int,
|
94
114
|
) -> None:
|
95
115
|
to_index = KB_ROLLOVER_RESOURCES_TO_INDEX.format(kbid=kbid, resource=resource_id)
|
96
116
|
indexed = KB_ROLLOVER_RESOURCES_INDEXED.format(kbid=kbid, resource=resource_id)
|
@@ -106,7 +126,9 @@ async def get_indexed_data(
|
|
106
126
|
val = await txn.get(key)
|
107
127
|
if val is not None:
|
108
128
|
data = orjson.loads(val)
|
109
|
-
|
129
|
+
shard_id: str = data[0]
|
130
|
+
modification_time: int = data[1]
|
131
|
+
return shard_id, modification_time
|
110
132
|
return None
|
111
133
|
|
112
134
|
|
@@ -122,15 +144,13 @@ async def iter_indexed_keys(*, kbid: str) -> AsyncGenerator[str, None]:
|
|
122
144
|
internally managed
|
123
145
|
"""
|
124
146
|
start_key = KB_ROLLOVER_RESOURCES_INDEXED.format(kbid=kbid, resource="")
|
125
|
-
async with
|
126
|
-
async for key in txn.keys(match=start_key
|
147
|
+
async with with_ro_transaction() as txn:
|
148
|
+
async for key in txn.keys(match=start_key):
|
127
149
|
yield key.split("/")[-1]
|
128
150
|
|
129
151
|
|
130
|
-
async def _get_batch_indexed_data(
|
131
|
-
|
132
|
-
) -> list[tuple[str, tuple[str, int]]]:
|
133
|
-
async with with_transaction() as txn:
|
152
|
+
async def _get_batch_indexed_data(*, kbid, batch: list[str]) -> list[tuple[str, tuple[str, int]]]:
|
153
|
+
async with with_ro_transaction() as txn:
|
134
154
|
values = await txn.batch_get(
|
135
155
|
[
|
136
156
|
KB_ROLLOVER_RESOURCES_INDEXED.format(kbid=kbid, resource=resource_id)
|
@@ -140,14 +160,15 @@ async def _get_batch_indexed_data(
|
|
140
160
|
results: list[tuple[str, tuple[str, int]]] = []
|
141
161
|
for key, val in zip(batch, values):
|
142
162
|
if val is not None:
|
143
|
-
|
163
|
+
shard_id: str
|
164
|
+
modification_time: int
|
165
|
+
shard_id, modification_time = orjson.loads(val)
|
166
|
+
data = (shard_id, modification_time)
|
144
167
|
results.append((key.split("/")[-1], data))
|
145
168
|
return results
|
146
169
|
|
147
170
|
|
148
|
-
async def iterate_indexed_data(
|
149
|
-
*, kbid: str
|
150
|
-
) -> AsyncGenerator[tuple[str, tuple[str, int]], None]:
|
171
|
+
async def iterate_indexed_data(*, kbid: str) -> AsyncGenerator[tuple[str, tuple[str, int]], None]:
|
151
172
|
"""
|
152
173
|
This function is optimized for reducing the time a transaction is open.
|
153
174
|
|
@@ -164,3 +185,43 @@ async def iterate_indexed_data(
|
|
164
185
|
if len(batch) > 0:
|
165
186
|
for key, val in await _get_batch_indexed_data(kbid=kbid, batch=batch):
|
166
187
|
yield key, val
|
188
|
+
|
189
|
+
|
190
|
+
async def get_rollover_state(txn: Transaction, kbid: str) -> RolloverState:
|
191
|
+
key = KB_ROLLOVER_STATE.format(kbid=kbid)
|
192
|
+
val = await txn.get(key)
|
193
|
+
if not val:
|
194
|
+
raise RolloverStateNotFoundError(kbid)
|
195
|
+
return RolloverState.model_validate_json(val)
|
196
|
+
|
197
|
+
|
198
|
+
async def set_rollover_state(txn: Transaction, kbid: str, state: RolloverState) -> None:
|
199
|
+
key = KB_ROLLOVER_STATE.format(kbid=kbid)
|
200
|
+
await txn.set(key, state.model_dump_json().encode())
|
201
|
+
|
202
|
+
|
203
|
+
async def clear_rollover_state(txn: Transaction, kbid: str) -> None:
|
204
|
+
key = KB_ROLLOVER_STATE.format(kbid=kbid)
|
205
|
+
await txn.delete(key)
|
206
|
+
|
207
|
+
|
208
|
+
async def update_kb_rollover_external_index_metadata(
|
209
|
+
txn: Transaction, *, kbid: str, metadata: kb_pb2.StoredExternalIndexProviderMetadata
|
210
|
+
) -> None:
|
211
|
+
key = KB_ROLLOVER_EXTERNAL_INDEX_METADATA.format(kbid=kbid)
|
212
|
+
await txn.set(key, metadata.SerializeToString())
|
213
|
+
|
214
|
+
|
215
|
+
async def get_kb_rollover_external_index_metadata(
|
216
|
+
txn: Transaction, *, kbid: str
|
217
|
+
) -> Optional[kb_pb2.StoredExternalIndexProviderMetadata]:
|
218
|
+
key = KB_ROLLOVER_EXTERNAL_INDEX_METADATA.format(kbid=kbid)
|
219
|
+
val = await txn.get(key)
|
220
|
+
if not val:
|
221
|
+
return None
|
222
|
+
return kb_pb2.StoredExternalIndexProviderMetadata.FromString(val)
|
223
|
+
|
224
|
+
|
225
|
+
async def delete_kb_rollover_external_index_metadata(txn: Transaction, *, kbid: str) -> None:
|
226
|
+
key = KB_ROLLOVER_EXTERNAL_INDEX_METADATA.format(kbid=kbid)
|
227
|
+
await txn.delete(key)
|
@@ -17,38 +17,26 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from typing import Optional
|
21
20
|
|
22
|
-
from
|
21
|
+
from typing import Optional
|
23
22
|
|
23
|
+
from nucliadb.common.datamanagers.utils import get_kv_pb
|
24
24
|
from nucliadb.common.maindb.driver import Transaction
|
25
|
+
from nucliadb_protos import knowledgebox_pb2
|
25
26
|
|
26
27
|
KB_SYNONYMS = "/kbs/{kbid}/synonyms"
|
27
28
|
|
28
29
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
async def get(self) -> Optional[PBSynonyms]:
|
43
|
-
try:
|
44
|
-
payload = await self.txn.get(self.key)
|
45
|
-
except KeyError:
|
46
|
-
return None
|
47
|
-
if payload is None:
|
48
|
-
return None
|
49
|
-
body = PBSynonyms()
|
50
|
-
body.ParseFromString(payload)
|
51
|
-
return body
|
52
|
-
|
53
|
-
async def clear(self):
|
54
|
-
await self.txn.delete(self.key)
|
30
|
+
async def get(txn: Transaction, *, kbid: str) -> Optional[knowledgebox_pb2.Synonyms]:
|
31
|
+
key = KB_SYNONYMS.format(kbid=kbid)
|
32
|
+
return await get_kv_pb(txn, key, knowledgebox_pb2.Synonyms, for_update=False)
|
33
|
+
|
34
|
+
|
35
|
+
async def set(txn: Transaction, *, kbid: str, synonyms: knowledgebox_pb2.Synonyms):
|
36
|
+
key = KB_SYNONYMS.format(kbid=kbid)
|
37
|
+
await txn.set(key, synonyms.SerializeToString())
|
38
|
+
|
39
|
+
|
40
|
+
async def delete(txn: Transaction, *, kbid: str):
|
41
|
+
key = KB_SYNONYMS.format(kbid=kbid)
|
42
|
+
await txn.delete(key)
|
@@ -29,21 +29,29 @@ PB_TYPE = TypeVar("PB_TYPE", bound=Message)
|
|
29
29
|
|
30
30
|
|
31
31
|
async def get_kv_pb(
|
32
|
-
txn: Transaction, key: str, pb_type: Type[PB_TYPE]
|
32
|
+
txn: Transaction, key: str, pb_type: Type[PB_TYPE], for_update: bool = True
|
33
33
|
) -> Optional[PB_TYPE]:
|
34
|
-
|
35
|
-
if
|
36
|
-
kb_shards = pb_type()
|
37
|
-
kb_shards.ParseFromString(kb_shards_bytes)
|
38
|
-
return kb_shards
|
39
|
-
else:
|
34
|
+
serialized: Optional[bytes] = await txn.get(key, for_update=for_update)
|
35
|
+
if serialized is None:
|
40
36
|
return None
|
37
|
+
pb = pb_type()
|
38
|
+
pb.ParseFromString(serialized)
|
39
|
+
return pb
|
41
40
|
|
42
41
|
|
43
42
|
@contextlib.asynccontextmanager
|
44
|
-
async def
|
43
|
+
async def with_rw_transaction():
|
45
44
|
driver = get_driver()
|
46
|
-
async with driver.transaction(
|
47
|
-
read_only=read_only, wait_for_abort=wait_for_abort
|
48
|
-
) as txn:
|
45
|
+
async with driver.transaction(read_only=False) as txn:
|
49
46
|
yield txn
|
47
|
+
|
48
|
+
|
49
|
+
# For backwards compatibility
|
50
|
+
with_transaction = with_rw_transaction
|
51
|
+
|
52
|
+
|
53
|
+
@contextlib.asynccontextmanager
|
54
|
+
async def with_ro_transaction():
|
55
|
+
driver = get_driver()
|
56
|
+
async with driver.transaction(read_only=True) as ro_txn:
|
57
|
+
yield ro_txn
|