nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -403
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +70 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +143 -117
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +37 -128
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -17,7 +17,7 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from typing import AsyncGenerator, Optional
|
20
|
+
from typing import TYPE_CHECKING, AsyncGenerator, Optional
|
21
21
|
|
22
22
|
import backoff
|
23
23
|
|
@@ -26,13 +26,15 @@ from nucliadb.common.maindb.driver import Transaction
|
|
26
26
|
from nucliadb.common.maindb.exceptions import ConflictError, NotFoundError
|
27
27
|
|
28
28
|
# These should be refactored
|
29
|
-
from nucliadb.ingest.orm.resource import KB_RESOURCE_SLUG, KB_RESOURCE_SLUG_BASE
|
30
|
-
from nucliadb.ingest.orm.resource import Resource as ResourceORM
|
31
29
|
from nucliadb.ingest.settings import settings as ingest_settings
|
32
|
-
from nucliadb_protos import
|
30
|
+
from nucliadb_protos import resources_pb2
|
33
31
|
from nucliadb_utils.utilities import get_storage
|
34
32
|
|
35
|
-
from .utils import
|
33
|
+
from .utils import with_ro_transaction
|
34
|
+
|
35
|
+
if TYPE_CHECKING:
|
36
|
+
from nucliadb.ingest.orm.resource import Resource as ResourceORM
|
37
|
+
|
36
38
|
|
37
39
|
KB_RESOURCE_BASIC = "/kbs/{kbid}/r/{uuid}"
|
38
40
|
KB_RESOURCE_BASIC_FS = "/kbs/{kbid}/r/{uuid}/basic" # Only used on FS driver
|
@@ -41,11 +43,16 @@ KB_RESOURCE_EXTRA = "/kbs/{kbid}/r/{uuid}/extra"
|
|
41
43
|
KB_RESOURCE_SECURITY = "/kbs/{kbid}/r/{uuid}/security"
|
42
44
|
KB_RESOURCE_RELATIONS = "/kbs/{kbid}/r/{uuid}/relations"
|
43
45
|
|
44
|
-
|
46
|
+
KB_RESOURCE_SLUG_BASE = "/kbs/{kbid}/s/"
|
47
|
+
KB_RESOURCE_SLUG = f"{KB_RESOURCE_SLUG_BASE}{{slug}}"
|
48
|
+
|
49
|
+
KB_RESOURCE_FIELDS = "/kbs/{kbid}/r/{uuid}/f/"
|
45
50
|
|
46
51
|
KB_RESOURCE_ALL_FIELDS = "/kbs/{kbid}/r/{uuid}/allfields"
|
47
52
|
KB_MATERIALIZED_RESOURCES_COUNT = "/kbs/{kbid}/materialized/resources/count"
|
48
53
|
|
54
|
+
KB_RESOURCE_SHARD = "/kbs/{kbid}/r/{uuid}/shard"
|
55
|
+
|
49
56
|
|
50
57
|
async def resource_exists(txn: Transaction, *, kbid: str, rid: str) -> bool:
|
51
58
|
basic = await get_basic_raw(txn, kbid=kbid, rid=rid)
|
@@ -55,10 +62,8 @@ async def resource_exists(txn: Transaction, *, kbid: str, rid: str) -> bool:
|
|
55
62
|
# id and slug
|
56
63
|
|
57
64
|
|
58
|
-
async def get_resource_uuid_from_slug(
|
59
|
-
|
60
|
-
) -> Optional[str]:
|
61
|
-
encoded_uuid = await txn.get(KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug))
|
65
|
+
async def get_resource_uuid_from_slug(txn: Transaction, *, kbid: str, slug: str) -> Optional[str]:
|
66
|
+
encoded_uuid = await txn.get(KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug, for_update=False))
|
62
67
|
if not encoded_uuid:
|
63
68
|
return None
|
64
69
|
return encoded_uuid.decode()
|
@@ -95,13 +100,11 @@ async def modify_slug(txn: Transaction, *, kbid: str, rid: str, new_slug: str) -
|
|
95
100
|
# resource-shard
|
96
101
|
|
97
102
|
|
98
|
-
@backoff.on_exception(
|
99
|
-
backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
|
100
|
-
)
|
103
|
+
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
|
101
104
|
async def get_resource_shard_id(
|
102
|
-
txn: Transaction, *, kbid: str, rid: str
|
105
|
+
txn: Transaction, *, kbid: str, rid: str, for_update: bool = False
|
103
106
|
) -> Optional[str]:
|
104
|
-
shard = await txn.get(KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid))
|
107
|
+
shard = await txn.get(KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid, for_update=for_update))
|
105
108
|
if shard is not None:
|
106
109
|
return shard.decode()
|
107
110
|
else:
|
@@ -115,9 +118,7 @@ async def set_resource_shard_id(txn: Transaction, *, kbid: str, rid: str, shard:
|
|
115
118
|
# Basic
|
116
119
|
|
117
120
|
|
118
|
-
async def get_basic(
|
119
|
-
txn: Transaction, *, kbid: str, rid: str
|
120
|
-
) -> Optional[resources_pb2.Basic]:
|
121
|
+
async def get_basic(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Basic]:
|
121
122
|
raw = await get_basic_raw(txn, kbid=kbid, rid=rid)
|
122
123
|
if raw is None:
|
123
124
|
return None
|
@@ -134,9 +135,7 @@ async def get_basic_raw(txn: Transaction, *, kbid: str, rid: str) -> Optional[by
|
|
134
135
|
return raw_basic
|
135
136
|
|
136
137
|
|
137
|
-
async def set_basic(
|
138
|
-
txn: Transaction, *, kbid: str, rid: str, basic: resources_pb2.Basic
|
139
|
-
):
|
138
|
+
async def set_basic(txn: Transaction, *, kbid: str, rid: str, basic: resources_pb2.Basic):
|
140
139
|
if ingest_settings.driver == "local":
|
141
140
|
await txn.set(
|
142
141
|
KB_RESOURCE_BASIC_FS.format(kbid=kbid, uuid=rid),
|
@@ -152,16 +151,12 @@ async def set_basic(
|
|
152
151
|
# Origin
|
153
152
|
|
154
153
|
|
155
|
-
async def get_origin(
|
156
|
-
txn: Transaction, *, kbid: str, rid: str
|
157
|
-
) -> Optional[resources_pb2.Origin]:
|
154
|
+
async def get_origin(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Origin]:
|
158
155
|
key = KB_RESOURCE_ORIGIN.format(kbid=kbid, uuid=rid)
|
159
156
|
return await get_kv_pb(txn, key, resources_pb2.Origin)
|
160
157
|
|
161
158
|
|
162
|
-
async def set_origin(
|
163
|
-
txn: Transaction, *, kbid: str, rid: str, origin: resources_pb2.Origin
|
164
|
-
):
|
159
|
+
async def set_origin(txn: Transaction, *, kbid: str, rid: str, origin: resources_pb2.Origin):
|
165
160
|
key = KB_RESOURCE_ORIGIN.format(kbid=kbid, uuid=rid)
|
166
161
|
await txn.set(key, origin.SerializeToString())
|
167
162
|
|
@@ -169,16 +164,12 @@ async def set_origin(
|
|
169
164
|
# Extra
|
170
165
|
|
171
166
|
|
172
|
-
async def get_extra(
|
173
|
-
txn: Transaction, *, kbid: str, rid: str
|
174
|
-
) -> Optional[resources_pb2.Extra]:
|
167
|
+
async def get_extra(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Extra]:
|
175
168
|
key = KB_RESOURCE_EXTRA.format(kbid=kbid, uuid=rid)
|
176
169
|
return await get_kv_pb(txn, key, resources_pb2.Extra)
|
177
170
|
|
178
171
|
|
179
|
-
async def set_extra(
|
180
|
-
txn: Transaction, *, kbid: str, rid: str, extra: resources_pb2.Extra
|
181
|
-
):
|
172
|
+
async def set_extra(txn: Transaction, *, kbid: str, rid: str, extra: resources_pb2.Extra):
|
182
173
|
key = KB_RESOURCE_EXTRA.format(kbid=kbid, uuid=rid)
|
183
174
|
await txn.set(key, extra.SerializeToString())
|
184
175
|
|
@@ -186,16 +177,12 @@ async def set_extra(
|
|
186
177
|
# Security
|
187
178
|
|
188
179
|
|
189
|
-
async def get_security(
|
190
|
-
txn: Transaction, *, kbid: str, rid: str
|
191
|
-
) -> Optional[resources_pb2.Security]:
|
180
|
+
async def get_security(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Security]:
|
192
181
|
key = KB_RESOURCE_SECURITY.format(kbid=kbid, uuid=rid)
|
193
182
|
return await get_kv_pb(txn, key, resources_pb2.Security)
|
194
183
|
|
195
184
|
|
196
|
-
async def set_security(
|
197
|
-
txn: Transaction, *, kbid: str, rid: str, security: resources_pb2.Security
|
198
|
-
):
|
185
|
+
async def set_security(txn: Transaction, *, kbid: str, rid: str, security: resources_pb2.Security):
|
199
186
|
key = KB_RESOURCE_SECURITY.format(kbid=kbid, uuid=rid)
|
200
187
|
await txn.set(key, security.SerializeToString())
|
201
188
|
|
@@ -203,16 +190,12 @@ async def set_security(
|
|
203
190
|
# Relations
|
204
191
|
|
205
192
|
|
206
|
-
async def get_relations(
|
207
|
-
txn: Transaction, *, kbid: str, rid: str
|
208
|
-
) -> Optional[resources_pb2.Relations]:
|
193
|
+
async def get_relations(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Relations]:
|
209
194
|
key = KB_RESOURCE_RELATIONS.format(kbid=kbid, uuid=rid)
|
210
195
|
return await get_kv_pb(txn, key, resources_pb2.Relations)
|
211
196
|
|
212
197
|
|
213
|
-
async def set_relations(
|
214
|
-
txn: Transaction, *, kbid: str, rid: str, relations: resources_pb2.Relations
|
215
|
-
):
|
198
|
+
async def set_relations(txn: Transaction, *, kbid: str, rid: str, relations: resources_pb2.Relations):
|
216
199
|
key = KB_RESOURCE_RELATIONS.format(kbid=kbid, uuid=rid)
|
217
200
|
await txn.set(key, relations.SerializeToString())
|
218
201
|
|
@@ -241,25 +224,17 @@ async def iterate_resource_ids(*, kbid: str) -> AsyncGenerator[str, None]:
|
|
241
224
|
yield rid
|
242
225
|
|
243
226
|
|
244
|
-
@backoff.on_exception(
|
245
|
-
backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
|
246
|
-
)
|
227
|
+
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
|
247
228
|
async def _iter_resource_slugs(*, kbid: str) -> AsyncGenerator[str, None]:
|
248
|
-
async with
|
249
|
-
async for key in txn.keys(
|
250
|
-
match=KB_RESOURCE_SLUG_BASE.format(kbid=kbid), count=-1
|
251
|
-
):
|
229
|
+
async with with_ro_transaction() as txn:
|
230
|
+
async for key in txn.keys(match=KB_RESOURCE_SLUG_BASE.format(kbid=kbid)):
|
252
231
|
yield key.split("/")[-1]
|
253
232
|
|
254
233
|
|
255
|
-
@backoff.on_exception(
|
256
|
-
backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
|
257
|
-
)
|
234
|
+
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
|
258
235
|
async def _get_resource_ids_from_slugs(kbid: str, slugs: list[str]) -> list[str]:
|
259
|
-
async with
|
260
|
-
rids = await txn.batch_get(
|
261
|
-
[KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug) for slug in slugs]
|
262
|
-
)
|
236
|
+
async with with_ro_transaction() as txn:
|
237
|
+
rids = await txn.batch_get([KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug) for slug in slugs])
|
263
238
|
return [rid.decode() for rid in rids if rid is not None]
|
264
239
|
|
265
240
|
|
@@ -288,26 +263,24 @@ async def get_number_of_resources(txn: Transaction, *, kbid: str) -> int:
|
|
288
263
|
"""
|
289
264
|
Return cached number of resources in a knowledgebox.
|
290
265
|
"""
|
291
|
-
raw_value = await txn.get(KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid))
|
266
|
+
raw_value = await txn.get(KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid), for_update=False)
|
292
267
|
if raw_value is None:
|
293
268
|
return -1
|
294
269
|
return int(raw_value)
|
295
270
|
|
296
271
|
|
297
272
|
async def set_number_of_resources(txn: Transaction, kbid: str, value: int) -> None:
|
298
|
-
await txn.set(
|
299
|
-
KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid), str(value).encode()
|
300
|
-
)
|
273
|
+
await txn.set(KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid), str(value).encode())
|
301
274
|
|
302
275
|
|
303
276
|
# Fields (materialized key with all field ids)
|
304
277
|
|
305
278
|
|
306
279
|
async def get_all_field_ids(
|
307
|
-
txn: Transaction, *, kbid: str, rid: str
|
280
|
+
txn: Transaction, *, kbid: str, rid: str, for_update: bool = False
|
308
281
|
) -> Optional[resources_pb2.AllFieldIDs]:
|
309
282
|
key = KB_RESOURCE_ALL_FIELDS.format(kbid=kbid, uuid=rid)
|
310
|
-
return await get_kv_pb(txn, key, resources_pb2.AllFieldIDs)
|
283
|
+
return await get_kv_pb(txn, key, resources_pb2.AllFieldIDs, for_update=for_update)
|
311
284
|
|
312
285
|
|
313
286
|
async def set_all_field_ids(
|
@@ -317,9 +290,7 @@ async def set_all_field_ids(
|
|
317
290
|
await txn.set(key, allfields.SerializeToString())
|
318
291
|
|
319
292
|
|
320
|
-
async def has_field(
|
321
|
-
txn: Transaction, *, kbid: str, rid: str, field_id: resources_pb2.FieldID
|
322
|
-
) -> bool:
|
293
|
+
async def has_field(txn: Transaction, *, kbid: str, rid: str, field_id: resources_pb2.FieldID) -> bool:
|
323
294
|
fields = await get_all_field_ids(txn, kbid=kbid, rid=rid)
|
324
295
|
if fields is None:
|
325
296
|
return False
|
@@ -332,25 +303,8 @@ async def has_field(
|
|
332
303
|
# ORM mix (this functions shouldn't belong here)
|
333
304
|
|
334
305
|
|
335
|
-
|
336
|
-
|
337
|
-
) -> Optional[writer_pb2.BrokerMessage]:
|
338
|
-
resource = await get_resource(txn, kbid=kbid, rid=rid)
|
339
|
-
if resource is None:
|
340
|
-
return None
|
341
|
-
|
342
|
-
resource.disable_vectors = False
|
343
|
-
resource.txn = txn
|
344
|
-
bm = await resource.generate_broker_message()
|
345
|
-
return bm
|
346
|
-
|
347
|
-
|
348
|
-
@backoff.on_exception(
|
349
|
-
backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
|
350
|
-
)
|
351
|
-
async def get_resource(
|
352
|
-
txn: Transaction, *, kbid: str, rid: str
|
353
|
-
) -> Optional[ResourceORM]:
|
306
|
+
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
|
307
|
+
async def get_resource(txn: Transaction, *, kbid: str, rid: str) -> Optional["ResourceORM"]:
|
354
308
|
"""
|
355
309
|
Not ideal to return Resource type here but refactoring would
|
356
310
|
require a lot of changes.
|
@@ -362,19 +316,3 @@ async def get_resource(
|
|
362
316
|
|
363
317
|
kb_orm = KnowledgeBoxORM(txn, await get_storage(), kbid)
|
364
318
|
return await kb_orm.get(rid)
|
365
|
-
|
366
|
-
|
367
|
-
@backoff.on_exception(
|
368
|
-
backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
|
369
|
-
)
|
370
|
-
async def get_resource_index_message(
|
371
|
-
txn: Transaction, *, kbid: str, rid: str
|
372
|
-
) -> Optional[noderesources_pb2.Resource]:
|
373
|
-
# prevent circulat imports -- this is not ideal that we have the ORM mix here.
|
374
|
-
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
|
375
|
-
|
376
|
-
kb_orm = KnowledgeBoxORM(txn, await get_storage(), kbid)
|
377
|
-
res = await kb_orm.get(rid)
|
378
|
-
if res is None:
|
379
|
-
return None
|
380
|
-
return (await res.generate_index_message()).brain
|
@@ -21,22 +21,42 @@ import logging
|
|
21
21
|
from typing import AsyncGenerator, Optional
|
22
22
|
|
23
23
|
import orjson
|
24
|
+
from pydantic import BaseModel
|
24
25
|
|
25
26
|
from nucliadb.common.maindb.driver import Transaction
|
27
|
+
from nucliadb_protos import knowledgebox_pb2 as kb_pb2
|
26
28
|
from nucliadb_protos import writer_pb2
|
27
29
|
|
28
|
-
from .utils import get_kv_pb,
|
30
|
+
from .utils import get_kv_pb, with_ro_transaction
|
29
31
|
|
30
32
|
logger = logging.getLogger(__name__)
|
31
33
|
|
34
|
+
KB_ROLLOVER_STATE = "/kbs/{kbid}/rollover/state"
|
32
35
|
KB_ROLLOVER_SHARDS = "/kbs/{kbid}/rollover/shards"
|
36
|
+
KB_ROLLOVER_EXTERNAL_INDEX_METADATA = "/kbs/{kbid}/rollover/external_index_metadata"
|
33
37
|
KB_ROLLOVER_RESOURCES_TO_INDEX = "/kbs/{kbid}/rollover/to-index/{resource}"
|
34
38
|
KB_ROLLOVER_RESOURCES_INDEXED = "/kbs/{kbid}/rollover/indexed/{resource}"
|
35
39
|
|
36
40
|
|
37
|
-
|
38
|
-
|
39
|
-
|
41
|
+
class RolloverState(BaseModel):
|
42
|
+
rollover_shards_created: bool = False
|
43
|
+
external_index_created: bool = False
|
44
|
+
resources_scheduled: bool = False
|
45
|
+
resources_indexed: bool = False
|
46
|
+
cutover_shards: bool = False
|
47
|
+
cutover_external_index: bool = False
|
48
|
+
resources_validated: bool = False
|
49
|
+
|
50
|
+
|
51
|
+
class RolloverStateNotFoundError(Exception):
|
52
|
+
"""
|
53
|
+
Raised when the rollover state is not found.
|
54
|
+
"""
|
55
|
+
|
56
|
+
...
|
57
|
+
|
58
|
+
|
59
|
+
async def get_kb_rollover_shards(txn: Transaction, *, kbid: str) -> Optional[writer_pb2.Shards]:
|
40
60
|
key = KB_ROLLOVER_SHARDS.format(kbid=kbid)
|
41
61
|
return await get_kv_pb(txn, key, writer_pb2.Shards)
|
42
62
|
|
@@ -106,7 +126,9 @@ async def get_indexed_data(
|
|
106
126
|
val = await txn.get(key)
|
107
127
|
if val is not None:
|
108
128
|
data = orjson.loads(val)
|
109
|
-
|
129
|
+
shard_id: str = data[0]
|
130
|
+
modification_time: int = data[1]
|
131
|
+
return shard_id, modification_time
|
110
132
|
return None
|
111
133
|
|
112
134
|
|
@@ -122,15 +144,13 @@ async def iter_indexed_keys(*, kbid: str) -> AsyncGenerator[str, None]:
|
|
122
144
|
internally managed
|
123
145
|
"""
|
124
146
|
start_key = KB_ROLLOVER_RESOURCES_INDEXED.format(kbid=kbid, resource="")
|
125
|
-
async with
|
126
|
-
async for key in txn.keys(match=start_key
|
147
|
+
async with with_ro_transaction() as txn:
|
148
|
+
async for key in txn.keys(match=start_key):
|
127
149
|
yield key.split("/")[-1]
|
128
150
|
|
129
151
|
|
130
|
-
async def _get_batch_indexed_data(
|
131
|
-
|
132
|
-
) -> list[tuple[str, tuple[str, int]]]:
|
133
|
-
async with with_transaction() as txn:
|
152
|
+
async def _get_batch_indexed_data(*, kbid, batch: list[str]) -> list[tuple[str, tuple[str, int]]]:
|
153
|
+
async with with_ro_transaction() as txn:
|
134
154
|
values = await txn.batch_get(
|
135
155
|
[
|
136
156
|
KB_ROLLOVER_RESOURCES_INDEXED.format(kbid=kbid, resource=resource_id)
|
@@ -140,14 +160,15 @@ async def _get_batch_indexed_data(
|
|
140
160
|
results: list[tuple[str, tuple[str, int]]] = []
|
141
161
|
for key, val in zip(batch, values):
|
142
162
|
if val is not None:
|
143
|
-
|
163
|
+
shard_id: str
|
164
|
+
modification_time: int
|
165
|
+
shard_id, modification_time = orjson.loads(val)
|
166
|
+
data = (shard_id, modification_time)
|
144
167
|
results.append((key.split("/")[-1], data))
|
145
168
|
return results
|
146
169
|
|
147
170
|
|
148
|
-
async def iterate_indexed_data(
|
149
|
-
*, kbid: str
|
150
|
-
) -> AsyncGenerator[tuple[str, tuple[str, int]], None]:
|
171
|
+
async def iterate_indexed_data(*, kbid: str) -> AsyncGenerator[tuple[str, tuple[str, int]], None]:
|
151
172
|
"""
|
152
173
|
This function is optimized for reducing the time a transaction is open.
|
153
174
|
|
@@ -164,3 +185,43 @@ async def iterate_indexed_data(
|
|
164
185
|
if len(batch) > 0:
|
165
186
|
for key, val in await _get_batch_indexed_data(kbid=kbid, batch=batch):
|
166
187
|
yield key, val
|
188
|
+
|
189
|
+
|
190
|
+
async def get_rollover_state(txn: Transaction, kbid: str) -> RolloverState:
|
191
|
+
key = KB_ROLLOVER_STATE.format(kbid=kbid)
|
192
|
+
val = await txn.get(key)
|
193
|
+
if not val:
|
194
|
+
raise RolloverStateNotFoundError(kbid)
|
195
|
+
return RolloverState.model_validate_json(val)
|
196
|
+
|
197
|
+
|
198
|
+
async def set_rollover_state(txn: Transaction, kbid: str, state: RolloverState) -> None:
|
199
|
+
key = KB_ROLLOVER_STATE.format(kbid=kbid)
|
200
|
+
await txn.set(key, state.model_dump_json().encode())
|
201
|
+
|
202
|
+
|
203
|
+
async def clear_rollover_state(txn: Transaction, kbid: str) -> None:
|
204
|
+
key = KB_ROLLOVER_STATE.format(kbid=kbid)
|
205
|
+
await txn.delete(key)
|
206
|
+
|
207
|
+
|
208
|
+
async def update_kb_rollover_external_index_metadata(
|
209
|
+
txn: Transaction, *, kbid: str, metadata: kb_pb2.StoredExternalIndexProviderMetadata
|
210
|
+
) -> None:
|
211
|
+
key = KB_ROLLOVER_EXTERNAL_INDEX_METADATA.format(kbid=kbid)
|
212
|
+
await txn.set(key, metadata.SerializeToString())
|
213
|
+
|
214
|
+
|
215
|
+
async def get_kb_rollover_external_index_metadata(
|
216
|
+
txn: Transaction, *, kbid: str
|
217
|
+
) -> Optional[kb_pb2.StoredExternalIndexProviderMetadata]:
|
218
|
+
key = KB_ROLLOVER_EXTERNAL_INDEX_METADATA.format(kbid=kbid)
|
219
|
+
val = await txn.get(key)
|
220
|
+
if not val:
|
221
|
+
return None
|
222
|
+
return kb_pb2.StoredExternalIndexProviderMetadata.FromString(val)
|
223
|
+
|
224
|
+
|
225
|
+
async def delete_kb_rollover_external_index_metadata(txn: Transaction, *, kbid: str) -> None:
|
226
|
+
key = KB_ROLLOVER_EXTERNAL_INDEX_METADATA.format(kbid=kbid)
|
227
|
+
await txn.delete(key)
|
@@ -29,7 +29,7 @@ KB_SYNONYMS = "/kbs/{kbid}/synonyms"
|
|
29
29
|
|
30
30
|
async def get(txn: Transaction, *, kbid: str) -> Optional[knowledgebox_pb2.Synonyms]:
|
31
31
|
key = KB_SYNONYMS.format(kbid=kbid)
|
32
|
-
return await get_kv_pb(txn, key, knowledgebox_pb2.Synonyms)
|
32
|
+
return await get_kv_pb(txn, key, knowledgebox_pb2.Synonyms, for_update=False)
|
33
33
|
|
34
34
|
|
35
35
|
async def set(txn: Transaction, *, kbid: str, synonyms: knowledgebox_pb2.Synonyms):
|
@@ -29,9 +29,9 @@ PB_TYPE = TypeVar("PB_TYPE", bound=Message)
|
|
29
29
|
|
30
30
|
|
31
31
|
async def get_kv_pb(
|
32
|
-
txn: Transaction, key: str, pb_type: Type[PB_TYPE]
|
32
|
+
txn: Transaction, key: str, pb_type: Type[PB_TYPE], for_update: bool = True
|
33
33
|
) -> Optional[PB_TYPE]:
|
34
|
-
serialized: Optional[bytes] = await txn.get(key)
|
34
|
+
serialized: Optional[bytes] = await txn.get(key, for_update=for_update)
|
35
35
|
if serialized is None:
|
36
36
|
return None
|
37
37
|
pb = pb_type()
|
@@ -40,9 +40,18 @@ async def get_kv_pb(
|
|
40
40
|
|
41
41
|
|
42
42
|
@contextlib.asynccontextmanager
|
43
|
-
async def
|
43
|
+
async def with_rw_transaction():
|
44
44
|
driver = get_driver()
|
45
|
-
async with driver.transaction(
|
46
|
-
read_only=read_only, wait_for_abort=wait_for_abort
|
47
|
-
) as txn:
|
45
|
+
async with driver.transaction(read_only=False) as txn:
|
48
46
|
yield txn
|
47
|
+
|
48
|
+
|
49
|
+
# For backwards compatibility
|
50
|
+
with_transaction = with_rw_transaction
|
51
|
+
|
52
|
+
|
53
|
+
@contextlib.asynccontextmanager
|
54
|
+
async def with_ro_transaction():
|
55
|
+
driver = get_driver()
|
56
|
+
async with driver.transaction(read_only=True) as ro_txn:
|
57
|
+
yield ro_txn
|
@@ -0,0 +1,110 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
from typing import AsyncIterator, Optional
|
21
|
+
|
22
|
+
from nucliadb.common.datamanagers.utils import get_kv_pb
|
23
|
+
from nucliadb.common.maindb.driver import Transaction
|
24
|
+
from nucliadb_protos import knowledgebox_pb2
|
25
|
+
|
26
|
+
KB_VECTORSETS = "/kbs/{kbid}/vectorsets"
|
27
|
+
|
28
|
+
|
29
|
+
class BrokenInvariant(Exception):
|
30
|
+
pass
|
31
|
+
|
32
|
+
|
33
|
+
async def initialize(txn: Transaction, *, kbid: str):
|
34
|
+
key = KB_VECTORSETS.format(kbid=kbid)
|
35
|
+
await txn.set(key, knowledgebox_pb2.KnowledgeBoxVectorSetsConfig().SerializeToString())
|
36
|
+
|
37
|
+
|
38
|
+
async def get(
|
39
|
+
txn: Transaction, *, kbid: str, vectorset_id: str
|
40
|
+
) -> Optional[knowledgebox_pb2.VectorSetConfig]:
|
41
|
+
kb_vectorsets = await _get_or_default(txn, kbid=kbid, for_update=False)
|
42
|
+
index = _find_vectorset(kb_vectorsets, vectorset_id)
|
43
|
+
if index is None:
|
44
|
+
return None
|
45
|
+
return kb_vectorsets.vectorsets[index]
|
46
|
+
|
47
|
+
|
48
|
+
async def exists(txn, *, kbid: str, vectorset_id: str) -> bool:
|
49
|
+
kb_vectorsets = await _get_or_default(txn, kbid=kbid, for_update=False)
|
50
|
+
return _find_vectorset(kb_vectorsets, vectorset_id) is not None
|
51
|
+
|
52
|
+
|
53
|
+
async def iter(
|
54
|
+
txn: Transaction, *, kbid: str
|
55
|
+
) -> AsyncIterator[tuple[str, knowledgebox_pb2.VectorSetConfig]]:
|
56
|
+
kb_vectorsets = await _get_or_default(txn, kbid=kbid, for_update=False)
|
57
|
+
for config in kb_vectorsets.vectorsets:
|
58
|
+
yield config.vectorset_id, config
|
59
|
+
|
60
|
+
|
61
|
+
async def set(txn: Transaction, *, kbid: str, config: knowledgebox_pb2.VectorSetConfig):
|
62
|
+
"""Create or update a vectorset configuration"""
|
63
|
+
kb_vectorsets = await _get_or_default(txn, kbid=kbid, for_update=True)
|
64
|
+
index = _find_vectorset(kb_vectorsets, config.vectorset_id)
|
65
|
+
if index is None:
|
66
|
+
# adding a new vectorset
|
67
|
+
kb_vectorsets.vectorsets.append(config)
|
68
|
+
else:
|
69
|
+
# updating a vectorset
|
70
|
+
kb_vectorsets.vectorsets[index].CopyFrom(config)
|
71
|
+
|
72
|
+
key = KB_VECTORSETS.format(kbid=kbid)
|
73
|
+
await txn.set(key, kb_vectorsets.SerializeToString())
|
74
|
+
|
75
|
+
|
76
|
+
async def delete(txn: Transaction, *, kbid: str, vectorset_id: str):
|
77
|
+
kb_vectorsets = await _get_or_default(txn, kbid=kbid, for_update=True)
|
78
|
+
index = _find_vectorset(kb_vectorsets, vectorset_id)
|
79
|
+
if index is None:
|
80
|
+
# already deleted
|
81
|
+
return
|
82
|
+
|
83
|
+
del kb_vectorsets.vectorsets[index]
|
84
|
+
key = KB_VECTORSETS.format(kbid=kbid)
|
85
|
+
await txn.set(key, kb_vectorsets.SerializeToString())
|
86
|
+
|
87
|
+
|
88
|
+
# XXX At some point in the vectorset epic, we should make this key mandatory and
|
89
|
+
# fail instead of providing a default
|
90
|
+
async def _get_or_default(
|
91
|
+
txn: Transaction,
|
92
|
+
*,
|
93
|
+
kbid: str,
|
94
|
+
for_update: bool = True,
|
95
|
+
) -> knowledgebox_pb2.KnowledgeBoxVectorSetsConfig:
|
96
|
+
key = KB_VECTORSETS.format(kbid=kbid)
|
97
|
+
stored = await get_kv_pb(
|
98
|
+
txn, key, knowledgebox_pb2.KnowledgeBoxVectorSetsConfig, for_update=for_update
|
99
|
+
)
|
100
|
+
return stored or knowledgebox_pb2.KnowledgeBoxVectorSetsConfig()
|
101
|
+
|
102
|
+
|
103
|
+
def _find_vectorset(
|
104
|
+
kb_vectorsets: knowledgebox_pb2.KnowledgeBoxVectorSetsConfig, vectorset_id: str
|
105
|
+
) -> Optional[int]:
|
106
|
+
"""Return the position of the vectorset in `vectorsets` or `None` if not found."""
|
107
|
+
for idx, vectorset in enumerate(kb_vectorsets.vectorsets):
|
108
|
+
if vectorset.vectorset_id == vectorset_id:
|
109
|
+
return idx
|
110
|
+
return None
|