nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -0,0 +1,102 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
"""
|
22
|
+
Atomic datamanagers
|
23
|
+
|
24
|
+
This module aims to provide a simple way to call a datamanager function in a
|
25
|
+
single transaction, avoiding the need to wrap each call as in this example:
|
26
|
+
|
27
|
+
```
|
28
|
+
async def <function>(...):
|
29
|
+
async with datamanagers.with_transaction() as txn:
|
30
|
+
await datamanagers.<module>.<function>(...)
|
31
|
+
```
|
32
|
+
|
33
|
+
Or simply a more handy way to call a datamanager operation without caring about
|
34
|
+
its transaction
|
35
|
+
|
36
|
+
"""
|
37
|
+
|
38
|
+
import sys
|
39
|
+
from functools import wraps
|
40
|
+
|
41
|
+
from . import kb as kb_dm
|
42
|
+
from . import labels as labels_dm
|
43
|
+
from . import resources as resources_dm
|
44
|
+
from . import synonyms as synonyms_dm
|
45
|
+
from .utils import with_ro_transaction, with_transaction
|
46
|
+
|
47
|
+
# XXX: we are using the not exported _ParamSpec to support 3.9. Whenever we
|
48
|
+
# upgrade to >= 3.10 we'll be able to use ParamSpecKwargs and improve the
|
49
|
+
# typing. We are abusing ParamSpec anyway to better support text editors, so
|
50
|
+
# we also need to ignore some mypy complaints
|
51
|
+
|
52
|
+
__python_version = (sys.version_info.major, sys.version_info.minor)
|
53
|
+
if __python_version == (3, 9):
|
54
|
+
from typing_extensions import ParamSpec
|
55
|
+
else:
|
56
|
+
from typing import ParamSpec # type: ignore
|
57
|
+
|
58
|
+
P = ParamSpec("P")
|
59
|
+
|
60
|
+
|
61
|
+
def ro_txn_wrap(fun: P) -> P: # type: ignore
|
62
|
+
@wraps(fun)
|
63
|
+
async def wrapper(**kwargs: P.kwargs):
|
64
|
+
async with with_ro_transaction() as txn:
|
65
|
+
return await fun(txn, **kwargs)
|
66
|
+
|
67
|
+
return wrapper
|
68
|
+
|
69
|
+
|
70
|
+
def rw_txn_wrap(fun: P) -> P: # type: ignore
|
71
|
+
@wraps(fun)
|
72
|
+
async def wrapper(**kwargs: P.kwargs):
|
73
|
+
async with with_transaction() as txn:
|
74
|
+
result = await fun(txn, **kwargs)
|
75
|
+
await txn.commit()
|
76
|
+
return result
|
77
|
+
|
78
|
+
return wrapper
|
79
|
+
|
80
|
+
|
81
|
+
class kb:
|
82
|
+
exists_kb = ro_txn_wrap(kb_dm.exists_kb)
|
83
|
+
get_config = ro_txn_wrap(kb_dm.get_config)
|
84
|
+
get_external_index_provider_metadata = ro_txn_wrap(kb_dm.get_external_index_provider_metadata)
|
85
|
+
|
86
|
+
|
87
|
+
class resources:
|
88
|
+
get_resource_uuid_from_slug = ro_txn_wrap(resources_dm.get_resource_uuid_from_slug)
|
89
|
+
resource_exists = ro_txn_wrap(resources_dm.resource_exists)
|
90
|
+
slug_exists = ro_txn_wrap(resources_dm.slug_exists)
|
91
|
+
|
92
|
+
|
93
|
+
class labelset:
|
94
|
+
get = ro_txn_wrap(labels_dm.get_labelset)
|
95
|
+
set = rw_txn_wrap(labels_dm.set_labelset)
|
96
|
+
delete = rw_txn_wrap(labels_dm.delete_labelset)
|
97
|
+
get_all = ro_txn_wrap(labels_dm.get_labels)
|
98
|
+
|
99
|
+
|
100
|
+
class synonyms:
|
101
|
+
get = ro_txn_wrap(synonyms_dm.get)
|
102
|
+
set = rw_txn_wrap(synonyms_dm.set)
|
@@ -31,13 +31,13 @@ logger = logging.getLogger(__name__)
|
|
31
31
|
KB_SHARDS = "/kbs/{kbid}/shards"
|
32
32
|
|
33
33
|
|
34
|
-
async def get_kb_shards(
|
34
|
+
async def get_kb_shards(
|
35
|
+
txn: Transaction, *, kbid: str, for_update: bool = False
|
36
|
+
) -> Optional[writer_pb2.Shards]:
|
35
37
|
key = KB_SHARDS.format(kbid=kbid)
|
36
|
-
return await get_kv_pb(txn, key, writer_pb2.Shards)
|
38
|
+
return await get_kv_pb(txn, key, writer_pb2.Shards, for_update=for_update)
|
37
39
|
|
38
40
|
|
39
|
-
async def update_kb_shards(
|
40
|
-
txn: Transaction, *, kbid: str, shards: writer_pb2.Shards
|
41
|
-
) -> None:
|
41
|
+
async def update_kb_shards(txn: Transaction, *, kbid: str, shards: writer_pb2.Shards) -> None:
|
42
42
|
key = KB_SHARDS.format(kbid=kbid)
|
43
43
|
await txn.set(key, shards.SerializeToString())
|
@@ -85,11 +85,9 @@ async def set_entities_group(
|
|
85
85
|
await txn.set(key, entities.SerializeToString())
|
86
86
|
|
87
87
|
|
88
|
-
async def iterate_entities_groups(
|
89
|
-
txn: Transaction, *, kbid: str
|
90
|
-
) -> AsyncGenerator[str, None]:
|
88
|
+
async def iterate_entities_groups(txn: Transaction, *, kbid: str) -> AsyncGenerator[str, None]:
|
91
89
|
entities_key = KB_ENTITIES.format(kbid=kbid)
|
92
|
-
async for key in txn.keys(entities_key
|
90
|
+
async for key in txn.keys(entities_key):
|
93
91
|
group = key.split("/")[-1]
|
94
92
|
yield group
|
95
93
|
|
@@ -106,9 +104,7 @@ async def get_entities_group(
|
|
106
104
|
return eg
|
107
105
|
|
108
106
|
|
109
|
-
async def get_deleted_groups(
|
110
|
-
txn: Transaction, *, kbid: str
|
111
|
-
) -> kb_pb2.DeletedEntitiesGroups:
|
107
|
+
async def get_deleted_groups(txn: Transaction, *, kbid: str) -> kb_pb2.DeletedEntitiesGroups:
|
112
108
|
deleted_groups_key = KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid)
|
113
109
|
payload = await txn.get(deleted_groups_key)
|
114
110
|
deg = kb_pb2.DeletedEntitiesGroups()
|
@@ -122,18 +118,14 @@ async def mark_group_as_deleted(txn: Transaction, *, kbid: str, group: str) -> N
|
|
122
118
|
deg = await get_deleted_groups(txn, kbid=kbid)
|
123
119
|
if group not in deg.entities_groups:
|
124
120
|
deg.entities_groups.append(group)
|
125
|
-
await txn.set(
|
126
|
-
KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString()
|
127
|
-
)
|
121
|
+
await txn.set(KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString())
|
128
122
|
|
129
123
|
|
130
124
|
async def unmark_group_as_deleted(txn: Transaction, *, kbid: str, group: str) -> None:
|
131
125
|
deg = await get_deleted_groups(txn, kbid=kbid)
|
132
126
|
if group in deg.entities_groups:
|
133
127
|
deg.entities_groups.remove(group)
|
134
|
-
await txn.set(
|
135
|
-
KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString()
|
136
|
-
)
|
128
|
+
await txn.set(KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString())
|
137
129
|
|
138
130
|
|
139
131
|
async def get_entities_meta_cache(txn: Transaction, *, kbid: str) -> EntitiesMetaCache:
|
@@ -143,7 +135,5 @@ async def get_entities_meta_cache(txn: Transaction, *, kbid: str) -> EntitiesMet
|
|
143
135
|
return pickle.loads(value)
|
144
136
|
|
145
137
|
|
146
|
-
async def set_entities_meta_cache(
|
147
|
-
txn: Transaction, kbid: str, cache: EntitiesMetaCache
|
148
|
-
) -> None:
|
138
|
+
async def set_entities_meta_cache(txn: Transaction, kbid: str, cache: EntitiesMetaCache) -> None:
|
149
139
|
await txn.set(KB_ENTITIES_CACHE.format(kbid=kbid), pickle.dumps(cache, protocol=5))
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
from typing import Optional
|
22
|
+
|
23
|
+
from google.protobuf.message import Message
|
24
|
+
|
25
|
+
from nucliadb.common.datamanagers.utils import get_kv_pb
|
26
|
+
from nucliadb.common.maindb.driver import Transaction
|
27
|
+
from nucliadb_protos import writer_pb2
|
28
|
+
|
29
|
+
KB_RESOURCE_FIELD = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}"
|
30
|
+
KB_RESOURCE_FIELD_ERROR = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}/error"
|
31
|
+
|
32
|
+
|
33
|
+
async def get_raw(
|
34
|
+
txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str
|
35
|
+
) -> Optional[bytes]:
|
36
|
+
key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
37
|
+
return await txn.get(key)
|
38
|
+
|
39
|
+
|
40
|
+
async def set(
|
41
|
+
txn: Transaction,
|
42
|
+
*,
|
43
|
+
kbid: str,
|
44
|
+
rid: str,
|
45
|
+
field_type: str,
|
46
|
+
field_id: str,
|
47
|
+
value: Message,
|
48
|
+
):
|
49
|
+
key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
50
|
+
await txn.set(key, value.SerializeToString())
|
51
|
+
|
52
|
+
|
53
|
+
async def delete(txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str):
|
54
|
+
base_key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
55
|
+
# Make sure we explicitly delete the field and any nested key
|
56
|
+
keys_to_delete = []
|
57
|
+
async for key in txn.keys(base_key):
|
58
|
+
keys_to_delete.append(key)
|
59
|
+
|
60
|
+
for key in keys_to_delete:
|
61
|
+
await txn.delete(key)
|
62
|
+
|
63
|
+
|
64
|
+
# Error
|
65
|
+
|
66
|
+
|
67
|
+
async def get_error(
    txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str
) -> Optional[writer_pb2.Error]:
    """Load the ``writer_pb2.Error`` stored under a resource field's error key.

    Delegates the read and decoding to ``get_kv_pb``.
    """
    error_key = KB_RESOURCE_FIELD_ERROR.format(
        kbid=kbid, uuid=rid, type=field_type, field=field_id
    )
    stored_error = await get_kv_pb(txn, error_key, writer_pb2.Error)
    return stored_error
|
72
|
+
|
73
|
+
|
74
|
+
async def set_error(
    txn: Transaction,
    *,
    kbid: str,
    rid: str,
    field_type: str,
    field_id: str,
    error: writer_pb2.Error,
):
    """Store the serialized ``error`` under a resource field's error key."""
    error_key = KB_RESOURCE_FIELD_ERROR.format(
        kbid=kbid, uuid=rid, type=field_type, field=field_id
    )
    serialized = error.SerializeToString()
    await txn.set(error_key, serialized)
|
@@ -33,15 +33,43 @@ KB_SLUGS = KB_SLUGS_BASE + "{slug}"
|
|
33
33
|
logger = logging.getLogger(__name__)
|
34
34
|
|
35
35
|
|
36
|
+
async def get_kbs(txn: Transaction, *, prefix: str = "") -> AsyncIterator[tuple[str, str]]:
    """Iterate over the knowledge boxes whose slug key matches ``prefix``.

    Yields ``(uuid, slug)`` tuples. A slug key for which no uuid can be read
    is logged as an error and skipped.
    """
    async for key in txn.keys(KB_SLUGS.format(slug=prefix)):
        # Strip only the leading base: str.replace would also remove any later
        # occurrence of the base inside the slug itself.
        slug = key.removeprefix(KB_SLUGS_BASE)
        uuid = await get_kb_uuid(txn, slug=slug)
        if uuid is None:
            logger.error(f"KB with slug ({slug}) but without uuid?")
            continue
        yield (uuid, slug)
|
44
|
+
|
45
|
+
|
36
46
|
async def exists_kb(txn: Transaction, *, kbid: str) -> bool:
    """Tell whether a configuration entry can be read for ``kbid``."""
    config = await get_config(txn, kbid=kbid, for_update=False)
    return config is not None
|
48
|
+
|
49
|
+
|
50
|
+
async def get_kb_uuid(txn: Transaction, *, slug: str) -> Optional[str]:
    """Return the decoded value stored under the slug key, or ``None`` if there is none."""
    raw_uuid = await txn.get(KB_SLUGS.format(slug=slug), for_update=False)
    return None if raw_uuid is None else raw_uuid.decode()
|
56
|
+
|
57
|
+
|
58
|
+
async def set_kbid_for_slug(txn: Transaction, *, slug: str, kbid: str):
    """Store the encoded ``kbid`` under the key built for ``slug``."""
    await txn.set(KB_SLUGS.format(slug=slug), kbid.encode())
|
61
|
+
|
62
|
+
|
63
|
+
async def delete_kb_slug(txn: Transaction, *, slug: str):
    """Delete the key built for ``slug``."""
    slug_key = KB_SLUGS.format(slug=slug)
    await txn.delete(slug_key)
|
38
66
|
|
39
67
|
|
40
68
|
async def get_config(
|
41
|
-
txn: Transaction, *, kbid: str
|
69
|
+
txn: Transaction, *, kbid: str, for_update: bool = False
|
42
70
|
) -> Optional[knowledgebox_pb2.KnowledgeBoxConfig]:
|
43
71
|
key = KB_UUID.format(kbid=kbid)
|
44
|
-
payload = await txn.get(key)
|
72
|
+
payload = await txn.get(key, for_update=for_update)
|
45
73
|
if payload is None:
|
46
74
|
return None
|
47
75
|
response = knowledgebox_pb2.KnowledgeBoxConfig()
|
@@ -49,10 +77,18 @@ async def get_config(
|
|
49
77
|
return response
|
50
78
|
|
51
79
|
|
52
|
-
async def
|
53
|
-
|
54
|
-
|
55
|
-
|
80
|
+
async def set_config(txn: Transaction, *, kbid: str, config: knowledgebox_pb2.KnowledgeBoxConfig):
    """Store the serialized ``config`` under the knowledge box key for ``kbid``."""
    config_key = KB_UUID.format(kbid=kbid)
    serialized = config.SerializeToString()
    await txn.set(config_key, serialized)
|
83
|
+
|
84
|
+
|
85
|
+
async def delete_config(txn: Transaction, *, kbid: str) -> None:
    """Delete the knowledge box key built for ``kbid``."""
    await txn.delete(KB_UUID.format(kbid=kbid))
|
88
|
+
|
89
|
+
|
90
|
+
async def get_model_metadata(txn: Transaction, *, kbid: str) -> knowledgebox_pb2.SemanticModelMetadata:
|
91
|
+
shards_obj = await cluster.get_kb_shards(txn, kbid=kbid, for_update=False)
|
56
92
|
if shards_obj is None:
|
57
93
|
raise KnowledgeBoxNotFound(kbid)
|
58
94
|
if shards_obj.HasField("model"):
|
@@ -60,26 +96,67 @@ async def get_model_metadata(
|
|
60
96
|
else:
|
61
97
|
# B/c code for old KBs that do not have the `model` attribute set in the Shards object.
|
62
98
|
# Cleanup this code after a migration is done unifying all fields under `model` (on-prem and cloud).
|
63
|
-
return knowledgebox_pb2.SemanticModelMetadata(
|
64
|
-
similarity_function=shards_obj.similarity
|
65
|
-
)
|
99
|
+
return knowledgebox_pb2.SemanticModelMetadata(similarity_function=shards_obj.similarity)
|
66
100
|
|
67
101
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
102
|
+
# DEPRECATED: this function should be removed once the "default" vectorset
|
103
|
+
# concept is removed and processing sends us all messages with a vectorset_id
|
104
|
+
async def get_matryoshka_vector_dimension(
|
105
|
+
txn: Transaction,
|
106
|
+
*,
|
107
|
+
kbid: str,
|
108
|
+
vectorset_id: Optional[str] = None,
|
109
|
+
) -> Optional[int]:
|
110
|
+
"""Return vector dimension for matryoshka models"""
|
111
|
+
from . import vectorsets
|
112
|
+
|
113
|
+
async for _, vs in vectorsets.iter(txn, kbid=kbid):
|
114
|
+
if len(vs.matryoshka_dimensions) > 0 and vs.vectorset_index_config.vector_dimension:
|
115
|
+
if vs.vectorset_index_config.vector_dimension in vs.matryoshka_dimensions:
|
116
|
+
return vs.vectorset_index_config.vector_dimension
|
117
|
+
else:
|
118
|
+
logger.error(
|
119
|
+
"KB has an invalid matryoshka dimension!",
|
120
|
+
extra={
|
121
|
+
"kbid": kbid,
|
122
|
+
"vector_dimension": vs.vectorset_index_config.vector_dimension,
|
123
|
+
"matryoshka_dimensions": vs.matryoshka_dimensions,
|
124
|
+
},
|
125
|
+
)
|
126
|
+
return None
|
72
127
|
else:
|
128
|
+
# fallback for KBs that don't have vectorset
|
129
|
+
model_metadata = await get_model_metadata(txn, kbid=kbid)
|
130
|
+
dimension = None
|
131
|
+
if len(model_metadata.matryoshka_dimensions) > 0 and model_metadata.vector_dimension:
|
132
|
+
if model_metadata.vector_dimension in model_metadata.matryoshka_dimensions:
|
133
|
+
dimension = model_metadata.vector_dimension
|
134
|
+
else:
|
135
|
+
logger.error(
|
136
|
+
"KB has an invalid matryoshka dimension!",
|
137
|
+
extra={
|
138
|
+
"kbid": kbid,
|
139
|
+
"vector_dimension": model_metadata.vector_dimension,
|
140
|
+
"matryoshka_dimensions": model_metadata.matryoshka_dimensions,
|
141
|
+
},
|
142
|
+
)
|
143
|
+
return dimension
|
144
|
+
|
145
|
+
|
146
|
+
async def get_external_index_provider_metadata(
    txn: Transaction, *, kbid: str
) -> Optional[knowledgebox_pb2.StoredExternalIndexProviderMetadata]:
    """Return the ``external_index_provider`` field of the config read for ``kbid``.

    Returns ``None`` when ``get_config`` finds no configuration.
    """
    config = await get_config(txn, kbid=kbid)
    return config.external_index_provider if config is not None else None
|
74
153
|
|
75
154
|
|
76
|
-
async def
|
77
|
-
txn: Transaction, *,
|
78
|
-
)
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
continue
|
85
|
-
yield (uuid, slug)
|
155
|
+
async def set_external_index_provider_metadata(
    txn: Transaction, *, kbid: str, metadata: knowledgebox_pb2.StoredExternalIndexProviderMetadata
):
    """Copy ``metadata`` into the config of ``kbid`` and store the config again.

    Raises:
        KnowledgeBoxNotFound: when ``get_config`` finds no configuration.
    """
    # NOTE(review): this is a read-modify-write, yet the config is read without
    # for_update=True -- confirm whether a locking read is wanted here.
    config = await get_config(txn, kbid=kbid)
    if config is not None:
        config.external_index_provider.CopyFrom(metadata)
        await set_config(txn, kbid=kbid, config=config)
        return
    raise KnowledgeBoxNotFound(kbid)
|
@@ -17,6 +17,7 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
+
import logging
|
20
21
|
from typing import Optional
|
21
22
|
|
22
23
|
import orjson
|
@@ -24,6 +25,8 @@ import orjson
|
|
24
25
|
from nucliadb.common.maindb.driver import Transaction
|
25
26
|
from nucliadb_protos import knowledgebox_pb2 as kb_pb2
|
26
27
|
|
28
|
+
logger = logging.getLogger(__name__)
|
29
|
+
|
27
30
|
KB_LABELS = "/kbs/{kbid}/labels"
|
28
31
|
KB_LABELSET = "/kbs/{kbid}/labels/{id}"
|
29
32
|
KB_LABELSET_IDS = "/kbs/{kbid}/ids-labels"
|
@@ -34,7 +37,9 @@ async def get_labels(txn: Transaction, *, kbid: str) -> kb_pb2.Labels:
|
|
34
37
|
Get all labels for a knowledge box (from multiple labelsets)
|
35
38
|
"""
|
36
39
|
labels = kb_pb2.Labels()
|
37
|
-
labelset_ids = await
|
40
|
+
labelset_ids = await _get_labelset_ids(txn, kbid=kbid)
|
41
|
+
if labelset_ids is None:
|
42
|
+
return labels
|
38
43
|
for labelset_id in labelset_ids:
|
39
44
|
labelset = await txn.get(KB_LABELSET.format(kbid=kbid, id=labelset_id))
|
40
45
|
if not labelset:
|
@@ -45,76 +50,41 @@ async def get_labels(txn: Transaction, *, kbid: str) -> kb_pb2.Labels:
|
|
45
50
|
return labels
|
46
51
|
|
47
52
|
|
48
|
-
async def _get_labelset_ids_bw_compat(txn: Transaction, *, kbid: str) -> list[str]:
|
49
|
-
labelsets = await _get_labelset_ids(txn, kbid=kbid)
|
50
|
-
if labelsets is not None:
|
51
|
-
return labelsets
|
52
|
-
# TODO: Remove this after migration #11
|
53
|
-
return await _deprecated_scan_labelset_ids(txn, kbid=kbid)
|
54
|
-
|
55
|
-
|
56
|
-
async def _deprecated_scan_labelset_ids(txn: Transaction, *, kbid: str) -> list[str]:
|
57
|
-
labelsets = []
|
58
|
-
labels_key = KB_LABELS.format(kbid=kbid)
|
59
|
-
async for key in txn.keys(labels_key, count=-1, include_start=False):
|
60
|
-
lsid = key.split("/")[-1]
|
61
|
-
labelsets.append(lsid)
|
62
|
-
return labelsets
|
63
|
-
|
64
|
-
|
65
53
|
async def _get_labelset_ids(txn: Transaction, *, kbid: str) -> Optional[list[str]]:
    """Read and JSON-decode the labelset-id list stored for ``kbid``.

    The key is read with ``for_update=True``. Returns ``None`` when nothing
    (or an empty payload) is stored.
    """
    raw = await txn.get(KB_LABELSET_IDS.format(kbid=kbid), for_update=True)
    return orjson.loads(raw) if raw else None
|
71
59
|
|
72
60
|
|
73
|
-
async def _add_to_labelset_ids(
|
74
|
-
|
75
|
-
) -> None:
|
61
|
+
async def _add_to_labelset_ids(txn: Transaction, *, kbid: str, labelsets: list[str]) -> None:
    """Add ``labelsets`` to the labelset-id list stored for ``kbid``.

    The stored list is rewritten only when it does not exist yet or when the
    merge changes it. Ids already stored keep their position; new ids are
    appended in the order given, without duplicates.
    """
    previous = await _get_labelset_ids(txn, kbid=kbid)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # persisted list is deterministic (iteration order of a set of str is not).
    merged = list(dict.fromkeys([*(previous or ()), *labelsets]))
    # Compare list with list: a list never equals a set, so comparing the
    # stored list against a set would force a write on every call.
    if previous is None or merged != previous:
        await _set_labelset_ids(txn, kbid=kbid, labelsets=merged)
|
68
|
+
|
69
|
+
|
70
|
+
async def _delete_from_labelset_ids(txn: Transaction, *, kbid: str, labelsets: list[str]) -> None:
    """Remove ``labelsets`` from the labelset-id list stored for ``kbid``.

    Nothing is written when no list is stored or when none of the given ids
    is present. The remaining ids keep their stored order.
    """
    previous = await _get_labelset_ids(txn, kbid=kbid)
    if previous is None:
        # Nothing to delete
        return
    to_remove = set(labelsets)
    if to_remove.isdisjoint(previous):
        # None of the requested ids is stored, so the list is unchanged
        return
    # Filter the stored list instead of round-tripping through a set, whose
    # iteration order is arbitrary; dict.fromkeys also drops stored duplicates.
    remaining = [
        labelset_id for labelset_id in dict.fromkeys(previous) if labelset_id not in to_remove
    ]
    await _set_labelset_ids(txn, kbid=kbid, labelsets=remaining)
|
79
|
+
|
80
|
+
|
81
|
+
async def _set_labelset_ids(txn: Transaction, *, kbid: str, labelsets: list[str]) -> None:
    """Store ``labelsets`` as JSON under the labelset-id key of ``kbid``."""
    await txn.set(KB_LABELSET_IDS.format(kbid=kbid), orjson.dumps(labelsets))
|
113
85
|
|
114
86
|
|
115
|
-
async def get_labelset(
|
116
|
-
txn: Transaction, *, kbid: str, labelset_id: str
|
117
|
-
) -> Optional[kb_pb2.LabelSet]:
|
87
|
+
async def get_labelset(txn: Transaction, *, kbid: str, labelset_id: str) -> Optional[kb_pb2.LabelSet]:
|
118
88
|
labelset_key = KB_LABELSET.format(kbid=kbid, id=labelset_id)
|
119
89
|
payload = await txn.get(labelset_key)
|
120
90
|
if payload:
|
@@ -28,9 +28,7 @@ logger = logging.getLogger(__name__)
|
|
28
28
|
PULL_PARTITION_OFFSET = "/processing/pull-offset/{pull_type_id}/{partition}"
|
29
29
|
|
30
30
|
|
31
|
-
async def get_pull_offset(
|
32
|
-
txn: Transaction, *, pull_type_id: str, partition: str
|
33
|
-
) -> Optional[int]:
|
31
|
+
async def get_pull_offset(txn: Transaction, *, pull_type_id: str, partition: str) -> Optional[int]:
|
34
32
|
key = PULL_PARTITION_OFFSET.format(pull_type_id=pull_type_id, partition=partition)
|
35
33
|
val: Optional[bytes] = await txn.get(key)
|
36
34
|
if val is not None:
|
@@ -38,8 +36,6 @@ async def get_pull_offset(
|
|
38
36
|
return None
|
39
37
|
|
40
38
|
|
41
|
-
async def set_pull_offset(
|
42
|
-
txn: Transaction, *, pull_type_id: str, partition: str, offset: int
|
43
|
-
) -> None:
|
39
|
+
async def set_pull_offset(txn: Transaction, *, pull_type_id: str, partition: str, offset: int) -> None:
|
44
40
|
key = PULL_PARTITION_OFFSET.format(pull_type_id=pull_type_id, partition=partition)
|
45
41
|
await txn.set(key, str(offset).encode())
|