nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,408 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
|
21
|
-
from unittest import mock
|
22
|
-
|
23
|
-
import pytest
|
24
|
-
|
25
|
-
from nucliadb.common.cluster import manager
|
26
|
-
from nucliadb.common.cluster.exceptions import NodeClusterSmall, NoHealthyNodeAvailable
|
27
|
-
from nucliadb.common.cluster.index_node import IndexNode
|
28
|
-
from nucliadb.common.cluster.settings import settings
|
29
|
-
from nucliadb_protos import writer_pb2
|
30
|
-
|
31
|
-
|
32
|
-
@pytest.fixture(scope="function")
|
33
|
-
def available_nodes():
|
34
|
-
nodes = {
|
35
|
-
"node-0": IndexNode(
|
36
|
-
id="node-0", address="node-0", shard_count=1, available_disk=100, dummy=True
|
37
|
-
),
|
38
|
-
"node-30": IndexNode(
|
39
|
-
id="node-30",
|
40
|
-
address="node-30",
|
41
|
-
shard_count=1,
|
42
|
-
available_disk=30,
|
43
|
-
dummy=True,
|
44
|
-
),
|
45
|
-
"node-40": IndexNode(
|
46
|
-
id="node-40",
|
47
|
-
address="node-40",
|
48
|
-
shard_count=1,
|
49
|
-
available_disk=10,
|
50
|
-
dummy=True,
|
51
|
-
),
|
52
|
-
}
|
53
|
-
with mock.patch.object(manager, "INDEX_NODES", new=nodes):
|
54
|
-
yield nodes
|
55
|
-
|
56
|
-
|
57
|
-
def test_sorted_primary_nodes_orders_by_available_disk(available_nodes):
|
58
|
-
with mock.patch.object(settings, "node_replicas", 2):
|
59
|
-
nodes = manager.sorted_primary_nodes()
|
60
|
-
assert nodes == ["node-0", "node-30", "node-40"]
|
61
|
-
|
62
|
-
|
63
|
-
def test_sorted_primary_nodes_avoid_nodes(available_nodes):
|
64
|
-
with mock.patch.object(settings, "node_replicas", 2):
|
65
|
-
excluded_node = "node-0"
|
66
|
-
nodes = manager.sorted_primary_nodes(avoid_nodes=[excluded_node])
|
67
|
-
assert nodes == ["node-30", "node-40", "node-0"]
|
68
|
-
|
69
|
-
# even if all are used, still should find nodes
|
70
|
-
all_nodes = list(available_nodes.keys())
|
71
|
-
assert manager.sorted_primary_nodes(avoid_nodes=all_nodes) == [
|
72
|
-
"node-0",
|
73
|
-
"node-30",
|
74
|
-
"node-40",
|
75
|
-
]
|
76
|
-
|
77
|
-
# check ignore_nodes are ignored while keeping avoid_nodes at the end
|
78
|
-
all_nodes = list(available_nodes.keys())
|
79
|
-
assert manager.sorted_primary_nodes(
|
80
|
-
avoid_nodes=["node-0"], ignore_nodes=["node-30"]
|
81
|
-
) == [
|
82
|
-
"node-40",
|
83
|
-
"node-0",
|
84
|
-
]
|
85
|
-
|
86
|
-
|
87
|
-
def test_check_enough_nodes_raises_error_if_not_enough_nodes_are_found(available_nodes):
|
88
|
-
with mock.patch.object(settings, "node_replicas", 200):
|
89
|
-
with pytest.raises(NodeClusterSmall):
|
90
|
-
manager.check_enough_nodes()
|
91
|
-
|
92
|
-
|
93
|
-
def test_check_enough_nodes_checks_max_node_replicas_only_if_set(available_nodes):
|
94
|
-
with mock.patch.object(settings, "max_node_replicas", 0):
|
95
|
-
with pytest.raises(NodeClusterSmall):
|
96
|
-
manager.check_enough_nodes()
|
97
|
-
|
98
|
-
with mock.patch.object(settings, "max_node_replicas", -1):
|
99
|
-
manager.check_enough_nodes()
|
100
|
-
|
101
|
-
|
102
|
-
def add_index_node(id: str):
|
103
|
-
manager.add_index_node(
|
104
|
-
id=id,
|
105
|
-
address="nohost",
|
106
|
-
shard_count=0,
|
107
|
-
available_disk=100,
|
108
|
-
dummy=True,
|
109
|
-
)
|
110
|
-
|
111
|
-
|
112
|
-
def add_read_replica_node(id: str, primary_id: str):
|
113
|
-
manager.add_index_node(
|
114
|
-
id=id,
|
115
|
-
address="nohost",
|
116
|
-
shard_count=0,
|
117
|
-
available_disk=100,
|
118
|
-
dummy=True,
|
119
|
-
primary_id=primary_id,
|
120
|
-
)
|
121
|
-
|
122
|
-
|
123
|
-
def test_choose_node_with_two_primary_nodes():
|
124
|
-
manager.INDEX_NODES.clear()
|
125
|
-
add_index_node("node-0")
|
126
|
-
add_index_node("node-1")
|
127
|
-
|
128
|
-
node, _ = manager.choose_node(
|
129
|
-
writer_pb2.ShardObject(
|
130
|
-
replicas=[
|
131
|
-
writer_pb2.ShardReplica(
|
132
|
-
shard=writer_pb2.ShardCreated(id="123"), node="node-0"
|
133
|
-
)
|
134
|
-
]
|
135
|
-
)
|
136
|
-
)
|
137
|
-
assert node.id == "node-0"
|
138
|
-
node, _ = manager.choose_node(
|
139
|
-
writer_pb2.ShardObject(
|
140
|
-
replicas=[
|
141
|
-
writer_pb2.ShardReplica(
|
142
|
-
shard=writer_pb2.ShardCreated(id="123"), node="node-1"
|
143
|
-
)
|
144
|
-
]
|
145
|
-
)
|
146
|
-
)
|
147
|
-
assert node.id == "node-1"
|
148
|
-
|
149
|
-
manager.INDEX_NODES.clear()
|
150
|
-
|
151
|
-
|
152
|
-
def test_choose_node_with_two_read_replicas():
|
153
|
-
"""Test choose_node with two replica nodes pointing to two different primary
|
154
|
-
nodes.
|
155
|
-
|
156
|
-
"""
|
157
|
-
manager.INDEX_NODES.clear()
|
158
|
-
add_read_replica_node("node-replica-0", primary_id="node-0")
|
159
|
-
add_read_replica_node("node-replica-1", primary_id="node-1")
|
160
|
-
|
161
|
-
node, _ = manager.choose_node(
|
162
|
-
writer_pb2.ShardObject(
|
163
|
-
replicas=[
|
164
|
-
writer_pb2.ShardReplica(
|
165
|
-
shard=writer_pb2.ShardCreated(id="123"), node="node-0"
|
166
|
-
)
|
167
|
-
]
|
168
|
-
),
|
169
|
-
use_read_replica_nodes=True,
|
170
|
-
)
|
171
|
-
assert node.id == "node-replica-0"
|
172
|
-
node, _ = manager.choose_node(
|
173
|
-
writer_pb2.ShardObject(
|
174
|
-
replicas=[
|
175
|
-
writer_pb2.ShardReplica(
|
176
|
-
shard=writer_pb2.ShardCreated(id="123"), node="node-1"
|
177
|
-
)
|
178
|
-
]
|
179
|
-
),
|
180
|
-
use_read_replica_nodes=True,
|
181
|
-
)
|
182
|
-
assert node.id == "node-replica-1"
|
183
|
-
|
184
|
-
manager.INDEX_NODES.clear()
|
185
|
-
|
186
|
-
|
187
|
-
def test_choose_node_no_healthy_node_available():
|
188
|
-
"""There's only one read replica for node-0 and we try to choose a node for
|
189
|
-
a shard in node-1. We expect it to fail as there's no possible valid node to
|
190
|
-
choose.
|
191
|
-
|
192
|
-
"""
|
193
|
-
manager.INDEX_NODES.clear()
|
194
|
-
add_read_replica_node("node-replica-0", primary_id="node-0")
|
195
|
-
|
196
|
-
with pytest.raises(NoHealthyNodeAvailable):
|
197
|
-
manager.choose_node(
|
198
|
-
writer_pb2.ShardObject(
|
199
|
-
replicas=[
|
200
|
-
writer_pb2.ShardReplica(
|
201
|
-
shard=writer_pb2.ShardCreated(id="123"), node="node-1"
|
202
|
-
)
|
203
|
-
]
|
204
|
-
),
|
205
|
-
use_read_replica_nodes=True,
|
206
|
-
)
|
207
|
-
|
208
|
-
manager.INDEX_NODES.clear()
|
209
|
-
|
210
|
-
|
211
|
-
def repeated_choose_node(
|
212
|
-
count: int, shard: writer_pb2.ShardObject, **kwargs
|
213
|
-
) -> tuple[list[str], list[str]]:
|
214
|
-
shard_ids = []
|
215
|
-
node_ids = []
|
216
|
-
|
217
|
-
for _ in range(count):
|
218
|
-
node, shard_id = manager.choose_node(shard, **kwargs)
|
219
|
-
shard_ids.append(shard_id)
|
220
|
-
node_ids.append(node.id)
|
221
|
-
|
222
|
-
return shard_ids, node_ids
|
223
|
-
|
224
|
-
|
225
|
-
def test_choose_node_with_nodes_and_replicas(standalone_mode_off):
|
226
|
-
"""Validate how choose node selects between different options depending on
|
227
|
-
configuration.
|
228
|
-
|
229
|
-
Choose_node is called multiple times per assert to ensure there
|
230
|
-
is no randomness in the replica/node choice.
|
231
|
-
|
232
|
-
"""
|
233
|
-
TRIES_PER_ASSERT = 10
|
234
|
-
|
235
|
-
shard = writer_pb2.ShardObject(
|
236
|
-
replicas=[
|
237
|
-
writer_pb2.ShardReplica(
|
238
|
-
shard=writer_pb2.ShardCreated(id="123"),
|
239
|
-
node="node-0",
|
240
|
-
),
|
241
|
-
writer_pb2.ShardReplica(
|
242
|
-
shard=writer_pb2.ShardCreated(id="456"),
|
243
|
-
node="node-1",
|
244
|
-
),
|
245
|
-
]
|
246
|
-
)
|
247
|
-
|
248
|
-
# Start with 2 nodes and 1 read replica each
|
249
|
-
manager.INDEX_NODES.clear()
|
250
|
-
add_index_node("node-0")
|
251
|
-
add_index_node("node-1")
|
252
|
-
add_read_replica_node("node-replica-0", primary_id="node-0")
|
253
|
-
add_read_replica_node("node-replica-1", primary_id="node-1")
|
254
|
-
|
255
|
-
# Without read replicas, we only choose primaries
|
256
|
-
shard_ids, node_ids = repeated_choose_node(
|
257
|
-
TRIES_PER_ASSERT, shard, use_read_replica_nodes=False
|
258
|
-
)
|
259
|
-
assert set(shard_ids) == {"123"}
|
260
|
-
assert set(node_ids) == {"node-0"}
|
261
|
-
|
262
|
-
# Secondaries are preferred
|
263
|
-
shard_ids, node_ids = repeated_choose_node(
|
264
|
-
TRIES_PER_ASSERT, shard, use_read_replica_nodes=True
|
265
|
-
)
|
266
|
-
assert set(shard_ids) == {"123"}
|
267
|
-
assert set(node_ids) == {"node-replica-0"}
|
268
|
-
|
269
|
-
# Target replicas take more preference
|
270
|
-
shard_ids, node_ids = repeated_choose_node(
|
271
|
-
TRIES_PER_ASSERT,
|
272
|
-
shard,
|
273
|
-
use_read_replica_nodes=False,
|
274
|
-
target_shard_replicas=["456"],
|
275
|
-
)
|
276
|
-
assert set(shard_ids) == {"456"}
|
277
|
-
assert set(node_ids) == {"node-1"}
|
278
|
-
|
279
|
-
shard_ids, node_ids = repeated_choose_node(
|
280
|
-
TRIES_PER_ASSERT,
|
281
|
-
shard,
|
282
|
-
use_read_replica_nodes=True,
|
283
|
-
target_shard_replicas=["456"],
|
284
|
-
)
|
285
|
-
assert set(shard_ids) == {"456"}
|
286
|
-
assert set(node_ids) == {"node-replica-1"}
|
287
|
-
|
288
|
-
# Let's remove a node so it becomes unavailable, replica keeps working
|
289
|
-
manager.INDEX_NODES.clear()
|
290
|
-
add_index_node("node-0")
|
291
|
-
add_read_replica_node("node-replica-0", primary_id="node-0")
|
292
|
-
add_read_replica_node("node-replica-1", primary_id="node-1")
|
293
|
-
|
294
|
-
shard_ids, node_ids = repeated_choose_node(
|
295
|
-
TRIES_PER_ASSERT, shard, use_read_replica_nodes=False
|
296
|
-
)
|
297
|
-
assert set(shard_ids) == {"123"}
|
298
|
-
assert set(node_ids) == {"node-0"}
|
299
|
-
|
300
|
-
shard_ids, node_ids = repeated_choose_node(
|
301
|
-
TRIES_PER_ASSERT, shard, use_read_replica_nodes=True
|
302
|
-
)
|
303
|
-
assert set(shard_ids) == {"123"}
|
304
|
-
assert set(node_ids) == {"node-replica-0"}
|
305
|
-
|
306
|
-
# target replicas is ignored but only primaries are used
|
307
|
-
shard_ids, node_ids = repeated_choose_node(
|
308
|
-
TRIES_PER_ASSERT,
|
309
|
-
shard,
|
310
|
-
use_read_replica_nodes=False,
|
311
|
-
target_shard_replicas=["456"],
|
312
|
-
)
|
313
|
-
assert set(shard_ids) == {"123"}
|
314
|
-
assert set(node_ids) == {"node-0"}
|
315
|
-
|
316
|
-
shard_ids, node_ids = repeated_choose_node(
|
317
|
-
TRIES_PER_ASSERT,
|
318
|
-
shard,
|
319
|
-
use_read_replica_nodes=True,
|
320
|
-
target_shard_replicas=["456"],
|
321
|
-
)
|
322
|
-
assert set(shard_ids) == {"456"}
|
323
|
-
assert set(node_ids) == {"node-replica-1"}
|
324
|
-
|
325
|
-
# Now let's add again the node but remove the replica
|
326
|
-
manager.INDEX_NODES.clear()
|
327
|
-
add_index_node("node-0")
|
328
|
-
add_index_node("node-1")
|
329
|
-
add_read_replica_node("node-replica-0", primary_id="node-0")
|
330
|
-
|
331
|
-
shard_ids, node_ids = repeated_choose_node(
|
332
|
-
TRIES_PER_ASSERT, shard, use_read_replica_nodes=False
|
333
|
-
)
|
334
|
-
assert set(shard_ids) == {"123"}
|
335
|
-
assert set(node_ids) == {"node-0"}
|
336
|
-
|
337
|
-
shard_ids, node_ids = repeated_choose_node(
|
338
|
-
TRIES_PER_ASSERT, shard, use_read_replica_nodes=True
|
339
|
-
)
|
340
|
-
assert set(shard_ids) == {"123"}
|
341
|
-
assert set(node_ids) == {"node-replica-0"}
|
342
|
-
|
343
|
-
shard_ids, node_ids = repeated_choose_node(
|
344
|
-
TRIES_PER_ASSERT,
|
345
|
-
shard,
|
346
|
-
use_read_replica_nodes=False,
|
347
|
-
target_shard_replicas=["456"],
|
348
|
-
)
|
349
|
-
assert set(shard_ids) == {"456"}
|
350
|
-
assert set(node_ids) == {"node-1"}
|
351
|
-
|
352
|
-
shard_ids, node_ids = repeated_choose_node(
|
353
|
-
TRIES_PER_ASSERT,
|
354
|
-
shard,
|
355
|
-
use_read_replica_nodes=True,
|
356
|
-
target_shard_replicas=["456"],
|
357
|
-
)
|
358
|
-
assert set(shard_ids) == {"456"}
|
359
|
-
assert set(node_ids) == {"node-1"}
|
360
|
-
|
361
|
-
manager.INDEX_NODES.clear()
|
362
|
-
|
363
|
-
|
364
|
-
@pytest.fixture(scope="function")
def standalone_mode_off():
    """Run the test with ``settings.standalone_mode`` set to ``False``.

    The value found before the test is written back once the test is over.
    """
    original_mode = settings.standalone_mode
    settings.standalone_mode = False
    yield
    # Teardown: undo the override applied above.
    settings.standalone_mode = original_mode
|
370
|
-
|
371
|
-
|
372
|
-
@pytest.fixture(scope="function")
def index_nodes():
    """Swap ``manager.INDEX_NODES`` for a fresh empty dict during the test.

    Yields the replacement dict; the patch is undone when the ``with`` block
    exits.
    """
    registry = {}
    patcher = mock.patch.object(manager, "INDEX_NODES", new=registry)
    with patcher:
        yield registry
|
377
|
-
|
378
|
-
|
379
|
-
def test_get_index_nodes(standalone_mode_off, index_nodes):
    """Check which nodes ``manager.get_index_nodes`` returns.

    Registers ``node-0`` without a ``primary_id`` and ``node-1`` with
    ``primary_id="node-0"``, then verifies that the default listing only
    contains ``node-0`` while ``include_secondary=True`` returns both.
    """
    # Add a primary node
    manager.add_index_node(
        id="node-0",
        address="nohost",
        shard_count=0,
        available_disk=100,
        dummy=True,
    )
    # Add a secondary replica of node-0
    manager.add_index_node(
        id="node-1",
        address="nohost",
        shard_count=0,
        available_disk=100,
        dummy=True,
        primary_id="node-0",
    )

    # By default, only primary nodes are returned
    nodes = manager.get_index_nodes()
    assert len(nodes) == 1
    assert nodes[0].id == "node-0"

    # If we ask for secondary, we get both
    nodes = manager.get_index_nodes(include_secondary=True)
    assert len(nodes) == 2
    # sorted() returns a new list instead of sorting in place, so compare the
    # sorted ids directly; the check must not depend on the returned order.
    assert sorted(node.id for node in nodes) == ["node-0", "node-1"]
|
@@ -1,173 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import asyncio
|
21
|
-
import uuid
|
22
|
-
from typing import Any, Optional
|
23
|
-
from unittest.mock import MagicMock
|
24
|
-
|
25
|
-
import pytest
|
26
|
-
|
27
|
-
from nucliadb.common import datamanagers
|
28
|
-
from nucliadb.common.cluster import manager
|
29
|
-
from nucliadb.common.cluster.settings import settings
|
30
|
-
from nucliadb.common.maindb.driver import Transaction
|
31
|
-
from nucliadb_protos import writer_pb2
|
32
|
-
|
33
|
-
|
34
|
-
def test_should_create_new_shard():
    """Check ``should_create_new_shard`` on both sides of the paragraph limit.

    One paragraph below ``settings.max_shard_paragraphs`` must give ``False``;
    one paragraph above it must give ``True``.
    """
    shard_manager = manager.KBShardManager()
    limit = settings.max_shard_paragraphs

    below_limit = shard_manager.should_create_new_shard(num_paragraphs=limit - 1)
    assert below_limit is False

    above_limit = shard_manager.should_create_new_shard(num_paragraphs=limit + 1)
    assert above_limit is True
|
44
|
-
|
45
|
-
|
46
|
-
@pytest.fixture(scope="function")
async def fake_node():
    """Yield a single dummy index node registered under the id ``node-0``.

    ``manager.INDEX_NODES`` is emptied before the node is registered and
    again after the test has finished.
    """
    manager.INDEX_NODES.clear()
    node_kwargs = {
        "id": "node-0",
        "address": "nohost",
        "shard_count": 0,
        "available_disk": 100,
        "dummy": True,
    }
    node = manager.add_index_node(**node_kwargs)
    yield node
    # Teardown: leave the global registry empty for the next test.
    manager.INDEX_NODES.clear()
|
57
|
-
|
58
|
-
|
59
|
-
async def test_standalone_node_garbage_collects(fake_node):
    """Adding a resource through the standalone manager records one GC call.

    ``max_ops_before_checks`` is set to 0 before the resource is added; the
    test then expects exactly one entry under ``"GC"`` in the fake node
    writer's recorded calls.
    """
    shard_manager = manager.StandaloneKBShardManager()
    shard_manager.max_ops_before_checks = 0

    replica = writer_pb2.ShardReplica(
        shard=writer_pb2.ShardCreated(id="123"),
        node="node-0",
    )
    shard = writer_pb2.ShardObject(shard="123", replicas=[replica])

    await shard_manager.add_resource(
        shard,
        resource=MagicMock(),
        txid=-1,
        partition=0,
        kb="kb",
    )

    # NOTE(review): presumably the GC is scheduled in the background and this
    # short pause lets it run - confirm against StandaloneKBShardManager.
    await asyncio.sleep(0.05)
    gc_calls = fake_node.writer.calls["GC"]
    assert len(gc_calls) == 1
|
81
|
-
|
82
|
-
|
83
|
-
async def test_shard_creation(fake_index_nodes: list[str], txn: Transaction):
    """Given a cluster of index nodes, validate shard creation logic.

    Every logic shard should create a configured amount of indexing replicas and
    update the information about writable shards.

    Three shards are created one after another. After each creation the test
    checks that only the newest shard is writable, that ``Shards.actual``
    points at it, and that its replicas cover every fake index node.
    """
    index_nodes = set(fake_index_nodes)
    kbid = f"kbid:{test_shard_creation.__name__}"
    sm = manager.KBShardManager()

    # Fake KB shards instead of creating a KB to generate it.
    # The lookup result is not used; the call is kept because its side
    # effects (if any) are not visible from this test.
    await datamanagers.cluster.get_kb_shards(txn, kbid=kbid)
    await datamanagers.cluster.update_kb_shards(
        txn,
        kbid=kbid,
        shards=writer_pb2.Shards(
            kbid=kbid,
        ),
    )

    # Each new shard must leave all previous ones read only.
    for expected_count in (1, 2, 3):
        await sm.create_shard_by_kbid(txn, kbid)

        shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid)
        assert shards is not None
        assert len(shards.shards) == expected_count

        newest_index = expected_count - 1
        for older_index in range(newest_index):
            assert shards.shards[older_index].read_only is True
        assert shards.shards[newest_index].read_only is False

        # B/c with Shards.actual
        assert shards.actual == newest_index

        # Check the replicas of the shard just created (the original third
        # round re-checked shard 1 instead of shard 2).
        newest_replicas = shards.shards[newest_index].replicas
        assert {replica.node for replica in newest_replicas} == index_nodes
|
139
|
-
|
140
|
-
|
141
|
-
@pytest.fixture
def txn():
    """Yield an in-memory stand-in for a transaction.

    The object keeps values in a plain dict exposed as ``store`` and offers
    async ``get`` and ``set`` methods on top of it.
    """

    class MockTransaction:
        """Dict-backed object exposing async ``get`` / ``set``."""

        def __init__(self):
            self.store = {}

        async def get(self, key: str) -> Optional[Any]:
            # Missing keys resolve to None.
            return self.store.get(key)

        async def set(self, key: str, value: Any):
            self.store[key] = value

    transaction = MockTransaction()
    yield transaction
|
154
|
-
|
155
|
-
|
156
|
-
@pytest.fixture(scope="function")
def fake_index_nodes():
    """Register ``settings.node_replicas`` dummy index nodes and yield their ids.

    The fixture refuses to start if ``manager.INDEX_NODES`` is not empty, and
    removes every node it registered once the test is over.
    """
    assert len(manager.INDEX_NODES) == 0, "Some test isn't cleaning global state!"

    node_ids = []
    for index in range(settings.node_replicas):
        node_ids.append(f"node-{index}")

    for registered_id in node_ids:
        # A random uuid keeps every fake address unique.
        fake_address = f"nohost-{str(uuid.uuid4())}:1234"
        manager.add_index_node(
            id=registered_id,
            address=fake_address,
            shard_count=0,
            available_disk=100,
            dummy=True,
        )

    yield node_ids

    # Teardown: unregister only the nodes created above.
    for registered_id in node_ids:
        manager.remove_index_node(registered_id)
|
@@ -1,38 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from unittest.mock import MagicMock, patch
|
21
|
-
|
22
|
-
import pytest
|
23
|
-
|
24
|
-
from nucliadb.common import locking
|
25
|
-
from nucliadb.common.cluster.rebalance import run
|
26
|
-
|
27
|
-
|
28
|
-
async def test_run_handles_locked_rebalance():
    """Check how ``run`` reacts when ``distributed_lock`` raises ``ResourceLocked``.

    With ``ResourceLocked("rebalance")`` the call must complete without
    raising; with ``ResourceLocked("other-key")`` the exception must reach
    the caller.
    """
    lock_target = "nucliadb.common.cluster.rebalance.locking.distributed_lock"
    context = MagicMock()

    with patch(lock_target) as lock_mock:
        # First case: run() is expected to return normally.
        lock_mock.side_effect = locking.ResourceLocked("rebalance")
        await run(context)

        # Second case: the error is expected to propagate.
        lock_mock.side_effect = locking.ResourceLocked("other-key")
        with pytest.raises(locking.ResourceLocked):
            await run(context)
|