nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -403
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +70 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +143 -117
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +37 -128
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -0,0 +1,113 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
"""Migration #25 (Fixed migration 24)
|
22
|
+
|
23
|
+
Vectorsets are coming and we need to be ready at nucliadb. Vector index config
|
24
|
+
shouldn't be stored anymore in the `Shards` protobuffer, we need to migrate to
|
25
|
+
the new vectorsets config.
|
26
|
+
|
27
|
+
This migration asks learning_config for each KB configuration and saves the
|
28
|
+
model name as the vectorset_id. Creates a vectorset configuration for each model
|
29
|
+
and deprecates the vectors index config from the `Shards` protobuffer.
|
30
|
+
|
31
|
+
This migration should work for onprem and hosted deployments, as
|
32
|
+
learning_proxy handles which API is used (internal or external)
|
33
|
+
|
34
|
+
"""
|
35
|
+
|
36
|
+
import logging
|
37
|
+
|
38
|
+
from nucliadb import learning_proxy
|
39
|
+
from nucliadb.common import datamanagers
|
40
|
+
from nucliadb.migrator.context import ExecutionContext
|
41
|
+
from nucliadb_protos import (
|
42
|
+
knowledgebox_pb2,
|
43
|
+
nodewriter_pb2,
|
44
|
+
)
|
45
|
+
|
46
|
+
logger = logging.getLogger(__name__)
|
47
|
+
|
48
|
+
|
49
|
+
async def migrate(context: ExecutionContext) -> None:
    """Do nothing: this migration performs all of its work per KB, in ``migrate_kb``."""
    return None
|
50
|
+
|
51
|
+
|
52
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
    """Create the default vectorset configuration for a KB.

    The vectorset id is the semantic model name reported by learning_config.
    Nothing is written, and the function returns early, when:

    - the KB already has at least one vectorset,
    - learning_config returns no configuration for the KB, or
    - the model metadata stored in maindb disagrees with learning_config
      (logged as an error so it can be reviewed manually).

    Args:
        context: migration execution context; only ``kv_driver`` is used here.
        kbid: id of the knowledge box to migrate.
    """
    async with context.kv_driver.transaction(read_only=True) as txn:
        vectorsets_count = len([vs async for vs in datamanagers.vectorsets.iter(txn, kbid=kbid)])
    if vectorsets_count > 0:
        logger.info("Skipping KB with vectorsets already populated", extra={"kbid": kbid})
        return

    learning_config = await learning_proxy.get_configuration(kbid)
    if learning_config is None:
        logger.warning("KB has no learning config", extra={"kbid": kbid})
        return

    vectorset_id = learning_config.semantic_model
    learning_model_metadata = learning_config.into_semantic_model_metadata()
    learning_similarity = learning_model_metadata.similarity_function
    learning_vector_dimension = learning_model_metadata.vector_dimension
    learning_matryoshka_dimensions = learning_model_metadata.matryoshka_dimensions
    learning_normalize_vectors = len(learning_matryoshka_dimensions) > 0

    async with context.kv_driver.transaction(read_only=True) as txn:
        semantic_model = await datamanagers.kb.get_model_metadata(txn, kbid=kbid)

    maindb_similarity = semantic_model.similarity_function

    # A falsy dimension means maindb has no usable value stored for it.
    maindb_vector_dimension = None
    if semantic_model.vector_dimension:
        maindb_vector_dimension = semantic_model.vector_dimension

    maindb_matryoshka_dimensions: list[int] = list(semantic_model.matryoshka_dimensions)
    maindb_normalize_vectors = len(maindb_matryoshka_dimensions) > 0

    # The dimension is only compared when maindb actually has one; the other
    # attributes must always agree with learning_config.
    if (
        maindb_similarity != learning_similarity
        or (maindb_vector_dimension is not None and maindb_vector_dimension != learning_vector_dimension)
        or set(maindb_matryoshka_dimensions) != set(learning_matryoshka_dimensions)
        or maindb_normalize_vectors != learning_normalize_vectors
    ):
        logger.error(
            "KB has mismatched data between nucliadb and learning_config! Please, review manually",
            extra={"kbid": kbid},
        )
        return

    # When maindb has no dimension, use the one from learning_config instead of
    # persisting an unset dimension in the index config. The check above
    # already accepted that case, so learning is the only source available.
    vector_dimension = (
        maindb_vector_dimension if maindb_vector_dimension is not None else learning_vector_dimension
    )

    default_vectorset = knowledgebox_pb2.VectorSetConfig(
        vectorset_id=vectorset_id,
        vectorset_index_config=nodewriter_pb2.VectorIndexConfig(
            vector_dimension=vector_dimension,
            similarity=maindb_similarity,
            vector_type=nodewriter_pb2.VectorType.DENSE_F32,  # we only support this for now
            normalize_vectors=maindb_normalize_vectors,
        ),
        matryoshka_dimensions=maindb_matryoshka_dimensions,
    )

    async with context.kv_driver.transaction() as txn:
        # Populate KB vectorsets with data from learning. We are skipping KBs
        # with this key already set, so we can set here safely
        await datamanagers.vectorsets.set(txn, kbid=kbid, config=default_vectorset)
        await txn.commit()
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
"""Migration #26
|
22
|
+
|
23
|
+
Previously, content types supplied by users on upload were not validated. As a result, some KBs ended up
|
24
|
+
with content types that embedded random UUIDs, which caused high cardinality in the content type field.
|
25
|
+
|
26
|
+
This migration will fix those invalid content types.
|
27
|
+
"""
|
28
|
+
|
29
|
+
import logging
|
30
|
+
|
31
|
+
from nucliadb.common import datamanagers
|
32
|
+
from nucliadb.migrator.context import ExecutionContext
|
33
|
+
|
34
|
+
logger = logging.getLogger(__name__)
|
35
|
+
|
36
|
+
|
37
|
+
# Ids of the knowledge boxes this migration applies to; migrate_kb returns
# immediately for any kbid that is not in this list.
AFFECTED_KBS = [
|
38
|
+
"78d289e0-dd4d-448c-84b5-8ef0b01a5aba",
|
39
|
+
]
|
40
|
+
|
41
|
+
|
42
|
+
async def migrate(context: ExecutionContext) -> None:
    """Do nothing: this migration only acts on individual KBs, in ``migrate_kb``."""
    return None
|
43
|
+
|
44
|
+
|
45
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
    """Strip the boundary parameter from multipart content types in an affected KB.

    KBs not listed in ``AFFECTED_KBS`` are left untouched. For every resource
    of an affected KB, an icon such as
    ``multipart/form-data; boundary={uuid}`` is rewritten to plain
    ``multipart/form-data``. Each resource is handled in its own read-write
    transaction, which is committed only when the icon was changed.
    """
    if kbid not in AFFECTED_KBS:
        return

    async for rid in datamanagers.resources.iterate_resource_ids(kbid=kbid):
        async with datamanagers.with_rw_transaction() as txn:
            basic = await datamanagers.resources.get_basic(txn, kbid=kbid, rid=rid)
            icon = basic.icon if basic else None
            # We're aiming to fix content types like "multipart/form-data; boundary={uuid}"
            needs_fix = bool(icon) and "multipart/form-data" in icon and "boundary=" in icon
            if needs_fix:
                logger.info("Fixing content type for resource", extra={"kbid": kbid, "rid": rid})
                basic.icon = "multipart/form-data"
                await datamanagers.resources.set_basic(txn, kbid=kbid, rid=rid, basic=basic)
                await txn.commit()
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
"""Migration #27
|
22
|
+
|
23
|
+
Rollover for nucliadb_texts3
|
24
|
+
"""
|
25
|
+
|
26
|
+
import logging
|
27
|
+
|
28
|
+
from nucliadb import learning_proxy
|
29
|
+
from nucliadb.common import datamanagers
|
30
|
+
from nucliadb.common.cluster.rollover import rollover_kb_index
|
31
|
+
from nucliadb.migrator.context import ExecutionContext
|
32
|
+
|
33
|
+
logger = logging.getLogger(__name__)
|
34
|
+
|
35
|
+
|
36
|
+
async def migrate(context: ExecutionContext) -> None:
    """Do nothing: the rollover is carried out KB by KB, in ``migrate_kb``."""
    return None
|
37
|
+
|
38
|
+
|
39
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
    """Repair the KB's vectorset dimension if needed, then roll over its index.

    The two steps run strictly in this order: ``maybe_fix_vector_dimensions``
    first, ``rollover_kb_index`` second.
    """
    # Step 1: fix the stored vector dimension before the rollover runs.
    await maybe_fix_vector_dimensions(context, kbid)
    # Step 2: roll over the KB index.
    await rollover_kb_index(context, kbid)
|
42
|
+
|
43
|
+
|
44
|
+
async def maybe_fix_vector_dimensions(context: ExecutionContext, kbid: str) -> None:
    """Fill in a missing vector dimension on the KB's only vectorset.

    When the KB has exactly one vectorset and its index config has a vector
    dimension of 0, the dimension is replaced with the one reported by
    learning_config and the vectorset config is saved. In every other case
    (no learning config, zero or several vectorsets, dimension already set)
    nothing is written.

    Args:
        context: migration execution context; only ``kv_driver`` is used here.
        kbid: id of the knowledge box to check.
    """
    learning_config = await learning_proxy.get_configuration(kbid)
    if learning_config is None:
        logger.warning("KB has no learning config", extra={"kbid": kbid})
        return

    async with context.kv_driver.transaction() as txn:
        vectorsets = [vs async for vs in datamanagers.vectorsets.iter(txn, kbid=kbid)]
        if len(vectorsets) != 1:
            # If multiple vectorsets, they are new shards created correctly, we can safely skip it
            logger.warning(f"KB has {len(vectorsets)} vectorsets, skipping...", extra={"kbid": kbid})
            return
        # Each iterated item is a pair; the vectorset config is its second element.
        vectorset = vectorsets[0][1]

        # Correct value, skip
        if vectorset.vectorset_index_config.vector_dimension != 0:
            return

        learning_model_metadata = learning_config.into_semantic_model_metadata()
        logger.info(
            "Fixing KB vectorset dimension",
            extra={
                "kbid": kbid,
                "from": vectorset.vectorset_index_config.vector_dimension,
                "to": learning_model_metadata.vector_dimension,
            },
        )
        vectorset.vectorset_index_config.vector_dimension = learning_model_metadata.vector_dimension

        await datamanagers.vectorsets.set(txn, kbid=kbid, config=vectorset)
        # Commit explicitly so the corrected config is persisted.
        # NOTE(review): presumably the transaction context manager does not
        # commit on exit — confirm against the driver implementation.
        await txn.commit()
|
@@ -17,18 +17,16 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from nucliadb_protos.resources_pb2 import FieldDatetime
|
21
20
|
|
22
|
-
from nucliadb.
|
21
|
+
from nucliadb.common.maindb.pg import PGTransaction
|
23
22
|
|
24
23
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
return await self.db_get_value()
|
24
|
+
async def migrate(txn: PGTransaction) -> None:
|
25
|
+
async with txn.connection.cursor() as cur:
|
26
|
+
# IF NOT EXISTS just for compatibility with older install predating the migration system
|
27
|
+
await cur.execute("""
|
28
|
+
CREATE TABLE IF NOT EXISTS resources (
|
29
|
+
key TEXT PRIMARY KEY,
|
30
|
+
value BYTEA
|
31
|
+
);
|
32
|
+
""")
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
from nucliadb.common.maindb.pg import PGTransaction
|
22
|
+
|
23
|
+
|
24
|
+
async def migrate(txn: PGTransaction) -> None:
|
25
|
+
async with txn.connection.cursor() as cur:
|
26
|
+
await cur.execute(r"""
|
27
|
+
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
28
|
+
CREATE EXTENSION IF NOT EXISTS btree_gin;
|
29
|
+
CREATE TABLE catalog (
|
30
|
+
kbid UUID,
|
31
|
+
rid UUID,
|
32
|
+
title TEXT,
|
33
|
+
created_at TIMESTAMP,
|
34
|
+
modified_at TIMESTAMP,
|
35
|
+
labels TEXT[],
|
36
|
+
PRIMARY KEY(kbid, rid)
|
37
|
+
);
|
38
|
+
CREATE INDEX ON catalog USING GIN(kbid, labels);
|
39
|
+
CREATE INDEX ON catalog USING GIN(kbid, regexp_split_to_array(lower(title), '\W'::text));
|
40
|
+
CREATE INDEX ON catalog(kbid, created_at);
|
41
|
+
CREATE INDEX ON catalog(kbid, modified_at);
|
42
|
+
""")
|
@@ -17,8 +17,10 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from nucliadb.ingest.settings import DriverConfig
|
21
20
|
|
21
|
+
from nucliadb.common.maindb.pg import PGTransaction
|
22
22
|
|
23
|
-
|
24
|
-
|
23
|
+
|
24
|
+
async def migrate(txn: PGTransaction) -> None:
|
25
|
+
async with txn.connection.cursor() as cur:
|
26
|
+
await cur.execute("CREATE INDEX ON catalog(kbid);")
|
nucliadb/common/cluster/base.py
CHANGED
@@ -20,16 +20,16 @@
|
|
20
20
|
from abc import ABCMeta, abstractmethod
|
21
21
|
from typing import AsyncIterator, Optional
|
22
22
|
|
23
|
+
from nucliadb_protos import nodereader_pb2, noderesources_pb2, utils_pb2
|
23
24
|
from nucliadb_protos.nodereader_pb2_grpc import NodeReaderStub
|
24
25
|
from nucliadb_protos.nodewriter_pb2 import (
|
25
26
|
NewShardRequest,
|
26
27
|
NewVectorSetRequest,
|
27
28
|
OpStatus,
|
29
|
+
VectorIndexConfig,
|
28
30
|
)
|
29
31
|
from nucliadb_protos.nodewriter_pb2_grpc import NodeWriterStub
|
30
32
|
|
31
|
-
from nucliadb_protos import nodereader_pb2, noderesources_pb2, utils_pb2
|
32
|
-
|
33
33
|
|
34
34
|
class AbstractIndexNode(metaclass=ABCMeta):
|
35
35
|
label: str = "index-node"
|
@@ -93,15 +93,29 @@ class AbstractIndexNode(metaclass=ABCMeta):
|
|
93
93
|
async def new_shard(
|
94
94
|
self,
|
95
95
|
kbid: str,
|
96
|
-
|
97
|
-
|
98
|
-
|
96
|
+
vector_index_config: VectorIndexConfig,
|
97
|
+
) -> noderesources_pb2.ShardCreated:
|
98
|
+
req = NewShardRequest(
|
99
|
+
kbid=kbid,
|
100
|
+
release_channel=utils_pb2.ReleaseChannel.STABLE,
|
101
|
+
config=vector_index_config,
|
102
|
+
# Deprecated fields, only for backwards compatibility with older nodes
|
103
|
+
similarity=vector_index_config.similarity,
|
104
|
+
normalize_vectors=vector_index_config.normalize_vectors,
|
105
|
+
)
|
106
|
+
|
107
|
+
resp = await self.writer.NewShard(req) # type: ignore
|
108
|
+
return resp
|
109
|
+
|
110
|
+
async def new_shard_with_vectorsets(
|
111
|
+
self,
|
112
|
+
kbid: str,
|
113
|
+
vectorsets_configs: dict[str, VectorIndexConfig],
|
99
114
|
) -> noderesources_pb2.ShardCreated:
|
100
115
|
req = NewShardRequest(
|
101
116
|
kbid=kbid,
|
102
|
-
|
103
|
-
|
104
|
-
normalize_vectors=normalize_vectors,
|
117
|
+
release_channel=utils_pb2.ReleaseChannel.STABLE,
|
118
|
+
vectorsets_configs=vectorsets_configs,
|
105
119
|
)
|
106
120
|
|
107
121
|
resp = await self.writer.NewShard(req) # type: ignore
|
@@ -120,15 +134,15 @@ class AbstractIndexNode(metaclass=ABCMeta):
|
|
120
134
|
self,
|
121
135
|
shard_id: str,
|
122
136
|
vectorset: str,
|
123
|
-
|
124
|
-
similarity: utils_pb2.VectorSimilarity.ValueType = utils_pb2.VectorSimilarity.COSINE,
|
125
|
-
normalize_vectors: bool = False,
|
137
|
+
config: VectorIndexConfig,
|
126
138
|
) -> OpStatus:
|
127
|
-
req = NewVectorSetRequest(
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
139
|
+
req = NewVectorSetRequest(
|
140
|
+
id=noderesources_pb2.VectorSetID(
|
141
|
+
shard=noderesources_pb2.ShardId(id=shard_id), vectorset=vectorset
|
142
|
+
),
|
143
|
+
config=config,
|
144
|
+
)
|
145
|
+
|
132
146
|
resp = await self.writer.AddVectorSet(req) # type: ignore
|
133
147
|
return resp
|
134
148
|
|
@@ -113,7 +113,7 @@ async def _get_index_node_metadata(
|
|
113
113
|
channel = get_traced_grpc_channel(grpc_address, "discovery", variant="_writer")
|
114
114
|
if read_replica:
|
115
115
|
# on a read replica, we need to use the replication service
|
116
|
-
stub = replication_pb2_grpc.ReplicationServiceStub(channel)
|
116
|
+
stub = replication_pb2_grpc.ReplicationServiceStub(channel)
|
117
117
|
else:
|
118
118
|
stub = nodewriter_pb2_grpc.NodeWriterStub(channel) # type: ignore
|
119
119
|
try:
|
@@ -127,9 +127,7 @@ async def _get_index_node_metadata(
|
|
127
127
|
or None
|
128
128
|
)
|
129
129
|
if read_replica and primary_id is None:
|
130
|
-
raise Exception(
|
131
|
-
"Primary node id not found when it is expected to be a read replica"
|
132
|
-
)
|
130
|
+
raise Exception("Primary node id not found when it is expected to be a read replica")
|
133
131
|
|
134
132
|
return IndexNodeMetadata(
|
135
133
|
node_id=metadata.node_id,
|
@@ -141,18 +139,14 @@ async def _get_index_node_metadata(
|
|
141
139
|
)
|
142
140
|
|
143
141
|
|
144
|
-
@backoff.on_exception(
|
145
|
-
|
146
|
-
)
|
147
|
-
async def _get_standalone_index_node_metadata(
|
148
|
-
settings: Settings, address: str
|
149
|
-
) -> IndexNodeMetadata:
|
142
|
+
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=4)
|
143
|
+
async def _get_standalone_index_node_metadata(settings: Settings, address: str) -> IndexNodeMetadata:
|
150
144
|
if ":" not in address:
|
151
145
|
grpc_address = f"{address}:{settings.standalone_node_port}"
|
152
146
|
else:
|
153
147
|
grpc_address = address
|
154
148
|
channel = get_traced_grpc_channel(grpc_address, "standalone_proxy")
|
155
|
-
stub = standalone_pb2_grpc.StandaloneClusterServiceStub(channel)
|
149
|
+
stub = standalone_pb2_grpc.StandaloneClusterServiceStub(channel)
|
156
150
|
resp: standalone_pb2.NodeInfoResponse = await stub.NodeInfo(standalone_pb2.NodeInfoRequest()) # type: ignore
|
157
151
|
return IndexNodeMetadata(
|
158
152
|
node_id=resp.id,
|
@@ -177,9 +171,7 @@ class AbstractClusterDiscovery(abc.ABC):
|
|
177
171
|
async def finalize(self) -> None:
|
178
172
|
""" """
|
179
173
|
|
180
|
-
async def _query_node_metadata(
|
181
|
-
self, address: str, read_replica: bool = False
|
182
|
-
) -> IndexNodeMetadata:
|
174
|
+
async def _query_node_metadata(self, address: str, read_replica: bool = False) -> IndexNodeMetadata:
|
183
175
|
if self.settings.standalone_mode:
|
184
176
|
return await _get_standalone_index_node_metadata(self.settings, address)
|
185
177
|
else:
|
@@ -69,9 +69,7 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
|
|
69
69
|
) -> IndexNodeMetadata:
|
70
70
|
async with self.update_lock:
|
71
71
|
if pod_name not in self.node_id_cache:
|
72
|
-
self.node_id_cache[pod_name] = await self._query_node_metadata(
|
73
|
-
node_ip, read_replica
|
74
|
-
)
|
72
|
+
self.node_id_cache[pod_name] = await self._query_node_metadata(node_ip, read_replica)
|
75
73
|
else:
|
76
74
|
self.node_id_cache[pod_name].address = node_ip
|
77
75
|
self.node_id_cache[pod_name].updated_at = time.time()
|
@@ -84,12 +82,10 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
|
|
84
82
|
This method will update global node state by utilizing the cluster manager
|
85
83
|
to add or remove nodes.
|
86
84
|
"""
|
87
|
-
status: kubernetes_asyncio.client.models.v1_pod_status.V1PodStatus = event[
|
85
|
+
status: kubernetes_asyncio.client.models.v1_pod_status.V1PodStatus = event["object"].status
|
86
|
+
event_metadata: kubernetes_asyncio.client.models.v1_object_meta.V1ObjectMeta = event[
|
88
87
|
"object"
|
89
|
-
].
|
90
|
-
event_metadata: kubernetes_asyncio.client.models.v1_object_meta.V1ObjectMeta = (
|
91
|
-
event["object"].metadata
|
92
|
-
)
|
88
|
+
].metadata
|
93
89
|
|
94
90
|
ready = status.container_statuses is not None
|
95
91
|
if event["type"] == "DELETED":
|
@@ -199,9 +195,7 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
|
|
199
195
|
except NodeConnectionError: # pragma: no cover
|
200
196
|
pass
|
201
197
|
except Exception: # pragma: no cover
|
202
|
-
logger.exception(
|
203
|
-
"Error while updating node", exc_info=True
|
204
|
-
)
|
198
|
+
logger.exception("Error while updating node", exc_info=True)
|
205
199
|
except (
|
206
200
|
asyncio.CancelledError,
|
207
201
|
KeyboardInterrupt,
|
@@ -259,11 +253,9 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
|
|
259
253
|
continue
|
260
254
|
existing = self.node_id_cache[pod_name]
|
261
255
|
try:
|
262
|
-
self.node_id_cache[pod_name] = (
|
263
|
-
|
264
|
-
|
265
|
-
read_replica=existing.primary_id is not None,
|
266
|
-
)
|
256
|
+
self.node_id_cache[pod_name] = await self._query_node_metadata(
|
257
|
+
existing.address,
|
258
|
+
read_replica=existing.primary_id is not None,
|
267
259
|
)
|
268
260
|
except NodeConnectionError: # pragma: no cover
|
269
261
|
self._maybe_remove_stale_node(pod_name)
|
@@ -301,9 +293,7 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
|
|
301
293
|
|
302
294
|
async def initialize(self) -> None:
|
303
295
|
self.cluster_task = asyncio.create_task(self.watch_k8s_for_updates())
|
304
|
-
self.update_node_data_cache_task = asyncio.create_task(
|
305
|
-
self.update_node_data_cache()
|
306
|
-
)
|
296
|
+
self.update_node_data_cache_task = asyncio.create_task(self.update_node_data_cache())
|
307
297
|
await self._wait_ready()
|
308
298
|
|
309
299
|
async def finalize(self) -> None:
|
@@ -46,9 +46,7 @@ class ManualDiscovery(AbstractClusterDiscovery):
|
|
46
46
|
except asyncio.CancelledError:
|
47
47
|
return
|
48
48
|
except Exception:
|
49
|
-
logger.exception(
|
50
|
-
"Error while watching cluster members. Will retry at started interval"
|
51
|
-
)
|
49
|
+
logger.exception("Error while watching cluster members. Will retry at started interval")
|
52
50
|
finally:
|
53
51
|
await asyncio.sleep(15)
|
54
52
|
|
@@ -40,9 +40,7 @@ async def setup_cluster_discovery() -> None:
|
|
40
40
|
# already loaded
|
41
41
|
return util
|
42
42
|
|
43
|
-
klass: Union[
|
44
|
-
Type[ManualDiscovery], Type[KubernetesDiscovery], Type[SingleNodeDiscovery]
|
45
|
-
]
|
43
|
+
klass: Union[Type[ManualDiscovery], Type[KubernetesDiscovery], Type[SingleNodeDiscovery]]
|
46
44
|
if settings.cluster_discovery_mode == ClusterDiscoveryMode.MANUAL:
|
47
45
|
klass = ManualDiscovery
|
48
46
|
elif settings.cluster_discovery_mode == ClusterDiscoveryMode.KUBERNETES:
|
@@ -22,16 +22,15 @@ from typing import Any
|
|
22
22
|
from nucliadb_protos.nodereader_pb2 import (
|
23
23
|
EdgeList,
|
24
24
|
RelationEdge,
|
25
|
-
RelationSearchResponse,
|
26
25
|
)
|
27
|
-
from nucliadb_protos.noderesources_pb2 import EmptyResponse
|
28
|
-
from nucliadb_protos.noderesources_pb2 import Shard as NodeResourcesShard
|
29
26
|
from nucliadb_protos.noderesources_pb2 import (
|
27
|
+
EmptyResponse,
|
30
28
|
ShardCreated,
|
31
29
|
ShardId,
|
32
30
|
ShardIds,
|
33
31
|
VectorSetList,
|
34
32
|
)
|
33
|
+
from nucliadb_protos.noderesources_pb2 import Shard as NodeResourcesShard
|
35
34
|
from nucliadb_protos.nodewriter_pb2 import OpStatus
|
36
35
|
from nucliadb_protos.utils_pb2 import Relation
|
37
36
|
|
@@ -90,15 +89,8 @@ class DummyReaderStub: # pragma: no cover
|
|
90
89
|
self.calls.setdefault("GetShard", []).append(data)
|
91
90
|
return NodeResourcesShard(shard_id="shard", fields=2, paragraphs=2, sentences=2)
|
92
91
|
|
93
|
-
async def RelationSearch(self, data): # pragma: no cover
|
94
|
-
self.calls.setdefault("RelationSearch", []).append(data)
|
95
|
-
result = RelationSearchResponse()
|
96
|
-
return result
|
97
|
-
|
98
92
|
async def RelationEdges(self, data): # pragma: no cover
|
99
93
|
self.calls.setdefault("RelationEdges", []).append(data)
|
100
94
|
result = EdgeList()
|
101
|
-
result.list.append(
|
102
|
-
RelationEdge(edge_type=Relation.RelationType.ENTITY, property="dummy")
|
103
|
-
)
|
95
|
+
result.list.append(RelationEdge(edge_type=Relation.RelationType.ENTITY, property="dummy"))
|
104
96
|
return result
|
@@ -19,16 +19,13 @@
|
|
19
19
|
#
|
20
20
|
from typing import Optional
|
21
21
|
|
22
|
-
from lru import LRU
|
23
|
-
from nucliadb_protos.nodereader_pb2_grpc import NodeReaderStub
|
24
|
-
from nucliadb_protos.nodewriter_pb2_grpc import NodeWriterStub
|
22
|
+
from lru import LRU
|
25
23
|
|
26
|
-
from nucliadb.common.cluster.base import AbstractIndexNode
|
27
|
-
from nucliadb.common.cluster.grpc_node_dummy import
|
28
|
-
DummyReaderStub,
|
29
|
-
DummyWriterStub,
|
30
|
-
)
|
24
|
+
from nucliadb.common.cluster.base import AbstractIndexNode
|
25
|
+
from nucliadb.common.cluster.grpc_node_dummy import DummyReaderStub, DummyWriterStub
|
31
26
|
from nucliadb.ingest import SERVICE_NAME
|
27
|
+
from nucliadb_protos.nodereader_pb2_grpc import NodeReaderStub
|
28
|
+
from nucliadb_protos.nodewriter_pb2_grpc import NodeWriterStub
|
32
29
|
from nucliadb_utils.grpc import get_traced_grpc_channel
|
33
30
|
|
34
31
|
from .settings import settings
|
@@ -41,9 +38,7 @@ class IndexNode(AbstractIndexNode):
|
|
41
38
|
_writer: Optional[NodeWriterStub] = None
|
42
39
|
_reader: Optional[NodeReaderStub] = None
|
43
40
|
|
44
|
-
def _get_service_address(
|
45
|
-
self, port_map: dict[str, int], port: Optional[int]
|
46
|
-
) -> str:
|
41
|
+
def _get_service_address(self, port_map: dict[str, int], port: Optional[int]) -> str:
|
47
42
|
hostname = self.address.split(":")[0]
|
48
43
|
if port is None:
|
49
44
|
# For testing purposes we need to be able to have a writing port
|
@@ -60,10 +55,8 @@ class IndexNode(AbstractIndexNode):
|
|
60
55
|
grpc_address = self._get_service_address(
|
61
56
|
settings.writer_port_map, settings.node_writer_port
|
62
57
|
)
|
63
|
-
channel = get_traced_grpc_channel(
|
64
|
-
|
65
|
-
)
|
66
|
-
WRITE_CONNECTIONS[self.address] = NodeWriterStub(channel) # type: ignore
|
58
|
+
channel = get_traced_grpc_channel(grpc_address, SERVICE_NAME, variant="_writer")
|
59
|
+
WRITE_CONNECTIONS[self.address] = NodeWriterStub(channel)
|
67
60
|
else:
|
68
61
|
WRITE_CONNECTIONS[self.address] = DummyWriterStub()
|
69
62
|
self._writer = WRITE_CONNECTIONS[self.address]
|
@@ -76,10 +69,8 @@ class IndexNode(AbstractIndexNode):
|
|
76
69
|
grpc_address = self._get_service_address(
|
77
70
|
settings.reader_port_map, settings.node_reader_port
|
78
71
|
)
|
79
|
-
channel = get_traced_grpc_channel(
|
80
|
-
|
81
|
-
)
|
82
|
-
READ_CONNECTIONS[self.address] = NodeReaderStub(channel) # type: ignore
|
72
|
+
channel = get_traced_grpc_channel(grpc_address, SERVICE_NAME, variant="_reader")
|
73
|
+
READ_CONNECTIONS[self.address] = NodeReaderStub(channel)
|
83
74
|
else:
|
84
75
|
READ_CONNECTIONS[self.address] = DummyReaderStub()
|
85
76
|
self._reader = READ_CONNECTIONS[self.address]
|