nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -69,9 +69,7 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
|
|
69
69
|
) -> IndexNodeMetadata:
|
70
70
|
async with self.update_lock:
|
71
71
|
if pod_name not in self.node_id_cache:
|
72
|
-
self.node_id_cache[pod_name] = await self._query_node_metadata(
|
73
|
-
node_ip, read_replica
|
74
|
-
)
|
72
|
+
self.node_id_cache[pod_name] = await self._query_node_metadata(node_ip, read_replica)
|
75
73
|
else:
|
76
74
|
self.node_id_cache[pod_name].address = node_ip
|
77
75
|
self.node_id_cache[pod_name].updated_at = time.time()
|
@@ -84,12 +82,10 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
|
|
84
82
|
This method will update global node state by utilizing the cluster manager
|
85
83
|
to add or remove nodes.
|
86
84
|
"""
|
87
|
-
status: kubernetes_asyncio.client.models.v1_pod_status.V1PodStatus = event[
|
85
|
+
status: kubernetes_asyncio.client.models.v1_pod_status.V1PodStatus = event["object"].status
|
86
|
+
event_metadata: kubernetes_asyncio.client.models.v1_object_meta.V1ObjectMeta = event[
|
88
87
|
"object"
|
89
|
-
].
|
90
|
-
event_metadata: kubernetes_asyncio.client.models.v1_object_meta.V1ObjectMeta = (
|
91
|
-
event["object"].metadata
|
92
|
-
)
|
88
|
+
].metadata
|
93
89
|
|
94
90
|
ready = status.container_statuses is not None
|
95
91
|
if event["type"] == "DELETED":
|
@@ -137,7 +133,7 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
|
|
137
133
|
if ready:
|
138
134
|
node = manager.get_index_node(node_data.node_id)
|
139
135
|
if node is None:
|
140
|
-
logger.
|
136
|
+
logger.info(
|
141
137
|
"Adding node",
|
142
138
|
extra={
|
143
139
|
"node_id": node_data.node_id,
|
@@ -162,7 +158,7 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
|
|
162
158
|
else:
|
163
159
|
node = manager.get_index_node(node_data.node_id)
|
164
160
|
if node is not None:
|
165
|
-
logger.
|
161
|
+
logger.info(
|
166
162
|
f"Remove node",
|
167
163
|
extra={
|
168
164
|
"node_id": node_data.node_id,
|
@@ -199,9 +195,7 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
|
|
199
195
|
except NodeConnectionError: # pragma: no cover
|
200
196
|
pass
|
201
197
|
except Exception: # pragma: no cover
|
202
|
-
logger.exception(
|
203
|
-
"Error while updating node", exc_info=True
|
204
|
-
)
|
198
|
+
logger.exception("Error while updating node", exc_info=True)
|
205
199
|
except (
|
206
200
|
asyncio.CancelledError,
|
207
201
|
KeyboardInterrupt,
|
@@ -259,9 +253,7 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
|
|
259
253
|
continue
|
260
254
|
existing = self.node_id_cache[pod_name]
|
261
255
|
try:
|
262
|
-
self.node_id_cache[
|
263
|
-
pod_name
|
264
|
-
] = await self._query_node_metadata(
|
256
|
+
self.node_id_cache[pod_name] = await self._query_node_metadata(
|
265
257
|
existing.address,
|
266
258
|
read_replica=existing.primary_id is not None,
|
267
259
|
)
|
@@ -301,9 +293,7 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
|
|
301
293
|
|
302
294
|
async def initialize(self) -> None:
|
303
295
|
self.cluster_task = asyncio.create_task(self.watch_k8s_for_updates())
|
304
|
-
self.update_node_data_cache_task = asyncio.create_task(
|
305
|
-
self.update_node_data_cache()
|
306
|
-
)
|
296
|
+
self.update_node_data_cache_task = asyncio.create_task(self.update_node_data_cache())
|
307
297
|
await self._wait_ready()
|
308
298
|
|
309
299
|
async def finalize(self) -> None:
|
@@ -46,9 +46,7 @@ class ManualDiscovery(AbstractClusterDiscovery):
|
|
46
46
|
except asyncio.CancelledError:
|
47
47
|
return
|
48
48
|
except Exception:
|
49
|
-
logger.exception(
|
50
|
-
"Error while watching cluster members. Will retry at started interval"
|
51
|
-
)
|
49
|
+
logger.exception("Error while watching cluster members. Will retry at started interval")
|
52
50
|
finally:
|
53
51
|
await asyncio.sleep(15)
|
54
52
|
|
@@ -40,9 +40,7 @@ async def setup_cluster_discovery() -> None:
|
|
40
40
|
# already loaded
|
41
41
|
return util
|
42
42
|
|
43
|
-
klass: Union[
|
44
|
-
Type[ManualDiscovery], Type[KubernetesDiscovery], Type[SingleNodeDiscovery]
|
45
|
-
]
|
43
|
+
klass: Union[Type[ManualDiscovery], Type[KubernetesDiscovery], Type[SingleNodeDiscovery]]
|
46
44
|
if settings.cluster_discovery_mode == ClusterDiscoveryMode.MANUAL:
|
47
45
|
klass = ManualDiscovery
|
48
46
|
elif settings.cluster_discovery_mode == ClusterDiscoveryMode.KUBERNETES:
|
@@ -22,16 +22,15 @@ from typing import Any
|
|
22
22
|
from nucliadb_protos.nodereader_pb2 import (
|
23
23
|
EdgeList,
|
24
24
|
RelationEdge,
|
25
|
-
RelationSearchResponse,
|
26
25
|
)
|
27
|
-
from nucliadb_protos.noderesources_pb2 import EmptyResponse
|
28
|
-
from nucliadb_protos.noderesources_pb2 import Shard as NodeResourcesShard
|
29
26
|
from nucliadb_protos.noderesources_pb2 import (
|
27
|
+
EmptyResponse,
|
30
28
|
ShardCreated,
|
31
29
|
ShardId,
|
32
30
|
ShardIds,
|
33
31
|
VectorSetList,
|
34
32
|
)
|
33
|
+
from nucliadb_protos.noderesources_pb2 import Shard as NodeResourcesShard
|
35
34
|
from nucliadb_protos.nodewriter_pb2 import OpStatus
|
36
35
|
from nucliadb_protos.utils_pb2 import Relation
|
37
36
|
|
@@ -58,20 +57,23 @@ class DummyWriterStub: # pragma: no cover
|
|
58
57
|
async def SetResource(self, data): # pragma: no cover
|
59
58
|
self.calls.setdefault("SetResource", []).append(data)
|
60
59
|
result = OpStatus()
|
61
|
-
result
|
60
|
+
return result
|
61
|
+
|
62
|
+
async def SetResourceFromStorage(self, data): # pragma: no cover
|
63
|
+
self.calls.setdefault("SetResourceFromStorage", []).append(data)
|
64
|
+
result = OpStatus()
|
62
65
|
return result
|
63
66
|
|
64
67
|
async def AddVectorSet(self, data): # pragma: no cover
|
65
68
|
self.calls.setdefault("AddVectorSet", []).append(data)
|
66
69
|
result = OpStatus()
|
67
|
-
result.field_count = 1
|
68
70
|
return result
|
69
71
|
|
70
|
-
async def
|
71
|
-
self.calls.setdefault("
|
72
|
+
async def ListVectorSets(self, data: ShardId): # pragma: no cover
|
73
|
+
self.calls.setdefault("ListVectorSets", []).append(data)
|
72
74
|
result = VectorSetList()
|
73
75
|
result.shard.id = data.id
|
74
|
-
result.
|
76
|
+
result.vectorsets.append("base")
|
75
77
|
return result
|
76
78
|
|
77
79
|
async def GC(self, request: ShardId) -> EmptyResponse: # pragma: no cover
|
@@ -87,15 +89,8 @@ class DummyReaderStub: # pragma: no cover
|
|
87
89
|
self.calls.setdefault("GetShard", []).append(data)
|
88
90
|
return NodeResourcesShard(shard_id="shard", fields=2, paragraphs=2, sentences=2)
|
89
91
|
|
90
|
-
async def RelationSearch(self, data): # pragma: no cover
|
91
|
-
self.calls.setdefault("RelationSearch", []).append(data)
|
92
|
-
result = RelationSearchResponse()
|
93
|
-
return result
|
94
|
-
|
95
92
|
async def RelationEdges(self, data): # pragma: no cover
|
96
93
|
self.calls.setdefault("RelationEdges", []).append(data)
|
97
94
|
result = EdgeList()
|
98
|
-
result.list.append(
|
99
|
-
RelationEdge(edge_type=Relation.RelationType.ENTITY, property="dummy")
|
100
|
-
)
|
95
|
+
result.list.append(RelationEdge(edge_type=Relation.RelationType.ENTITY, property="dummy"))
|
101
96
|
return result
|
@@ -19,16 +19,13 @@
|
|
19
19
|
#
|
20
20
|
from typing import Optional
|
21
21
|
|
22
|
-
from lru import LRU
|
23
|
-
from nucliadb_protos.nodereader_pb2_grpc import NodeReaderStub
|
24
|
-
from nucliadb_protos.nodewriter_pb2_grpc import NodeWriterStub
|
22
|
+
from lru import LRU
|
25
23
|
|
26
|
-
from nucliadb.common.cluster.base import AbstractIndexNode
|
27
|
-
from nucliadb.common.cluster.grpc_node_dummy import
|
28
|
-
DummyReaderStub,
|
29
|
-
DummyWriterStub,
|
30
|
-
)
|
24
|
+
from nucliadb.common.cluster.base import AbstractIndexNode
|
25
|
+
from nucliadb.common.cluster.grpc_node_dummy import DummyReaderStub, DummyWriterStub
|
31
26
|
from nucliadb.ingest import SERVICE_NAME
|
27
|
+
from nucliadb_protos.nodereader_pb2_grpc import NodeReaderStub
|
28
|
+
from nucliadb_protos.nodewriter_pb2_grpc import NodeWriterStub
|
32
29
|
from nucliadb_utils.grpc import get_traced_grpc_channel
|
33
30
|
|
34
31
|
from .settings import settings
|
@@ -41,9 +38,7 @@ class IndexNode(AbstractIndexNode):
|
|
41
38
|
_writer: Optional[NodeWriterStub] = None
|
42
39
|
_reader: Optional[NodeReaderStub] = None
|
43
40
|
|
44
|
-
def _get_service_address(
|
45
|
-
self, port_map: dict[str, int], port: Optional[int]
|
46
|
-
) -> str:
|
41
|
+
def _get_service_address(self, port_map: dict[str, int], port: Optional[int]) -> str:
|
47
42
|
hostname = self.address.split(":")[0]
|
48
43
|
if port is None:
|
49
44
|
# For testing purposes we need to be able to have a writing port
|
@@ -60,10 +55,8 @@ class IndexNode(AbstractIndexNode):
|
|
60
55
|
grpc_address = self._get_service_address(
|
61
56
|
settings.writer_port_map, settings.node_writer_port
|
62
57
|
)
|
63
|
-
channel = get_traced_grpc_channel(
|
64
|
-
|
65
|
-
)
|
66
|
-
WRITE_CONNECTIONS[self.address] = NodeWriterStub(channel) # type: ignore
|
58
|
+
channel = get_traced_grpc_channel(grpc_address, SERVICE_NAME, variant="_writer")
|
59
|
+
WRITE_CONNECTIONS[self.address] = NodeWriterStub(channel)
|
67
60
|
else:
|
68
61
|
WRITE_CONNECTIONS[self.address] = DummyWriterStub()
|
69
62
|
self._writer = WRITE_CONNECTIONS[self.address]
|
@@ -76,10 +69,8 @@ class IndexNode(AbstractIndexNode):
|
|
76
69
|
grpc_address = self._get_service_address(
|
77
70
|
settings.reader_port_map, settings.node_reader_port
|
78
71
|
)
|
79
|
-
channel = get_traced_grpc_channel(
|
80
|
-
|
81
|
-
)
|
82
|
-
READ_CONNECTIONS[self.address] = NodeReaderStub(channel) # type: ignore
|
72
|
+
channel = get_traced_grpc_channel(grpc_address, SERVICE_NAME, variant="_reader")
|
73
|
+
READ_CONNECTIONS[self.address] = NodeReaderStub(channel)
|
83
74
|
else:
|
84
75
|
READ_CONNECTIONS[self.address] = DummyReaderStub()
|
85
76
|
self._reader = READ_CONNECTIONS[self.address]
|