nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -403
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +70 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +143 -117
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +37 -128
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -1,120 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from unittest import mock
|
21
|
-
from unittest.mock import Mock
|
22
|
-
|
23
|
-
import pytest
|
24
|
-
from starlette.requests import Request
|
25
|
-
|
26
|
-
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
27
|
-
from nucliadb.models.responses import HTTPClientError
|
28
|
-
from nucliadb.search import predict
|
29
|
-
from nucliadb.search.api.v1.ask import ask_knowledgebox_endpoint
|
30
|
-
from nucliadb.search.search.exceptions import (
|
31
|
-
IncompleteFindResultsError,
|
32
|
-
InvalidQueryError,
|
33
|
-
)
|
34
|
-
from nucliadb_utils.exceptions import LimitsExceededError
|
35
|
-
|
36
|
-
pytestmark = pytest.mark.asyncio
|
37
|
-
|
38
|
-
|
39
|
-
class DummyTestRequest(Request):
|
40
|
-
@property
|
41
|
-
def auth(self):
|
42
|
-
return Mock(scopes=["READER"])
|
43
|
-
|
44
|
-
@property
|
45
|
-
def user(self):
|
46
|
-
return Mock(display_name="username")
|
47
|
-
|
48
|
-
|
49
|
-
@pytest.fixture(scope="function")
|
50
|
-
def ask_mock():
|
51
|
-
with mock.patch(
|
52
|
-
"nucliadb.search.api.v1.ask.ask",
|
53
|
-
) as mocked:
|
54
|
-
yield mocked
|
55
|
-
|
56
|
-
|
57
|
-
@pytest.mark.parametrize(
|
58
|
-
"ask_error,http_error_response",
|
59
|
-
[
|
60
|
-
(
|
61
|
-
KnowledgeBoxNotFound(),
|
62
|
-
HTTPClientError(status_code=404, detail="Knowledge Box not found."),
|
63
|
-
),
|
64
|
-
(
|
65
|
-
LimitsExceededError(402, "over the quota"),
|
66
|
-
HTTPClientError(status_code=402, detail="over the quota"),
|
67
|
-
),
|
68
|
-
(
|
69
|
-
predict.ProxiedPredictAPIError(status=999, detail="foo"),
|
70
|
-
HTTPClientError(status_code=999, detail="foo"),
|
71
|
-
),
|
72
|
-
(
|
73
|
-
IncompleteFindResultsError(),
|
74
|
-
HTTPClientError(
|
75
|
-
status_code=529,
|
76
|
-
detail="Temporary error on information retrieval. Please try again.",
|
77
|
-
),
|
78
|
-
),
|
79
|
-
(
|
80
|
-
predict.RephraseMissingContextError(),
|
81
|
-
HTTPClientError(
|
82
|
-
status_code=412,
|
83
|
-
detail="Unable to rephrase the query with the provided context.",
|
84
|
-
),
|
85
|
-
),
|
86
|
-
(
|
87
|
-
predict.RephraseError("foobar"),
|
88
|
-
HTTPClientError(
|
89
|
-
status_code=529,
|
90
|
-
detail="Temporary error while rephrasing the query. Please try again later. Error: foobar",
|
91
|
-
),
|
92
|
-
),
|
93
|
-
(
|
94
|
-
InvalidQueryError("foobar", "baz"),
|
95
|
-
HTTPClientError(
|
96
|
-
status_code=412, detail="Invalid query. Error in foobar: baz"
|
97
|
-
),
|
98
|
-
),
|
99
|
-
],
|
100
|
-
)
|
101
|
-
async def test_ask_endpoint_handles_errors(ask_mock, ask_error, http_error_response):
|
102
|
-
ask_mock.side_effect = ask_error
|
103
|
-
request = DummyTestRequest(
|
104
|
-
scope={
|
105
|
-
"type": "http",
|
106
|
-
"http_version": "1.1",
|
107
|
-
"method": "GET",
|
108
|
-
"headers": [],
|
109
|
-
}
|
110
|
-
)
|
111
|
-
response = await ask_knowledgebox_endpoint(
|
112
|
-
request=request,
|
113
|
-
kbid="kbid",
|
114
|
-
item=Mock(),
|
115
|
-
x_ndb_client=None,
|
116
|
-
x_nucliadb_user="",
|
117
|
-
x_forwarded_for="",
|
118
|
-
)
|
119
|
-
assert response.status_code == http_error_response.status_code
|
120
|
-
assert response.body == http_error_response.body
|
@@ -1,96 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from unittest import mock
|
21
|
-
from unittest.mock import Mock
|
22
|
-
|
23
|
-
import pytest
|
24
|
-
from starlette.requests import Request
|
25
|
-
|
26
|
-
from nucliadb.models.responses import HTTPClientError
|
27
|
-
from nucliadb.search import predict
|
28
|
-
from nucliadb.search.api.v1.chat import chat_knowledgebox_endpoint
|
29
|
-
from nucliadb_utils.exceptions import LimitsExceededError
|
30
|
-
|
31
|
-
pytestmark = pytest.mark.asyncio
|
32
|
-
|
33
|
-
|
34
|
-
class DummyTestRequest(Request):
|
35
|
-
@property
|
36
|
-
def auth(self):
|
37
|
-
return Mock(scopes=["READER"])
|
38
|
-
|
39
|
-
@property
|
40
|
-
def user(self):
|
41
|
-
return Mock(display_name="username")
|
42
|
-
|
43
|
-
|
44
|
-
@pytest.fixture(scope="function")
|
45
|
-
def create_chat_response_mock():
|
46
|
-
with mock.patch(
|
47
|
-
"nucliadb.search.api.v1.chat.create_chat_response",
|
48
|
-
) as mocked:
|
49
|
-
yield mocked
|
50
|
-
|
51
|
-
|
52
|
-
@pytest.mark.parametrize(
|
53
|
-
"predict_error,http_error_response",
|
54
|
-
[
|
55
|
-
(
|
56
|
-
LimitsExceededError(402, "over the quota"),
|
57
|
-
HTTPClientError(status_code=402, detail="over the quota"),
|
58
|
-
),
|
59
|
-
(
|
60
|
-
predict.RephraseError("foobar"),
|
61
|
-
HTTPClientError(
|
62
|
-
status_code=529,
|
63
|
-
detail="Temporary error while rephrasing the query. Please try again later. Error: foobar",
|
64
|
-
),
|
65
|
-
),
|
66
|
-
(
|
67
|
-
predict.RephraseMissingContextError(),
|
68
|
-
HTTPClientError(
|
69
|
-
status_code=412,
|
70
|
-
detail="Unable to rephrase the query with the provided context.",
|
71
|
-
),
|
72
|
-
),
|
73
|
-
],
|
74
|
-
)
|
75
|
-
async def test_chat_endpoint_handles_errors(
|
76
|
-
create_chat_response_mock, predict_error, http_error_response
|
77
|
-
):
|
78
|
-
create_chat_response_mock.side_effect = predict_error
|
79
|
-
request = DummyTestRequest(
|
80
|
-
scope={
|
81
|
-
"type": "http",
|
82
|
-
"http_version": "1.1",
|
83
|
-
"method": "GET",
|
84
|
-
"headers": [],
|
85
|
-
}
|
86
|
-
)
|
87
|
-
response = await chat_knowledgebox_endpoint(
|
88
|
-
request=request,
|
89
|
-
kbid="kbid",
|
90
|
-
item=Mock(),
|
91
|
-
x_ndb_client=None,
|
92
|
-
x_nucliadb_user="",
|
93
|
-
x_forwarded_for="",
|
94
|
-
)
|
95
|
-
assert response.status_code == http_error_response.status_code
|
96
|
-
assert response.body == http_error_response.body
|
@@ -1,98 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import json
|
21
|
-
from unittest import mock
|
22
|
-
from unittest.mock import Mock
|
23
|
-
|
24
|
-
import pytest
|
25
|
-
from starlette.requests import Request
|
26
|
-
|
27
|
-
from nucliadb.models.responses import HTTPClientError
|
28
|
-
from nucliadb.search import predict
|
29
|
-
from nucliadb.search.api.v1.predict_proxy import predict_proxy_endpoint
|
30
|
-
from nucliadb.search.search.predict_proxy import PredictProxiedEndpoints
|
31
|
-
from nucliadb_utils.exceptions import LimitsExceededError
|
32
|
-
|
33
|
-
pytestmark = pytest.mark.asyncio
|
34
|
-
|
35
|
-
MODULE = "nucliadb.search.api.v1.predict_proxy"
|
36
|
-
|
37
|
-
|
38
|
-
class DummyTestRequest(Request):
|
39
|
-
@property
|
40
|
-
def auth(self):
|
41
|
-
return Mock(scopes=["READER"])
|
42
|
-
|
43
|
-
@property
|
44
|
-
def user(self):
|
45
|
-
return Mock(display_name="username")
|
46
|
-
|
47
|
-
async def json(self):
|
48
|
-
raise json.JSONDecodeError("test", "test", 0)
|
49
|
-
|
50
|
-
|
51
|
-
@pytest.fixture(scope="function")
|
52
|
-
def dummy_request():
|
53
|
-
return DummyTestRequest(
|
54
|
-
scope={
|
55
|
-
"type": "http",
|
56
|
-
"http_version": "1.1",
|
57
|
-
"method": "GET",
|
58
|
-
"headers": [],
|
59
|
-
"query_string": "",
|
60
|
-
}
|
61
|
-
)
|
62
|
-
|
63
|
-
|
64
|
-
@pytest.fixture(scope="function")
|
65
|
-
def predict_proxy():
|
66
|
-
with mock.patch(f"{MODULE}.predict_proxy") as mocked:
|
67
|
-
yield mocked
|
68
|
-
|
69
|
-
|
70
|
-
@pytest.mark.parametrize(
|
71
|
-
"predict_error,http_error_response",
|
72
|
-
[
|
73
|
-
(
|
74
|
-
LimitsExceededError(402, "over the quota"),
|
75
|
-
HTTPClientError(status_code=402, detail="over the quota"),
|
76
|
-
),
|
77
|
-
(
|
78
|
-
predict.ProxiedPredictAPIError(status=500, detail="Temporary error"),
|
79
|
-
HTTPClientError(
|
80
|
-
status_code=500,
|
81
|
-
detail="Temporary error",
|
82
|
-
),
|
83
|
-
),
|
84
|
-
],
|
85
|
-
)
|
86
|
-
async def test_predict_proxy_endpoint_error_handling(
|
87
|
-
predict_proxy,
|
88
|
-
predict_error,
|
89
|
-
http_error_response,
|
90
|
-
dummy_request,
|
91
|
-
):
|
92
|
-
predict_proxy.side_effect = predict_error
|
93
|
-
response = await predict_proxy_endpoint(
|
94
|
-
request=dummy_request,
|
95
|
-
kbid="kbid",
|
96
|
-
endpoint=PredictProxiedEndpoints.CHAT,
|
97
|
-
)
|
98
|
-
assert response.status_code == http_error_response.status_code
|
@@ -1,99 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from unittest import mock
|
21
|
-
from unittest.mock import Mock
|
22
|
-
|
23
|
-
import pytest
|
24
|
-
from starlette.requests import Request
|
25
|
-
|
26
|
-
from nucliadb.models.responses import HTTPClientError
|
27
|
-
from nucliadb.search import predict
|
28
|
-
from nucliadb.search.api.v1.summarize import summarize_endpoint
|
29
|
-
from nucliadb.search.search.summarize import NoResourcesToSummarize
|
30
|
-
from nucliadb_utils.exceptions import LimitsExceededError
|
31
|
-
|
32
|
-
pytestmark = pytest.mark.asyncio
|
33
|
-
|
34
|
-
|
35
|
-
class DummyTestRequest(Request):
|
36
|
-
@property
|
37
|
-
def auth(self):
|
38
|
-
return Mock(scopes=["READER"])
|
39
|
-
|
40
|
-
@property
|
41
|
-
def user(self):
|
42
|
-
return Mock(display_name="username")
|
43
|
-
|
44
|
-
|
45
|
-
@pytest.fixture(scope="function")
|
46
|
-
def dummy_request():
|
47
|
-
return DummyTestRequest(
|
48
|
-
scope={
|
49
|
-
"type": "http",
|
50
|
-
"http_version": "1.1",
|
51
|
-
"method": "GET",
|
52
|
-
"headers": [],
|
53
|
-
}
|
54
|
-
)
|
55
|
-
|
56
|
-
|
57
|
-
@pytest.fixture(scope="function")
|
58
|
-
def summarize():
|
59
|
-
with mock.patch("nucliadb.search.api.v1.summarize.summarize") as mocked:
|
60
|
-
yield mocked
|
61
|
-
|
62
|
-
|
63
|
-
@pytest.mark.parametrize(
|
64
|
-
"error,http_error_response",
|
65
|
-
[
|
66
|
-
(
|
67
|
-
LimitsExceededError(402, "over the quota"),
|
68
|
-
HTTPClientError(status_code=402, detail="over the quota"),
|
69
|
-
),
|
70
|
-
(
|
71
|
-
predict.ProxiedPredictAPIError(status=500, detail="Temporary error"),
|
72
|
-
HTTPClientError(
|
73
|
-
status_code=500,
|
74
|
-
detail="Temporary error",
|
75
|
-
),
|
76
|
-
),
|
77
|
-
(
|
78
|
-
NoResourcesToSummarize(),
|
79
|
-
HTTPClientError(
|
80
|
-
status_code=412,
|
81
|
-
detail="Could not summarize: No resources or extracted text found.",
|
82
|
-
),
|
83
|
-
),
|
84
|
-
],
|
85
|
-
)
|
86
|
-
async def test_summarize_endpoint_handles_errors(
|
87
|
-
summarize,
|
88
|
-
error,
|
89
|
-
http_error_response,
|
90
|
-
dummy_request,
|
91
|
-
):
|
92
|
-
summarize.side_effect = error
|
93
|
-
response = await summarize_endpoint(
|
94
|
-
request=dummy_request,
|
95
|
-
kbid="kbid",
|
96
|
-
item=Mock(),
|
97
|
-
)
|
98
|
-
assert response.status_code == http_error_response.status_code
|
99
|
-
assert response.body == http_error_response.body
|
@@ -1,18 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
@@ -1,18 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
@@ -1,211 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
|
20
|
-
from unittest.mock import AsyncMock, Mock
|
21
|
-
|
22
|
-
import pytest
|
23
|
-
from fastapi import HTTPException
|
24
|
-
from grpc import StatusCode
|
25
|
-
from grpc.aio import AioRpcError
|
26
|
-
|
27
|
-
from nucliadb.common.cluster.base import AbstractIndexNode
|
28
|
-
from nucliadb.search.requesters import utils
|
29
|
-
from nucliadb_protos import nodereader_pb2, writer_pb2
|
30
|
-
from nucliadb_utils.utilities import Utility, clean_utility, get_utility, set_utility
|
31
|
-
|
32
|
-
|
33
|
-
@pytest.fixture
|
34
|
-
def fake_nodes():
|
35
|
-
from nucliadb.common.cluster import manager
|
36
|
-
|
37
|
-
original = manager.INDEX_NODES
|
38
|
-
manager.INDEX_NODES.clear()
|
39
|
-
|
40
|
-
manager.add_index_node(
|
41
|
-
id="node-0",
|
42
|
-
address="nohost",
|
43
|
-
shard_count=0,
|
44
|
-
available_disk=100,
|
45
|
-
dummy=True,
|
46
|
-
)
|
47
|
-
manager.add_index_node(
|
48
|
-
id="node-replica-0",
|
49
|
-
address="nohost",
|
50
|
-
shard_count=0,
|
51
|
-
available_disk=100,
|
52
|
-
dummy=True,
|
53
|
-
primary_id="node-0",
|
54
|
-
)
|
55
|
-
|
56
|
-
yield (["node-0"], ["node-replica-0"])
|
57
|
-
|
58
|
-
manager.INDEX_NODES = original
|
59
|
-
|
60
|
-
|
61
|
-
@pytest.fixture
|
62
|
-
def shard_manager():
|
63
|
-
original = get_utility(Utility.SHARD_MANAGER)
|
64
|
-
|
65
|
-
manager = AsyncMock()
|
66
|
-
manager.get_shards_by_kbid = AsyncMock(
|
67
|
-
return_value=[
|
68
|
-
writer_pb2.ShardObject(
|
69
|
-
shard="shard-id",
|
70
|
-
replicas=[
|
71
|
-
writer_pb2.ShardReplica(
|
72
|
-
shard=writer_pb2.ShardCreated(id="shard-id"), node="node-0"
|
73
|
-
)
|
74
|
-
],
|
75
|
-
)
|
76
|
-
]
|
77
|
-
)
|
78
|
-
|
79
|
-
set_utility(Utility.SHARD_MANAGER, manager)
|
80
|
-
|
81
|
-
yield manager
|
82
|
-
|
83
|
-
if original is None:
|
84
|
-
clean_utility(Utility.SHARD_MANAGER)
|
85
|
-
else:
|
86
|
-
set_utility(Utility.SHARD_MANAGER, original)
|
87
|
-
|
88
|
-
|
89
|
-
@pytest.fixture()
|
90
|
-
def search_methods():
|
91
|
-
def fake_search(
|
92
|
-
node: AbstractIndexNode, shard: str, query: nodereader_pb2.SearchRequest
|
93
|
-
):
|
94
|
-
if node.is_read_replica():
|
95
|
-
raise Exception()
|
96
|
-
return nodereader_pb2.SearchResponse()
|
97
|
-
|
98
|
-
original = utils.METHODS
|
99
|
-
utils.METHODS = {
|
100
|
-
utils.Method.SEARCH: AsyncMock(side_effect=fake_search),
|
101
|
-
utils.Method.PARAGRAPH: AsyncMock(),
|
102
|
-
}
|
103
|
-
|
104
|
-
yield utils.METHODS
|
105
|
-
|
106
|
-
utils.METHODS = original
|
107
|
-
|
108
|
-
|
109
|
-
@pytest.mark.asyncio
|
110
|
-
async def test_node_query_retries_primary_if_secondary_fails(
|
111
|
-
fake_nodes,
|
112
|
-
shard_manager,
|
113
|
-
search_methods,
|
114
|
-
):
|
115
|
-
"""Setting up a node and a faulty replica, validate primary is queried if
|
116
|
-
secondary fails.
|
117
|
-
|
118
|
-
"""
|
119
|
-
pb_query = nodereader_pb2.SearchRequest(shard="shard-id", body="question")
|
120
|
-
results, incomplete_results, queried_nodes = await utils.node_query(
|
121
|
-
kbid="my-kbid",
|
122
|
-
method=utils.Method.SEARCH,
|
123
|
-
pb_query=pb_query,
|
124
|
-
use_read_replica_nodes=True,
|
125
|
-
)
|
126
|
-
# secondary fails, primary is called
|
127
|
-
assert search_methods[utils.Method.SEARCH].await_count == 2
|
128
|
-
assert len(queried_nodes) == 2
|
129
|
-
assert queried_nodes[0][0].is_read_replica()
|
130
|
-
assert not queried_nodes[1][0].is_read_replica()
|
131
|
-
|
132
|
-
results, incomplete_results, queried_nodes = await utils.node_query(
|
133
|
-
kbid="my-kbid",
|
134
|
-
method=utils.Method.PARAGRAPH,
|
135
|
-
pb_query=Mock(),
|
136
|
-
use_read_replica_nodes=True,
|
137
|
-
)
|
138
|
-
# secondary succeeds, no fallback call to primary
|
139
|
-
assert search_methods[utils.Method.PARAGRAPH].await_count == 1
|
140
|
-
assert len(queried_nodes) == 1
|
141
|
-
assert queried_nodes[0][0].is_read_replica()
|
142
|
-
|
143
|
-
|
144
|
-
def test_debug_nodes_info(fake_nodes: tuple[list[str], list[str]]):
|
145
|
-
from nucliadb.common.cluster import manager
|
146
|
-
|
147
|
-
primary = manager.get_index_node(fake_nodes[0][0])
|
148
|
-
assert primary is not None
|
149
|
-
secondary = manager.get_index_node(fake_nodes[1][0])
|
150
|
-
assert secondary is not None
|
151
|
-
|
152
|
-
info = utils.debug_nodes_info([(primary, "shard-a"), (secondary, "shard-b")])
|
153
|
-
assert len(info) == 2
|
154
|
-
|
155
|
-
primary_keys = ["id", "shard_id", "address"]
|
156
|
-
secondary_keys = primary_keys + ["primary_id"]
|
157
|
-
|
158
|
-
for key in primary_keys:
|
159
|
-
assert key in info[0]
|
160
|
-
|
161
|
-
for key in secondary_keys:
|
162
|
-
assert key in info[1]
|
163
|
-
|
164
|
-
|
165
|
-
def test_validate_node_query_results():
|
166
|
-
assert utils.validate_node_query_results([Mock()]) is None
|
167
|
-
|
168
|
-
|
169
|
-
def test_validate_node_query_results_no_results():
|
170
|
-
assert isinstance(utils.validate_node_query_results([]), HTTPException)
|
171
|
-
assert isinstance(utils.validate_node_query_results(None), HTTPException)
|
172
|
-
|
173
|
-
|
174
|
-
def test_validate_node_query_results_unhandled_error():
|
175
|
-
error = utils.validate_node_query_results([Exception()])
|
176
|
-
assert isinstance(error, HTTPException)
|
177
|
-
|
178
|
-
|
179
|
-
def test_validate_node_query_results_invalid_query():
|
180
|
-
result = utils.validate_node_query_results(
|
181
|
-
[
|
182
|
-
AioRpcError(
|
183
|
-
code=StatusCode.INTERNAL,
|
184
|
-
initial_metadata=Mock(),
|
185
|
-
trailing_metadata=Mock(),
|
186
|
-
details="An invalid argument was passed: 'Query is invalid. AllButQueryForbidden'",
|
187
|
-
debug_error_string="",
|
188
|
-
)
|
189
|
-
]
|
190
|
-
)
|
191
|
-
|
192
|
-
assert isinstance(result, HTTPException)
|
193
|
-
assert result.status_code == 412
|
194
|
-
assert result.detail == "Query is invalid. AllButQueryForbidden"
|
195
|
-
|
196
|
-
|
197
|
-
def test_validate_node_query_results_internal_unhandled():
|
198
|
-
result = utils.validate_node_query_results(
|
199
|
-
[
|
200
|
-
AioRpcError(
|
201
|
-
code=StatusCode.INTERNAL,
|
202
|
-
initial_metadata=Mock(),
|
203
|
-
trailing_metadata=Mock(),
|
204
|
-
details="There is something wrong with your query, my friend!",
|
205
|
-
debug_error_string="This query is simply wrong",
|
206
|
-
)
|
207
|
-
]
|
208
|
-
)
|
209
|
-
assert isinstance(result, HTTPException)
|
210
|
-
assert result.status_code == 500
|
211
|
-
assert result.detail == "There is something wrong with your query, my friend!"
|
@@ -1,19 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|