nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/tests/fixtures.py
DELETED
@@ -1,735 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import logging
|
21
|
-
import os
|
22
|
-
import tempfile
|
23
|
-
from os.path import dirname
|
24
|
-
from typing import AsyncIterator
|
25
|
-
from unittest.mock import Mock
|
26
|
-
|
27
|
-
import asyncpg
|
28
|
-
import pytest
|
29
|
-
import tikv_client # type: ignore
|
30
|
-
from grpc import aio
|
31
|
-
from httpx import AsyncClient
|
32
|
-
from nucliadb_protos.train_pb2_grpc import TrainStub
|
33
|
-
from nucliadb_protos.utils_pb2 import Relation, RelationNode
|
34
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage
|
35
|
-
from nucliadb_protos.writer_pb2_grpc import WriterStub
|
36
|
-
from pytest_lazy_fixtures import lazy_fixture
|
37
|
-
from redis import asyncio as aioredis
|
38
|
-
|
39
|
-
from nucliadb.common.cluster import manager as cluster_manager
|
40
|
-
from nucliadb.common.maindb.driver import Driver
|
41
|
-
from nucliadb.common.maindb.exceptions import UnsetUtility
|
42
|
-
from nucliadb.common.maindb.local import LocalDriver
|
43
|
-
from nucliadb.common.maindb.pg import PGDriver
|
44
|
-
from nucliadb.common.maindb.redis import RedisDriver
|
45
|
-
from nucliadb.common.maindb.tikv import TiKVDriver
|
46
|
-
from nucliadb.common.maindb.utils import get_driver
|
47
|
-
from nucliadb.ingest.settings import DriverConfig, DriverSettings
|
48
|
-
from nucliadb.ingest.settings import settings as ingest_settings
|
49
|
-
from nucliadb.standalone.config import config_nucliadb
|
50
|
-
from nucliadb.standalone.run import run_async_nucliadb
|
51
|
-
from nucliadb.standalone.settings import Settings
|
52
|
-
from nucliadb.tests.utils import inject_message
|
53
|
-
from nucliadb.writer import API_PREFIX
|
54
|
-
from nucliadb_telemetry.logs import setup_logging
|
55
|
-
from nucliadb_telemetry.settings import (
|
56
|
-
LogFormatType,
|
57
|
-
LogLevel,
|
58
|
-
LogOutputType,
|
59
|
-
LogSettings,
|
60
|
-
)
|
61
|
-
from nucliadb_utils.storages.settings import settings as storage_settings
|
62
|
-
from nucliadb_utils.tests import free_port
|
63
|
-
from nucliadb_utils.utilities import (
|
64
|
-
Utility,
|
65
|
-
clean_utility,
|
66
|
-
clear_global_cache,
|
67
|
-
get_utility,
|
68
|
-
set_utility,
|
69
|
-
)
|
70
|
-
|
71
|
-
logger = logging.getLogger(__name__)
|
72
|
-
|
73
|
-
|
74
|
-
@pytest.fixture(scope="function")
|
75
|
-
async def dummy_processing():
|
76
|
-
from nucliadb_utils.settings import nuclia_settings
|
77
|
-
|
78
|
-
nuclia_settings.dummy_processing = True
|
79
|
-
|
80
|
-
|
81
|
-
@pytest.fixture(scope="function", autouse=True)
|
82
|
-
def analytics_disabled():
|
83
|
-
os.environ["NUCLIADB_DISABLE_ANALYTICS"] = "True"
|
84
|
-
yield
|
85
|
-
os.environ.pop("NUCLIADB_DISABLE_ANALYTICS")
|
86
|
-
|
87
|
-
|
88
|
-
def reset_config():
|
89
|
-
from nucliadb.common.cluster import settings as cluster_settings
|
90
|
-
from nucliadb.ingest import settings as ingest_settings
|
91
|
-
from nucliadb.train import settings as train_settings
|
92
|
-
from nucliadb.writer import settings as writer_settings
|
93
|
-
from nucliadb_utils import settings as utils_settings
|
94
|
-
from nucliadb_utils.cache import settings as cache_settings
|
95
|
-
|
96
|
-
all_settings = [
|
97
|
-
cluster_settings.settings,
|
98
|
-
ingest_settings.settings,
|
99
|
-
train_settings.settings,
|
100
|
-
writer_settings.settings,
|
101
|
-
cache_settings.settings,
|
102
|
-
utils_settings.audit_settings,
|
103
|
-
utils_settings.http_settings,
|
104
|
-
utils_settings.indexing_settings,
|
105
|
-
utils_settings.nuclia_settings,
|
106
|
-
utils_settings.nucliadb_settings,
|
107
|
-
utils_settings.storage_settings,
|
108
|
-
utils_settings.transaction_settings,
|
109
|
-
]
|
110
|
-
for settings in all_settings:
|
111
|
-
defaults = type(settings)()
|
112
|
-
for attr, _value in settings:
|
113
|
-
default_value = getattr(defaults, attr)
|
114
|
-
setattr(settings, attr, default_value)
|
115
|
-
|
116
|
-
from nucliadb.common.cluster import manager
|
117
|
-
|
118
|
-
manager.INDEX_NODES.clear()
|
119
|
-
|
120
|
-
|
121
|
-
@pytest.fixture(scope="function")
|
122
|
-
def tmpdir():
|
123
|
-
try:
|
124
|
-
with tempfile.TemporaryDirectory() as tmpdir:
|
125
|
-
yield tmpdir
|
126
|
-
except OSError:
|
127
|
-
# Python error on tempfile when tearing down the fixture.
|
128
|
-
# Solved in version 3.11
|
129
|
-
pass
|
130
|
-
|
131
|
-
|
132
|
-
@pytest.fixture(scope="function")
|
133
|
-
async def nucliadb(
|
134
|
-
dummy_processing, analytics_disabled, driver_settings, tmpdir, learning_config
|
135
|
-
):
|
136
|
-
from nucliadb.common.cluster import manager
|
137
|
-
|
138
|
-
manager.INDEX_NODES.clear()
|
139
|
-
|
140
|
-
# we need to force DATA_PATH updates to run every test on the proper
|
141
|
-
# temporary directory
|
142
|
-
data_path = f"{tmpdir}/node"
|
143
|
-
local_files = f"{tmpdir}/blob"
|
144
|
-
os.environ["DATA_PATH"] = data_path
|
145
|
-
|
146
|
-
settings = Settings(
|
147
|
-
file_backend="local",
|
148
|
-
local_files=local_files,
|
149
|
-
data_path=data_path,
|
150
|
-
http_port=free_port(),
|
151
|
-
ingest_grpc_port=free_port(),
|
152
|
-
train_grpc_port=free_port(),
|
153
|
-
standalone_node_port=free_port(),
|
154
|
-
log_format_type=LogFormatType.PLAIN,
|
155
|
-
log_output_type=LogOutputType.FILE,
|
156
|
-
**driver_settings.dict(),
|
157
|
-
)
|
158
|
-
|
159
|
-
config_nucliadb(settings)
|
160
|
-
|
161
|
-
# Make sure tests don't write logs outside of the tmpdir
|
162
|
-
os.environ["ERROR_LOG"] = f"{tmpdir}/logs/error.log"
|
163
|
-
os.environ["ACCESS_LOG"] = f"{tmpdir}/logs/access.log"
|
164
|
-
os.environ["INFO_LOG"] = f"{tmpdir}/logs/info.log"
|
165
|
-
|
166
|
-
setup_logging(
|
167
|
-
settings=LogSettings(
|
168
|
-
log_output_type=LogOutputType.FILE,
|
169
|
-
log_format_type=LogFormatType.PLAIN,
|
170
|
-
debug=False,
|
171
|
-
log_level=LogLevel.WARNING,
|
172
|
-
)
|
173
|
-
)
|
174
|
-
server = await run_async_nucliadb(settings)
|
175
|
-
|
176
|
-
yield settings
|
177
|
-
|
178
|
-
await maybe_cleanup_maindb()
|
179
|
-
|
180
|
-
reset_config()
|
181
|
-
clear_global_cache()
|
182
|
-
await server.shutdown()
|
183
|
-
|
184
|
-
|
185
|
-
@pytest.fixture(scope="function")
|
186
|
-
async def nucliadb_reader(nucliadb: Settings):
|
187
|
-
async with AsyncClient(
|
188
|
-
headers={"X-NUCLIADB-ROLES": "READER"},
|
189
|
-
base_url=f"http://localhost:{nucliadb.http_port}/{API_PREFIX}/v1",
|
190
|
-
) as client:
|
191
|
-
yield client
|
192
|
-
|
193
|
-
|
194
|
-
@pytest.fixture(scope="function")
|
195
|
-
async def nucliadb_writer(nucliadb: Settings):
|
196
|
-
async with AsyncClient(
|
197
|
-
headers={"X-NUCLIADB-ROLES": "WRITER"},
|
198
|
-
base_url=f"http://localhost:{nucliadb.http_port}/{API_PREFIX}/v1",
|
199
|
-
) as client:
|
200
|
-
yield client
|
201
|
-
|
202
|
-
|
203
|
-
@pytest.fixture(scope="function")
|
204
|
-
async def nucliadb_manager(nucliadb: Settings):
|
205
|
-
async with AsyncClient(
|
206
|
-
headers={"X-NUCLIADB-ROLES": "MANAGER"},
|
207
|
-
base_url=f"http://localhost:{nucliadb.http_port}/{API_PREFIX}/v1",
|
208
|
-
) as client:
|
209
|
-
yield client
|
210
|
-
|
211
|
-
|
212
|
-
@pytest.fixture(scope="function")
|
213
|
-
async def knowledgebox(nucliadb_manager: AsyncClient, request):
|
214
|
-
resp = await nucliadb_manager.post(
|
215
|
-
"/kbs", json={"slug": "knowledgebox", "release_channel": request.param}
|
216
|
-
)
|
217
|
-
assert resp.status_code == 201
|
218
|
-
uuid = resp.json().get("uuid")
|
219
|
-
|
220
|
-
yield uuid
|
221
|
-
|
222
|
-
resp = await nucliadb_manager.delete(f"/kb/{uuid}")
|
223
|
-
assert resp.status_code == 200
|
224
|
-
|
225
|
-
|
226
|
-
@pytest.fixture(scope="function")
|
227
|
-
async def nucliadb_grpc(nucliadb: Settings):
|
228
|
-
stub = WriterStub(aio.insecure_channel(f"localhost:{nucliadb.ingest_grpc_port}")) # type: ignore
|
229
|
-
return stub
|
230
|
-
|
231
|
-
|
232
|
-
@pytest.fixture(scope="function")
|
233
|
-
async def nucliadb_train(nucliadb: Settings):
|
234
|
-
stub = TrainStub(aio.insecure_channel(f"localhost:{nucliadb.train_grpc_port}")) # type: ignore
|
235
|
-
return stub
|
236
|
-
|
237
|
-
|
238
|
-
@pytest.fixture(scope="function")
async def knowledge_graph(
    nucliadb_writer: AsyncClient, nucliadb_grpc: WriterStub, knowledgebox
):
    """Populate the knowledge box with a small relation graph and entity groups.

    Steps, in order:

    1. Create a resource titled "Knowledge graph" through the writer API.
    2. Build the entity nodes and the relations between them, and inject
       them as a ``BrokerMessage`` for that resource.
    3. Create the ``scientist`` entities group, delete "Isaac Newsome" from
       it, and create the ``poet`` entities group.

    Returns:
        A ``(nodes, edges)`` tuple: ``nodes`` maps each entity value to its
        ``RelationNode`` and ``edges`` is the list of ``Relation`` messages.
    """
    created = await nucliadb_writer.post(
        f"/kb/{knowledgebox}/resources",
        json={
            "title": "Knowledge graph",
            "slug": "knowledgegraph",
            "summary": "Test knowledge graph",
        },
    )
    assert created.status_code == 201
    rid = created.json()["uuid"]

    # (value, subtype) of every entity node, in dictionary insertion order.
    node_specs = [
        ("Animal", ""),
        ("Batman", ""),
        ("Becquer", ""),
        ("Cat", ""),
        ("Catwoman", ""),
        ("Eric", ""),
        ("Fly", ""),
        ("Gravity", ""),
        ("Joan Antoni", ""),
        ("Joker", ""),
        ("Newton", "science"),
        ("Isaac Newsome", "science"),
        ("Physics", "science"),
        ("Poetry", ""),
        ("Swallow", ""),
    ]
    nodes = {
        value: RelationNode(
            value=value, ntype=RelationNode.NodeType.ENTITY, subtype=subtype
        )
        for value, subtype in node_specs
    }

    entity = Relation.RelationType.ENTITY
    about = Relation.RelationType.ABOUT
    # (relation type, source node, target node, label), in list order.
    edge_specs = [
        (entity, "Batman", "Catwoman", "love"),
        (entity, "Batman", "Joker", "fight"),
        (entity, "Joker", "Physics", "enjoy"),
        (entity, "Catwoman", "Cat", "imitate"),
        (entity, "Cat", "Animal", "species"),
        (entity, "Newton", "Physics", "study"),
        (entity, "Newton", "Gravity", "formulate"),
        (entity, "Isaac Newsome", "Physics", "study"),
        (entity, "Isaac Newsome", "Gravity", "formulate"),
        (entity, "Eric", "Cat", "like"),
        (entity, "Eric", "Joan Antoni", "collaborate"),
        (entity, "Joan Antoni", "Eric", "collaborate"),
        (entity, "Joan Antoni", "Becquer", "read"),
        (entity, "Becquer", "Poetry", "write"),
        (entity, "Becquer", "Poetry", "like"),
        (about, "Poetry", "Swallow", "about"),
        (entity, "Swallow", "Animal", "species"),
        (entity, "Swallow", "Fly", "can"),
        (entity, "Fly", "Gravity", "defy"),
    ]
    edges = [
        Relation(
            relation=kind,
            source=nodes[origin],
            to=nodes[target],
            relation_label=label,
        )
        for kind, origin, target, label in edge_specs
    ]

    message = BrokerMessage()
    message.uuid = rid
    message.kbid = knowledgebox
    message.relations.extend(edges)
    await inject_message(nucliadb_grpc, message)

    scientist_group = {
        "title": "scientist",
        "color": "",
        "entities": {
            "Isaac": {"value": "Isaac"},
            "Isaac Newton": {"value": "Isaac Newton", "represents": ["Newton"]},
            "Isaac Newsome": {"value": "Isaac Newsome"},
        },
        "custom": True,
        "group": "scientist",
    }
    resp = await nucliadb_writer.post(
        f"/kb/{knowledgebox}/entitiesgroups", json=scientist_group
    )
    assert resp.status_code == 200, resp.content

    resp = await nucliadb_writer.patch(
        f"/kb/{knowledgebox}/entitiesgroup/scientist",
        json={"add": {}, "update": {}, "delete": ["Isaac Newsome"]},
    )
    assert resp.status_code == 200, resp.content

    poet_group = {
        "title": "poet",
        "color": "",
        "entities": {
            "Becquer": {
                "value": "Becquer",
                "represents": ["Gustavo Adolfo Bécquer"],
            },
            "Gustavo Adolfo Bécquer": {"value": "Gustavo Adolfo Bécquer"},
        },
        "custom": True,
        "group": "poet",
    }
    resp = await nucliadb_writer.post(
        f"/kb/{knowledgebox}/entitiesgroups", json=poet_group
    )
    assert resp.status_code == 200, resp.content

    return (nodes, edges)
|
463
|
-
|
464
|
-
|
465
|
-
@pytest.fixture(scope="function")
async def stream_audit(natsd: str):
    """Yield an initialized ``StreamAuditStorage`` and finalize it on teardown.

    The storage is constructed with ``natsd`` as its only server and with the
    jetstream target, partition count and hash seed read from
    ``audit_settings``.
    """
    from nucliadb_utils.audit.stream import StreamAuditStorage
    from nucliadb_utils.settings import audit_settings

    nats_servers = [natsd]
    storage = StreamAuditStorage(
        nats_servers,
        audit_settings.audit_jetstream_target,  # type: ignore
        audit_settings.audit_partitions,
        audit_settings.audit_hash_seed,
    )
    await storage.initialize()

    yield storage

    await storage.finalize()
|
479
|
-
|
480
|
-
|
481
|
-
@pytest.fixture(scope="function")
def predict_mock() -> Mock:  # type: ignore
    """Replace the ``PREDICT`` utility with a ``Mock`` for the test.

    Yields the mock. On teardown the previously registered utility is put
    back, or the utility is cleared if none was registered before.
    """
    previous = get_utility(Utility.PREDICT)
    replacement = Mock()
    set_utility(Utility.PREDICT, replacement)

    yield replacement

    if previous is not None:
        set_utility(Utility.PREDICT, previous)
    else:
        clean_utility(Utility.PREDICT)
|
493
|
-
|
494
|
-
|
495
|
-
@pytest.fixture(scope="function")
def metrics_registry():
    """Yield the global prometheus registry after clearing recorded metrics.

    Every registered collector that has a ``_metrics`` attribute gets that
    mapping cleared; collectors without it are left untouched.
    """
    import prometheus_client.registry  # type: ignore

    registry = prometheus_client.registry.REGISTRY
    for collector in registry._names_to_collectors.values():
        if hasattr(collector, "_metrics"):
            collector._metrics.clear()
    yield registry
|
504
|
-
|
505
|
-
|
506
|
-
@pytest.fixture(scope="function")
async def redis_config(redis):
    """Point ``ingest_settings`` at the test redis and yield its URL.

    Setup flushes the redis instance and sets the local testing files
    directory to this module's directory. Teardown resets the redis URL to
    ``None``, restores the previous driver setting, flushes and closes the
    redis client, and finalizes the ``PUBSUB`` utility when one is registered.
    """
    redis_url = f"redis://{redis[0]}:{redis[1]}"
    ingest_settings.driver_redis_url = redis_url
    previous_driver = ingest_settings.driver
    ingest_settings.driver = "redis"
    storage_settings.local_testing_files = f"{dirname(__file__)}"

    client = aioredis.from_url(redis_url)
    await client.flushall()

    yield ingest_settings.driver_redis_url

    ingest_settings.driver_redis_url = None
    ingest_settings.driver = previous_driver
    await client.flushall()
    await client.close(close_connection_pool=True)

    registered_pubsub = get_utility(Utility.PUBSUB)
    if registered_pubsub is not None:
        await registered_pubsub.finalize()
|
527
|
-
|
528
|
-
|
529
|
-
@pytest.fixture(scope="function")
def local_driver_settings(tmpdir):
    """Return ``DriverSettings`` for the local driver stored under ``tmpdir``."""
    local_url = f"{tmpdir}/main"
    settings = DriverSettings(driver=DriverConfig.LOCAL, driver_local_url=local_url)
    return settings
|
535
|
-
|
536
|
-
|
537
|
-
@pytest.fixture(scope="function")
async def local_driver(local_driver_settings) -> AsyncIterator[Driver]:
    """Yield an initialized ``LocalDriver`` and finalize it on teardown.

    ``ingest_settings`` is switched to the local driver and URL before the
    driver is created. Teardown resets the local URL to ``None`` and clears
    the ``MAINDB_DRIVER`` utility.
    """
    local_url = local_driver_settings.driver_local_url
    ingest_settings.driver = DriverConfig.LOCAL
    ingest_settings.driver_local_url = local_url

    local: Driver = LocalDriver(url=local_url)
    await local.initialize()

    yield local

    await local.finalize()
    ingest_settings.driver_local_url = None
    clean_utility(Utility.MAINDB_DRIVER)
|
552
|
-
|
553
|
-
|
554
|
-
@pytest.fixture(scope="function")
def tikv_driver_settings(tikvd):
    """Return ``DriverSettings`` for TiKV after wiping the database.

    The URL is ``localhost:2379`` when ``TESTING_TIKV_LOCAL`` is set to a
    non-empty value; otherwise it is built from the ``tikvd`` fixture.
    """
    use_local_tikv = os.environ.get("TESTING_TIKV_LOCAL", None)
    url = "localhost:2379" if use_local_tikv else f"{tikvd[0]}:{tikvd[2]}"

    # Clear the db before tikv is used. This happens here rather than in the
    # `tikv_driver` fixture because some tests use these settings without
    # using the driver fixture.
    client = tikv_client.TransactionClient.connect([url])
    wipe_txn = client.begin(pessimistic=False)
    for stale_key in wipe_txn.scan_keys(start=b"", end=None, limit=99999):
        wipe_txn.delete(stale_key)
    wipe_txn.commit()

    settings = DriverSettings(driver=DriverConfig.TIKV, driver_tikv_url=[url])
    return settings
|
572
|
-
|
573
|
-
|
574
|
-
@pytest.fixture(scope="function")
async def tikv_driver(tikv_driver_settings) -> AsyncIterator[Driver]:
    """Yield an initialized ``TiKVDriver`` and finalize it on teardown.

    ``ingest_settings`` is switched to the TiKV driver and URL before the
    driver is created. Teardown resets the TiKV URL to ``None`` and clears
    the ``MAINDB_DRIVER`` utility.
    """
    tikv_url = tikv_driver_settings.driver_tikv_url
    ingest_settings.driver = DriverConfig.TIKV
    ingest_settings.driver_tikv_url = tikv_url

    tikv: Driver = TiKVDriver(url=tikv_url)
    await tikv.initialize()

    yield tikv

    await tikv.finalize()
    ingest_settings.driver_tikv_url = None
    clean_utility(Utility.MAINDB_DRIVER)
|
588
|
-
|
589
|
-
|
590
|
-
@pytest.fixture(scope="function")
def redis_driver_settings(redis):
    """Return ``DriverSettings`` for the redis driver at the ``redis`` fixture address."""
    redis_url = f"redis://{redis[0]}:{redis[1]}"
    settings = DriverSettings(driver=DriverConfig.REDIS, driver_redis_url=redis_url)
    return settings
|
596
|
-
|
597
|
-
|
598
|
-
@pytest.fixture(scope="function")
async def redis_driver(redis_driver_settings) -> AsyncIterator[RedisDriver]:
    """Yield an initialized ``RedisDriver`` registered as ``MAINDB_DRIVER``.

    Setup switches ``ingest_settings`` to redis, initializes the driver and
    flushes the redis instance. Teardown finalizes the driver, resets the
    redis URL to ``None`` and clears the ``MAINDB_DRIVER`` utility.
    """
    redis_url = redis_driver_settings.driver_redis_url
    ingest_settings.driver = DriverConfig.REDIS
    ingest_settings.driver_redis_url = redis_url

    redis_maindb = RedisDriver(url=redis_url)
    await redis_maindb.initialize()

    # Start every test from an empty database.
    assert redis_maindb.redis is not None
    await redis_maindb.redis.flushall()
    logging.info(f"Redis driver ready at {redis_url}")

    set_utility(Utility.MAINDB_DRIVER, redis_maindb)

    yield redis_maindb

    await redis_maindb.finalize()
    ingest_settings.driver_redis_url = None
    clean_utility(Utility.MAINDB_DRIVER)
|
618
|
-
|
619
|
-
|
620
|
-
@pytest.fixture(scope="function")
def pg_driver_settings(pg):
    """Return ``DriverSettings`` for the PG driver at the ``pg`` fixture address.

    The URL uses the ``postgres`` user, password and database.
    """
    host, port = pg[0], pg[1]
    settings = DriverSettings(
        driver=DriverConfig.PG,
        driver_pg_url=f"postgresql://postgres:postgres@{host}:{port}/postgres",
    )
    return settings
|
627
|
-
|
628
|
-
|
629
|
-
@pytest.fixture(scope="function")
async def pg_driver(pg_driver_settings):
    """Yield an initialized ``PGDriver`` and finalize it on teardown.

    Setup switches ``ingest_settings`` to the PG driver and URL, then drops
    the ``resources`` table (if present) over a short-lived connection before
    the driver is initialized. Teardown finalizes the driver and resets the
    PG URL to ``None``.
    """
    url = pg_driver_settings.driver_pg_url
    ingest_settings.driver = DriverConfig.PG
    ingest_settings.driver_pg_url = url

    conn = await asyncpg.connect(url)
    try:
        await conn.execute(
            """
            DROP table IF EXISTS resources;
            """
        )
    finally:
        # Close the connection even when the DROP fails, so a broken setup
        # does not leak a connection for the rest of the test session.
        await conn.close()

    driver = PGDriver(url=url)
    await driver.initialize()

    yield driver

    await driver.finalize()
    ingest_settings.driver_pg_url = None
|
649
|
-
|
650
|
-
|
651
|
-
def driver_settings_lazy_fixtures(default_drivers="local"):
    """Build lazy references to the ``<type>_driver_settings`` fixtures.

    The driver types are read from the comma-separated
    ``TESTING_MAINDB_DRIVERS`` environment variable, falling back to
    ``default_drivers`` when it is not set. Whitespace around each entry is
    ignored, so ``"redis, local"`` works the same as ``"redis,local"``.

    Returns:
        One ``lazy_fixture.lf`` reference per driver type, in the given order.
    """
    driver_types = os.environ.get("TESTING_MAINDB_DRIVERS", default_drivers)
    return [
        lazy_fixture.lf(f"{driver_type.strip()}_driver_settings")
        for driver_type in driver_types.split(",")
    ]
|
657
|
-
|
658
|
-
|
659
|
-
@pytest.fixture(scope="function", params=driver_settings_lazy_fixtures())
def driver_settings(request):
    """
    Allows dynamically loading the driver settings fixtures via env vars.

    The parameters come from ``driver_settings_lazy_fixtures()``, which reads
    the ``TESTING_MAINDB_DRIVERS`` environment variable:

    TESTING_MAINDB_DRIVERS=redis,local pytest nucliadb/nucliadb/tests/

    Any test using this fixture is run once per listed driver type; with the
    example above, once with the redis settings and once with the local ones.
    """
    yield request.param
|
669
|
-
|
670
|
-
|
671
|
-
def driver_lazy_fixtures(default_drivers: str = "redis"):
    """
    Allows running tests using maindb_driver for each supported driver type via env vars.

    The driver types are read from the comma-separated
    ``TESTING_MAINDB_DRIVERS`` environment variable, falling back to
    ``default_drivers`` when it is not set. Whitespace around each entry is
    ignored.

    TESTING_MAINDB_DRIVERS=redis,local pytest nucliadb/nucliadb/ingest/tests/

    With the example above, the returned list references the ``redis_driver``
    and ``local_driver`` fixtures, in that order.
    """
    driver_types = os.environ.get("TESTING_MAINDB_DRIVERS", default_drivers)
    return [
        lazy_fixture.lf(f"{driver_type.strip()}_driver")
        for driver_type in driver_types.split(",")
    ]
|
684
|
-
|
685
|
-
|
686
|
-
@pytest.fixture(
    scope="function",
    params=driver_lazy_fixtures(),
)
async def maindb_driver(request):
    """Register the parametrized driver as ``MAINDB_DRIVER`` and yield it.

    The parameters come from ``driver_lazy_fixtures()``. On teardown every
    key is deleted through ``cleanup_maindb`` and the utility is cleared.
    """
    driver = request.param
    set_utility(Utility.MAINDB_DRIVER, driver)

    yield driver

    try:
        await cleanup_maindb(driver)
    finally:
        # Always unregister the driver, even when the cleanup fails, so a
        # broken teardown does not leave it registered for later tests.
        clean_utility(Utility.MAINDB_DRIVER)
|
698
|
-
|
699
|
-
|
700
|
-
async def maybe_cleanup_maindb():
    """Best-effort cleanup of maindb, for use on test teardown.

    Does nothing when no driver is registered (``get_driver`` raises
    ``UnsetUtility``). Any error raised by ``cleanup_maindb`` is logged with
    its traceback and then suppressed, so that teardown never fails here.
    """
    try:
        driver = get_driver()
    except UnsetUtility:
        return

    try:
        await cleanup_maindb(driver)
    except Exception:
        # Deliberately swallowed, but the traceback is kept in the log so a
        # failing cleanup can still be diagnosed.
        logger.exception("Could not cleanup maindb on test teardown")
|
711
|
-
|
712
|
-
|
713
|
-
async def cleanup_maindb(driver: Driver):
    """Delete every key in maindb through a single transaction.

    Does nothing when ``driver.initialized`` is falsy. The keys are listed
    in full before any of them is deleted, and the transaction is committed
    at the end.
    """
    if driver.initialized:
        async with driver.transaction() as txn:
            existing_keys = []
            async for existing_key in txn.keys("", count=-1):
                existing_keys.append(existing_key)
            for existing_key in existing_keys:
                await txn.delete(existing_key)
            await txn.commit()
|
721
|
-
|
722
|
-
|
723
|
-
@pytest.fixture(scope="function")
async def txn(maindb_driver):
    """Yield a transaction begun on ``maindb_driver`` and abort it on teardown."""
    transaction = await maindb_driver.begin()
    yield transaction
    await transaction.abort()
|
728
|
-
|
729
|
-
|
730
|
-
@pytest.fixture(scope="function")
|
731
|
-
async def shard_manager(storage, maindb_driver):
|
732
|
-
mng = cluster_manager.KBShardManager()
|
733
|
-
set_utility(Utility.SHARD_MANAGER, mng)
|
734
|
-
yield mng
|
735
|
-
clean_utility(Utility.SHARD_MANAGER)
|