nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -403
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +70 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +143 -117
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +37 -128
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -1,301 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import asyncio
|
21
|
-
from io import BytesIO
|
22
|
-
from unittest.mock import AsyncMock, Mock
|
23
|
-
|
24
|
-
import nats.errors
|
25
|
-
import pytest
|
26
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage, BrokerMessageBlobReference
|
27
|
-
from starlette.requests import Request
|
28
|
-
|
29
|
-
from nucliadb.export_import.exceptions import ExportStreamExhausted
|
30
|
-
from nucliadb.export_import.models import ImportMetadata
|
31
|
-
from nucliadb.export_import.utils import (
|
32
|
-
ExportStream,
|
33
|
-
TaskRetryHandler,
|
34
|
-
get_cloud_files,
|
35
|
-
import_broker_message,
|
36
|
-
transaction_commit,
|
37
|
-
)
|
38
|
-
from nucliadb_models.export_import import Status
|
39
|
-
from nucliadb_protos import resources_pb2
|
40
|
-
from nucliadb_utils.const import Streams
|
41
|
-
|
42
|
-
|
43
|
-
@pytest.fixture(scope="function")
|
44
|
-
def transaction():
|
45
|
-
mock = Mock()
|
46
|
-
mock.commit = AsyncMock()
|
47
|
-
yield mock
|
48
|
-
|
49
|
-
|
50
|
-
@pytest.fixture(scope="function")
|
51
|
-
def partitioning():
|
52
|
-
mock = Mock()
|
53
|
-
mock.generate_partition = Mock(return_value=1)
|
54
|
-
yield mock
|
55
|
-
|
56
|
-
|
57
|
-
def get_cf(uri=None) -> resources_pb2.CloudFile:
|
58
|
-
cf = resources_pb2.CloudFile()
|
59
|
-
uri = uri or "//foo/bar"
|
60
|
-
cf.uri = uri
|
61
|
-
cf.source = resources_pb2.CloudFile.Source.LOCAL
|
62
|
-
return cf
|
63
|
-
|
64
|
-
|
65
|
-
@pytest.fixture(scope="function")
|
66
|
-
def broker_message():
|
67
|
-
bm = BrokerMessage()
|
68
|
-
bm.kbid = "foobar"
|
69
|
-
|
70
|
-
# Add a file field
|
71
|
-
file = resources_pb2.FieldFile()
|
72
|
-
file.file.CopyFrom(get_cf("file"))
|
73
|
-
bm.files["file"].CopyFrom(file)
|
74
|
-
|
75
|
-
# Add a conversation with an attachment
|
76
|
-
conversation = resources_pb2.Conversation()
|
77
|
-
message = resources_pb2.Message()
|
78
|
-
attachment = get_cf("attachment")
|
79
|
-
message.content.attachments.append(attachment)
|
80
|
-
conversation.messages.append(message)
|
81
|
-
bm.conversations["conversation"].CopyFrom(conversation)
|
82
|
-
|
83
|
-
# Add a layout with a file
|
84
|
-
layout = resources_pb2.FieldLayout()
|
85
|
-
block = resources_pb2.Block()
|
86
|
-
block.file.CopyFrom(get_cf("layout"))
|
87
|
-
layout.body.blocks["foo"].CopyFrom(block)
|
88
|
-
bm.layouts["layout"].CopyFrom(layout)
|
89
|
-
|
90
|
-
# Field extracted data
|
91
|
-
fed = resources_pb2.FileExtractedData()
|
92
|
-
fed.file_generated["foo"].CopyFrom(get_cf("field_file_generated"))
|
93
|
-
fed.file_preview.CopyFrom(get_cf("field_file_preview"))
|
94
|
-
fed.file_thumbnail.CopyFrom(get_cf("field_file_thumbnail"))
|
95
|
-
fed.file_pages_previews.pages.append(get_cf("field_file_pages_previews"))
|
96
|
-
bm.file_extracted_data.append(fed)
|
97
|
-
|
98
|
-
# Link extracted data
|
99
|
-
led = resources_pb2.LinkExtractedData()
|
100
|
-
led.link_thumbnail.CopyFrom(get_cf("link_thumbnail"))
|
101
|
-
led.link_preview.CopyFrom(get_cf("link_preview"))
|
102
|
-
led.link_image.CopyFrom(get_cf("link_image"))
|
103
|
-
bm.link_extracted_data.append(led)
|
104
|
-
|
105
|
-
# Field metadata
|
106
|
-
fcmw = resources_pb2.FieldComputedMetadataWrapper()
|
107
|
-
fcmw.metadata.metadata.thumbnail.CopyFrom(get_cf("metadata_thumbnail"))
|
108
|
-
fcmw.metadata.split_metadata["foo"].thumbnail.CopyFrom(
|
109
|
-
get_cf("metadata_split_thumbnail")
|
110
|
-
)
|
111
|
-
bm.field_metadata.append(fcmw)
|
112
|
-
|
113
|
-
return bm
|
114
|
-
|
115
|
-
|
116
|
-
class ContextMock:
|
117
|
-
def __init__(self, transaction, partitioning):
|
118
|
-
self.transaction = transaction
|
119
|
-
self.partitioning = partitioning
|
120
|
-
|
121
|
-
|
122
|
-
async def test_import_broker_message(broker_message, transaction, partitioning):
|
123
|
-
context = ContextMock(transaction, partitioning)
|
124
|
-
|
125
|
-
import_kbid = "import_kbid"
|
126
|
-
assert broker_message.kbid != import_kbid
|
127
|
-
|
128
|
-
await import_broker_message(context, import_kbid, broker_message)
|
129
|
-
|
130
|
-
# Sends two messages
|
131
|
-
assert transaction.commit.call_count == 2
|
132
|
-
|
133
|
-
for call in transaction.commit.call_args_list:
|
134
|
-
# Message contains import kbid
|
135
|
-
assert call[0][0].kbid == import_kbid
|
136
|
-
|
137
|
-
# Sends to correct topic
|
138
|
-
assert call[1]["target_subject"] == Streams.INGEST_PROCESSED.subject
|
139
|
-
|
140
|
-
|
141
|
-
def test_get_cloud_files(broker_message):
|
142
|
-
# All expected binaries are returned
|
143
|
-
binaries = get_cloud_files(broker_message)
|
144
|
-
assert len(binaries) == 12
|
145
|
-
for cf in binaries:
|
146
|
-
assert cf.source == resources_pb2.CloudFile.Source.LOCAL
|
147
|
-
|
148
|
-
# Make sure that the source is set to export on the broker message cfs
|
149
|
-
for cf in get_cloud_files(broker_message):
|
150
|
-
assert cf.source == resources_pb2.CloudFile.Source.EXPORT
|
151
|
-
|
152
|
-
|
153
|
-
async def test_export_stream_simple():
|
154
|
-
|
155
|
-
async def export_generator():
|
156
|
-
export = BytesIO(b"1234567890")
|
157
|
-
while True:
|
158
|
-
await asyncio.sleep(0)
|
159
|
-
chunk = export.read(2)
|
160
|
-
if not chunk:
|
161
|
-
break
|
162
|
-
yield chunk
|
163
|
-
|
164
|
-
stream = ExportStream(export_generator())
|
165
|
-
assert stream.read_bytes == 0
|
166
|
-
assert await stream.read(0) == b""
|
167
|
-
assert stream.read_bytes == 0
|
168
|
-
assert await stream.read(1) == b"1"
|
169
|
-
assert stream.read_bytes == 1
|
170
|
-
assert await stream.read(2) == b"23"
|
171
|
-
assert stream.read_bytes == 3
|
172
|
-
assert await stream.read(50) == b"4567890"
|
173
|
-
assert stream.read_bytes == 10
|
174
|
-
with pytest.raises(ExportStreamExhausted):
|
175
|
-
await stream.read(1)
|
176
|
-
|
177
|
-
|
178
|
-
class DummyTestRequest(Request):
|
179
|
-
def __init__(self, data: bytes, receive_chunk_size: int = 10):
|
180
|
-
super().__init__(
|
181
|
-
scope={
|
182
|
-
"type": "http",
|
183
|
-
"http_version": "1.1",
|
184
|
-
"method": "GET",
|
185
|
-
"headers": [],
|
186
|
-
},
|
187
|
-
receive=self.receive,
|
188
|
-
)
|
189
|
-
self.receive_chunk_size = receive_chunk_size
|
190
|
-
self.bytes = BytesIO(data)
|
191
|
-
|
192
|
-
async def receive(self):
|
193
|
-
chunk = self.bytes.read(self.receive_chunk_size)
|
194
|
-
more_data = True
|
195
|
-
if chunk == b"":
|
196
|
-
more_data = False
|
197
|
-
return {"type": "http.request", "body": chunk, "more_body": more_data}
|
198
|
-
|
199
|
-
|
200
|
-
async def test_export_stream():
|
201
|
-
request = DummyTestRequest(data=b"01234XYZ", receive_chunk_size=2)
|
202
|
-
|
203
|
-
export_stream = ExportStream(request.stream())
|
204
|
-
assert await export_stream.read(0) == b""
|
205
|
-
assert export_stream.read_bytes == 0
|
206
|
-
|
207
|
-
for i in range(5):
|
208
|
-
assert await export_stream.read(1) == f"{i}".encode()
|
209
|
-
assert export_stream.read_bytes == 5
|
210
|
-
|
211
|
-
assert await export_stream.read(3) == b"XYZ"
|
212
|
-
assert export_stream.read_bytes == 8
|
213
|
-
|
214
|
-
with pytest.raises(ExportStreamExhausted):
|
215
|
-
await export_stream.read(1)
|
216
|
-
|
217
|
-
with pytest.raises(ExportStreamExhausted):
|
218
|
-
await export_stream.read(0)
|
219
|
-
|
220
|
-
request = DummyTestRequest(data=b"foobar", receive_chunk_size=2)
|
221
|
-
export_stream = ExportStream(request.stream())
|
222
|
-
assert await export_stream.read(50) == b"foobar"
|
223
|
-
assert export_stream.read_bytes == 6
|
224
|
-
|
225
|
-
with pytest.raises(ExportStreamExhausted):
|
226
|
-
await export_stream.read(0)
|
227
|
-
|
228
|
-
|
229
|
-
class TestTaskRetryHandler:
|
230
|
-
@pytest.fixture(scope="function")
|
231
|
-
def callback(self):
|
232
|
-
return AsyncMock()
|
233
|
-
|
234
|
-
@pytest.fixture(scope="function")
|
235
|
-
def dm(self):
|
236
|
-
dm = Mock()
|
237
|
-
dm.set_metadata = AsyncMock()
|
238
|
-
return dm
|
239
|
-
|
240
|
-
@pytest.fixture(scope="function")
|
241
|
-
def metadata(self):
|
242
|
-
return ImportMetadata(kbid="kbid", id="import_id")
|
243
|
-
|
244
|
-
async def test_ok(self, callback, dm, metadata):
|
245
|
-
callback.return_value = 100
|
246
|
-
trh = TaskRetryHandler("foo", dm, metadata)
|
247
|
-
callback_retried = trh.wrap(callback)
|
248
|
-
|
249
|
-
result = await callback_retried("foo", bar="baz")
|
250
|
-
assert result == 100
|
251
|
-
|
252
|
-
callback.assert_called_once_with("foo", bar="baz")
|
253
|
-
|
254
|
-
assert metadata.task.status == Status.FINISHED
|
255
|
-
|
256
|
-
async def test_errors_are_retried(self, callback, dm, metadata):
|
257
|
-
callback.side_effect = ValueError("foo")
|
258
|
-
|
259
|
-
trh = TaskRetryHandler("foo", dm, metadata, max_tries=2)
|
260
|
-
callback_retried = trh.wrap(callback)
|
261
|
-
|
262
|
-
with pytest.raises(ValueError):
|
263
|
-
await callback_retried("foo", bar="baz")
|
264
|
-
|
265
|
-
callback.assert_called_once_with("foo", bar="baz")
|
266
|
-
|
267
|
-
assert metadata.task.status == Status.RUNNING
|
268
|
-
assert metadata.task.retries == 1
|
269
|
-
|
270
|
-
with pytest.raises(ValueError):
|
271
|
-
await callback_retried("foo", bar="baz")
|
272
|
-
|
273
|
-
assert metadata.task.status == Status.RUNNING
|
274
|
-
assert metadata.task.retries == 2
|
275
|
-
|
276
|
-
async def test_ignored_statuses(self, callback, dm, metadata):
|
277
|
-
trh = TaskRetryHandler("foo", dm, metadata)
|
278
|
-
callback_retried = trh.wrap(callback)
|
279
|
-
|
280
|
-
for status in (Status.ERRORED, Status.FINISHED):
|
281
|
-
metadata.task.status = status
|
282
|
-
await callback_retried("foo", bar="baz")
|
283
|
-
callback.assert_not_called()
|
284
|
-
|
285
|
-
|
286
|
-
async def test_transaction_commit_sends_storage_reference_on_max_payload_error():
|
287
|
-
context = Mock()
|
288
|
-
context.transaction.commit = AsyncMock(
|
289
|
-
side_effect=[nats.errors.MaxPayloadError, None]
|
290
|
-
)
|
291
|
-
context.blob_storage = AsyncMock()
|
292
|
-
context.blob_storage.set_stream_message.return_value = "key"
|
293
|
-
|
294
|
-
bm = BrokerMessage(kbid="kbid", uuid="uuid")
|
295
|
-
|
296
|
-
await transaction_commit(context, bm, 1)
|
297
|
-
|
298
|
-
assert context.transaction.commit.call_count == 2
|
299
|
-
call = context.transaction.commit.call_args_list[-1]
|
300
|
-
assert isinstance(call[1]["writer"], BrokerMessageBlobReference)
|
301
|
-
assert call[1]["headers"] == {"X-MESSAGE-TYPE": "PROXY"}
|
@@ -1,19 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
@@ -1,87 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import types
|
21
|
-
from unittest.mock import AsyncMock, Mock, patch
|
22
|
-
|
23
|
-
import pytest
|
24
|
-
|
25
|
-
from nucliadb.migrator import command, migrator
|
26
|
-
from nucliadb.migrator.exceptions import MigrationValidationError
|
27
|
-
from nucliadb.migrator.models import Migration
|
28
|
-
|
29
|
-
|
30
|
-
def test_get_migrations():
|
31
|
-
migrations = migrator.get_migrations()
|
32
|
-
assert len(migrations) > 0
|
33
|
-
assert migrations[0].version == 1
|
34
|
-
assert migrations[0].module.__name__ == "migrations.0001_bootstrap"
|
35
|
-
|
36
|
-
|
37
|
-
def test_get_migration_with_filtering():
|
38
|
-
with patch("nucliadb.migrator.utils.get_migration_modules") as mock:
|
39
|
-
mock.return_value = [
|
40
|
-
(types.ModuleType("m1"), 1),
|
41
|
-
(types.ModuleType("m2"), 2),
|
42
|
-
(types.ModuleType("m3"), 3),
|
43
|
-
(types.ModuleType("m4"), 4),
|
44
|
-
]
|
45
|
-
migrations = migrator.get_migrations(from_version=2, to_version=3)
|
46
|
-
assert len(migrations) == 1
|
47
|
-
assert migrations[0].version == 3
|
48
|
-
assert migrations[0].module.__name__ == "m3"
|
49
|
-
|
50
|
-
|
51
|
-
async def test_run_all_kb_migrations_raises_on_failure():
|
52
|
-
execution_context = Mock()
|
53
|
-
execution_context.data_manager = Mock()
|
54
|
-
execution_context.data_manager.get_kb_migrations = AsyncMock(
|
55
|
-
return_value=["foo", "bar"]
|
56
|
-
)
|
57
|
-
execution_context.settings = Mock(max_concurrent_migrations=1)
|
58
|
-
with patch(
|
59
|
-
"nucliadb.migrator.migrator.run_kb_migrations",
|
60
|
-
side_effect=[None, Exception("Boom")],
|
61
|
-
) as mock:
|
62
|
-
with pytest.raises(Exception) as exc_info:
|
63
|
-
await migrator.run_all_kb_migrations(execution_context, 1)
|
64
|
-
assert "Failed to migrate KBs. Failures: 1" in str(exc_info.value)
|
65
|
-
assert mock.call_count == 2
|
66
|
-
|
67
|
-
|
68
|
-
async def test_migrations_validation():
|
69
|
-
migrations = [
|
70
|
-
Migration(version=1, module=Mock()),
|
71
|
-
Migration(version=2, module=Mock()),
|
72
|
-
Migration(version=3, module=Mock()),
|
73
|
-
]
|
74
|
-
with patch("nucliadb.migrator.command.get_migrations", return_value=migrations):
|
75
|
-
command.validate()
|
76
|
-
|
77
|
-
|
78
|
-
async def test_migrations_validation_with_errors():
|
79
|
-
migrations = [
|
80
|
-
Migration(version=1, module=Mock()),
|
81
|
-
Migration(version=2, module=Mock()),
|
82
|
-
Migration(version=2, module=Mock()),
|
83
|
-
Migration(version=3, module=Mock()),
|
84
|
-
]
|
85
|
-
with patch("nucliadb.migrator.command.get_migrations", return_value=migrations):
|
86
|
-
with pytest.raises(MigrationValidationError):
|
87
|
-
command.validate()
|
@@ -1,19 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
@@ -1,42 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from unittest.mock import AsyncMock, Mock
|
21
|
-
|
22
|
-
import nats
|
23
|
-
import pytest
|
24
|
-
|
25
|
-
|
26
|
-
@pytest.fixture(scope="function")
|
27
|
-
def nats_manager():
|
28
|
-
nats_manager = Mock()
|
29
|
-
nats_manager.subscribe = AsyncMock()
|
30
|
-
js = Mock()
|
31
|
-
js.stream_info = AsyncMock(side_effect=nats.js.errors.NotFoundError)
|
32
|
-
js.add_stream = AsyncMock()
|
33
|
-
nats_manager.js = js
|
34
|
-
yield nats_manager
|
35
|
-
|
36
|
-
|
37
|
-
@pytest.fixture(scope="function")
|
38
|
-
def context(nats_manager):
|
39
|
-
context = Mock()
|
40
|
-
context.initialize = AsyncMock()
|
41
|
-
context.nats_manager = nats_manager
|
42
|
-
yield context
|
@@ -1,92 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from unittest.mock import AsyncMock, MagicMock
|
21
|
-
|
22
|
-
import pydantic
|
23
|
-
import pytest
|
24
|
-
|
25
|
-
from nucliadb.tasks.consumer import create_consumer
|
26
|
-
|
27
|
-
|
28
|
-
class Message(pydantic.BaseModel):
|
29
|
-
kbid: str
|
30
|
-
|
31
|
-
|
32
|
-
def test_create_consumer():
|
33
|
-
stream = MagicMock()
|
34
|
-
|
35
|
-
async def callback(): ...
|
36
|
-
|
37
|
-
consumer = create_consumer(
|
38
|
-
"foo", stream=stream, msg_type=Message, callback=callback
|
39
|
-
)
|
40
|
-
assert not consumer.initialized
|
41
|
-
|
42
|
-
assert consumer.name == "foo"
|
43
|
-
assert consumer.stream == stream
|
44
|
-
assert consumer.callback == callback
|
45
|
-
assert consumer.msg_type == Message
|
46
|
-
|
47
|
-
|
48
|
-
class TestSubscriptionWorker:
|
49
|
-
@pytest.fixture(scope="function")
|
50
|
-
async def callback(self):
|
51
|
-
yield AsyncMock()
|
52
|
-
|
53
|
-
@pytest.fixture(scope="function")
|
54
|
-
async def consumer(self, context, callback):
|
55
|
-
consumer = create_consumer(
|
56
|
-
"foo", stream=MagicMock(), callback=callback, msg_type=Message
|
57
|
-
)
|
58
|
-
await consumer.initialize(context)
|
59
|
-
yield consumer
|
60
|
-
|
61
|
-
@pytest.fixture(scope="function")
|
62
|
-
def task_message(self):
|
63
|
-
yield Message(kbid="kbid")
|
64
|
-
|
65
|
-
@pytest.fixture(scope="function")
|
66
|
-
def msg(self, task_message):
|
67
|
-
data = task_message.json().encode("utf-8")
|
68
|
-
msg = MagicMock()
|
69
|
-
msg.data = data
|
70
|
-
msg.ack = AsyncMock()
|
71
|
-
msg.nak = AsyncMock()
|
72
|
-
yield msg
|
73
|
-
|
74
|
-
async def test_callback_ok(self, consumer, msg, callback):
|
75
|
-
await consumer.subscription_worker(msg)
|
76
|
-
|
77
|
-
callback.assert_called_once()
|
78
|
-
|
79
|
-
async def test_callback_error(self, consumer, msg, callback):
|
80
|
-
callback.side_effect = Exception("foo")
|
81
|
-
|
82
|
-
await consumer.subscription_worker(msg)
|
83
|
-
|
84
|
-
callback.assert_called_once()
|
85
|
-
msg.nak.assert_called_once()
|
86
|
-
|
87
|
-
async def test_validation_error(self, consumer, msg):
|
88
|
-
msg.data = b"foo"
|
89
|
-
|
90
|
-
await consumer.subscription_worker(msg)
|
91
|
-
|
92
|
-
msg.ack.assert_called_once()
|
@@ -1,95 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from unittest.mock import AsyncMock, MagicMock, Mock
|
21
|
-
|
22
|
-
import nats
|
23
|
-
import pydantic
|
24
|
-
import pytest
|
25
|
-
|
26
|
-
from nucliadb.tasks.producer import create_producer
|
27
|
-
|
28
|
-
|
29
|
-
class Message(pydantic.BaseModel):
|
30
|
-
kbid: str
|
31
|
-
|
32
|
-
|
33
|
-
def test_create_producer():
|
34
|
-
stream = MagicMock()
|
35
|
-
|
36
|
-
producer = create_producer("foo", stream=stream, msg_type=Message)
|
37
|
-
assert not producer.initialized
|
38
|
-
|
39
|
-
assert producer.name == "foo"
|
40
|
-
assert producer.stream == stream
|
41
|
-
|
42
|
-
|
43
|
-
class TestProducer:
|
44
|
-
@pytest.fixture(scope="function")
|
45
|
-
def stream(self):
|
46
|
-
return MagicMock()
|
47
|
-
|
48
|
-
@pytest.fixture(scope="function")
|
49
|
-
def nats_manager(self):
|
50
|
-
mgr = MagicMock()
|
51
|
-
mgr.js.stream_info = AsyncMock(side_effect=nats.js.errors.NotFoundError)
|
52
|
-
mgr.js.add_stream = AsyncMock()
|
53
|
-
mgr.js.publish = AsyncMock()
|
54
|
-
yield mgr
|
55
|
-
|
56
|
-
@pytest.fixture(scope="function")
|
57
|
-
async def producer(self, context, stream, nats_manager):
|
58
|
-
async def callback(context, msg: Message):
|
59
|
-
pass
|
60
|
-
|
61
|
-
producer = create_producer("foo", stream=stream, msg_type=Message)
|
62
|
-
await producer.initialize(context)
|
63
|
-
producer.context.nats_manager = nats_manager
|
64
|
-
yield producer
|
65
|
-
|
66
|
-
async def test_initialize_creates_stream(self, producer, nats_manager):
|
67
|
-
# Check that the stream is on inialization
|
68
|
-
assert nats_manager.js.add_stream.call_count == 1
|
69
|
-
assert nats_manager.js.add_stream.call_args[1]["name"] == producer.stream.name
|
70
|
-
assert nats_manager.js.add_stream.call_args[1]["subjects"] == [
|
71
|
-
producer.stream.subject
|
72
|
-
]
|
73
|
-
|
74
|
-
async def test_produce_raises_error_if_not_initialized(self, producer):
|
75
|
-
producer.initialized = False
|
76
|
-
with pytest.raises(RuntimeError):
|
77
|
-
await producer(Mock())
|
78
|
-
|
79
|
-
async def test_produce_ok(self, producer, stream):
|
80
|
-
msg = Message(kbid="kbid")
|
81
|
-
|
82
|
-
await producer(msg)
|
83
|
-
|
84
|
-
publish_args = producer.context.nats_manager.js.publish.call_args[0]
|
85
|
-
assert publish_args[0] == stream.subject
|
86
|
-
|
87
|
-
raw_message = publish_args[1]
|
88
|
-
sent_message = Message.parse_raw(raw_message)
|
89
|
-
assert sent_message == msg
|
90
|
-
|
91
|
-
async def test_produce_raises_publish_errors(self, producer, nats_manager):
|
92
|
-
nats_manager.js.publish.side_effect = ValueError("foo")
|
93
|
-
|
94
|
-
with pytest.raises(ValueError):
|
95
|
-
await producer(Mock())
|