nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,95 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
|
21
|
-
from uuid import uuid4
|
22
|
-
|
23
|
-
import pytest
|
24
|
-
|
25
|
-
from nucliadb.tests.utils.aiohttp_session import get_mocked_session
|
26
|
-
|
27
|
-
|
28
|
-
@pytest.mark.parametrize("onprem", [True, False])
|
29
|
-
@pytest.mark.parametrize(
|
30
|
-
"mock_payload",
|
31
|
-
[
|
32
|
-
{"seqid": 1, "account_seq": 1, "queue": "private"},
|
33
|
-
{"seqid": 1, "account_seq": 1, "queue": "shared"},
|
34
|
-
{"seqid": 1, "account_seq": None, "queue": "private"},
|
35
|
-
{"seqid": 1, "account_seq": None, "queue": "shared"},
|
36
|
-
{"seqid": 1, "queue": "private"},
|
37
|
-
{"seqid": 1, "queue": "shared"},
|
38
|
-
],
|
39
|
-
)
|
40
|
-
@pytest.mark.asyncio
|
41
|
-
async def test_send_to_process(onprem, mock_payload):
|
42
|
-
"""
|
43
|
-
Test that send_to_process does not fail
|
44
|
-
"""
|
45
|
-
|
46
|
-
from nucliadb.ingest.processing import ProcessingEngine, PushPayload
|
47
|
-
|
48
|
-
fake_nuclia_proxy_url = "http://fake_proxy"
|
49
|
-
processing_engine = ProcessingEngine(
|
50
|
-
onprem=onprem,
|
51
|
-
nuclia_processing_cluster_url=fake_nuclia_proxy_url,
|
52
|
-
nuclia_public_url=fake_nuclia_proxy_url,
|
53
|
-
)
|
54
|
-
await processing_engine.initialize()
|
55
|
-
|
56
|
-
payload = PushPayload(
|
57
|
-
uuid=str(uuid4()), kbid=str(uuid4()), userid=str(uuid4()), partition=0
|
58
|
-
)
|
59
|
-
|
60
|
-
processing_engine.session = get_mocked_session(
|
61
|
-
"POST", 200, json=mock_payload, context_manager=False
|
62
|
-
)
|
63
|
-
await processing_engine.send_to_process(payload, partition=0)
|
64
|
-
|
65
|
-
await processing_engine.finalize()
|
66
|
-
|
67
|
-
|
68
|
-
@pytest.mark.parametrize("onprem", [True, False])
|
69
|
-
@pytest.mark.asyncio
|
70
|
-
async def test_delete_from_processing(onprem):
|
71
|
-
"""
|
72
|
-
Test that send_to_process does not fail
|
73
|
-
"""
|
74
|
-
|
75
|
-
from nucliadb.ingest.processing import ProcessingEngine
|
76
|
-
|
77
|
-
fake_nuclia_proxy_url = "http://fake_proxy"
|
78
|
-
processing_engine = ProcessingEngine(
|
79
|
-
onprem=onprem,
|
80
|
-
nuclia_processing_cluster_url=fake_nuclia_proxy_url,
|
81
|
-
nuclia_public_url=fake_nuclia_proxy_url,
|
82
|
-
)
|
83
|
-
await processing_engine.initialize()
|
84
|
-
|
85
|
-
processing_engine.session = get_mocked_session(
|
86
|
-
"POST",
|
87
|
-
200,
|
88
|
-
json={"kbid": "kbid", "resource_id": "resource_id"},
|
89
|
-
context_manager=False,
|
90
|
-
)
|
91
|
-
await processing_engine.delete_from_processing(
|
92
|
-
kbid="kbid", resource_id="resource_id"
|
93
|
-
)
|
94
|
-
|
95
|
-
await processing_engine.finalize()
|
@@ -1,272 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import uuid
|
21
|
-
|
22
|
-
import pytest
|
23
|
-
from nucliadb_protos.resources_pb2 import (
|
24
|
-
Classification,
|
25
|
-
FieldComputedMetadataWrapper,
|
26
|
-
FieldID,
|
27
|
-
FieldText,
|
28
|
-
FieldType,
|
29
|
-
)
|
30
|
-
from nucliadb_protos.utils_pb2 import Relation, RelationNode
|
31
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage
|
32
|
-
|
33
|
-
from nucliadb.ingest import SERVICE_NAME
|
34
|
-
from nucliadb_utils.utilities import get_indexing, get_storage
|
35
|
-
|
36
|
-
|
37
|
-
@pytest.mark.asyncio
|
38
|
-
async def test_ingest_relations_indexing(
|
39
|
-
fake_node, local_files, storage, knowledgebox_ingest, processor
|
40
|
-
):
|
41
|
-
rid = str(uuid.uuid4())
|
42
|
-
bm = BrokerMessage(
|
43
|
-
kbid=knowledgebox_ingest, uuid=rid, slug="slug-1", type=BrokerMessage.AUTOCOMMIT
|
44
|
-
)
|
45
|
-
|
46
|
-
e0 = RelationNode(value="E0", ntype=RelationNode.NodeType.ENTITY, subtype="")
|
47
|
-
e1 = RelationNode(
|
48
|
-
value="E1", ntype=RelationNode.NodeType.ENTITY, subtype="Official"
|
49
|
-
)
|
50
|
-
e2 = RelationNode(
|
51
|
-
value="E2", ntype=RelationNode.NodeType.ENTITY, subtype="Propaganda"
|
52
|
-
)
|
53
|
-
r0 = Relation(
|
54
|
-
relation=Relation.RelationType.CHILD, source=e1, to=e2, relation_label="R0"
|
55
|
-
)
|
56
|
-
r1 = Relation(
|
57
|
-
relation=Relation.RelationType.ENTITY, source=e0, to=e2, relation_label="R1"
|
58
|
-
)
|
59
|
-
r2 = Relation(
|
60
|
-
relation=Relation.RelationType.CHILD, source=e0, to=e1, relation_label="R2"
|
61
|
-
)
|
62
|
-
|
63
|
-
bm.relations.extend([r0, r1, r2])
|
64
|
-
|
65
|
-
await processor.process(message=bm, seqid=1)
|
66
|
-
|
67
|
-
index = get_indexing()
|
68
|
-
storage = await get_storage(service_name=SERVICE_NAME)
|
69
|
-
|
70
|
-
pb = await storage.get_indexing(index._calls[0][1])
|
71
|
-
|
72
|
-
assert len(pb.relations) == 3
|
73
|
-
assert pb.relations[0] == r0
|
74
|
-
assert pb.relations[1] == r1
|
75
|
-
assert pb.relations[2] == r2
|
76
|
-
|
77
|
-
|
78
|
-
@pytest.mark.asyncio
|
79
|
-
async def test_ingest_label_relation_extraction(
|
80
|
-
fake_node, local_files, storage, knowledgebox_ingest, processor
|
81
|
-
):
|
82
|
-
rid = str(uuid.uuid4())
|
83
|
-
bm = BrokerMessage(
|
84
|
-
kbid=knowledgebox_ingest, uuid=rid, slug="slug-1", type=BrokerMessage.AUTOCOMMIT
|
85
|
-
)
|
86
|
-
|
87
|
-
labels = [
|
88
|
-
("labelset-1", "label-1"),
|
89
|
-
("labelset-1", "label-2"),
|
90
|
-
("labelset-2", "label-1"),
|
91
|
-
("labelset-2", "label-3"),
|
92
|
-
]
|
93
|
-
bm.basic.usermetadata.classifications.extend(
|
94
|
-
[Classification(labelset=labelset, label=label) for labelset, label in labels]
|
95
|
-
)
|
96
|
-
|
97
|
-
await processor.process(message=bm, seqid=1)
|
98
|
-
|
99
|
-
index = get_indexing()
|
100
|
-
storage = await get_storage(service_name=SERVICE_NAME)
|
101
|
-
|
102
|
-
pb = await storage.get_indexing(index._calls[0][1])
|
103
|
-
|
104
|
-
for i, (labelset, label) in enumerate(labels):
|
105
|
-
assert pb.relations[i].relation == Relation.RelationType.ABOUT
|
106
|
-
assert pb.relations[i].source.value == rid
|
107
|
-
assert pb.relations[i].to.value == f"{labelset}/{label}"
|
108
|
-
|
109
|
-
|
110
|
-
@pytest.mark.asyncio
|
111
|
-
async def test_ingest_colab_relation_extraction(
|
112
|
-
fake_node, local_files, storage, knowledgebox_ingest, processor
|
113
|
-
):
|
114
|
-
rid = str(uuid.uuid4())
|
115
|
-
bm = BrokerMessage(
|
116
|
-
kbid=knowledgebox_ingest, uuid=rid, slug="slug-1", type=BrokerMessage.AUTOCOMMIT
|
117
|
-
)
|
118
|
-
|
119
|
-
collaborators = ["Alice", "Bob", "Trudy"]
|
120
|
-
bm.origin.colaborators.extend(collaborators)
|
121
|
-
|
122
|
-
await processor.process(message=bm, seqid=1)
|
123
|
-
|
124
|
-
index = get_indexing()
|
125
|
-
storage = await get_storage(service_name=SERVICE_NAME)
|
126
|
-
|
127
|
-
pb = await storage.get_indexing(index._calls[0][1])
|
128
|
-
|
129
|
-
for i, collaborator in enumerate(collaborators):
|
130
|
-
assert pb.relations[i].relation == Relation.RelationType.COLAB
|
131
|
-
assert pb.relations[i].source.value == rid
|
132
|
-
assert pb.relations[i].to.value == collaborator
|
133
|
-
|
134
|
-
|
135
|
-
@pytest.mark.asyncio
|
136
|
-
async def test_ingest_field_metadata_relation_extraction(
|
137
|
-
fake_node, local_files, storage, knowledgebox_ingest, processor
|
138
|
-
):
|
139
|
-
rid = str(uuid.uuid4())
|
140
|
-
bm = BrokerMessage(
|
141
|
-
kbid=knowledgebox_ingest,
|
142
|
-
uuid=rid,
|
143
|
-
slug="slug-1",
|
144
|
-
type=BrokerMessage.AUTOCOMMIT,
|
145
|
-
texts={
|
146
|
-
"title": FieldText(
|
147
|
-
body="Title with metadata",
|
148
|
-
format=FieldText.Format.PLAIN,
|
149
|
-
)
|
150
|
-
},
|
151
|
-
)
|
152
|
-
|
153
|
-
fcmw = FieldComputedMetadataWrapper(
|
154
|
-
field=FieldID(
|
155
|
-
field_type=FieldType.TEXT,
|
156
|
-
field="title",
|
157
|
-
)
|
158
|
-
)
|
159
|
-
fcmw.metadata.metadata.positions["subtype-1/value-1"].entity = "value-1"
|
160
|
-
fcmw.metadata.metadata.positions["subtype-1/value-2"].entity = "value-2"
|
161
|
-
|
162
|
-
fcmw.metadata.metadata.classifications.extend(
|
163
|
-
[
|
164
|
-
Classification(labelset="ls1", label="label1"),
|
165
|
-
]
|
166
|
-
)
|
167
|
-
|
168
|
-
bm.field_metadata.append(fcmw)
|
169
|
-
|
170
|
-
await processor.process(message=bm, seqid=1)
|
171
|
-
|
172
|
-
index = get_indexing()
|
173
|
-
storage = await get_storage(service_name=SERVICE_NAME)
|
174
|
-
|
175
|
-
pb = await storage.get_indexing(index._calls[0][1])
|
176
|
-
|
177
|
-
generated_relations = [
|
178
|
-
# From ner metadata
|
179
|
-
Relation(
|
180
|
-
relation=Relation.RelationType.ENTITY,
|
181
|
-
source=RelationNode(value=rid, ntype=RelationNode.NodeType.RESOURCE),
|
182
|
-
to=RelationNode(
|
183
|
-
value="value-1", ntype=RelationNode.NodeType.ENTITY, subtype="subtype-1"
|
184
|
-
),
|
185
|
-
),
|
186
|
-
Relation(
|
187
|
-
relation=Relation.RelationType.ENTITY,
|
188
|
-
source=RelationNode(value=rid, ntype=RelationNode.NodeType.RESOURCE),
|
189
|
-
to=RelationNode(
|
190
|
-
value="value-2", ntype=RelationNode.NodeType.ENTITY, subtype="subtype-1"
|
191
|
-
),
|
192
|
-
),
|
193
|
-
# From classification metadata
|
194
|
-
Relation(
|
195
|
-
relation=Relation.RelationType.ABOUT,
|
196
|
-
source=RelationNode(value=rid, ntype=RelationNode.NodeType.RESOURCE),
|
197
|
-
to=RelationNode(
|
198
|
-
value="ls1/label1",
|
199
|
-
ntype=RelationNode.NodeType.LABEL,
|
200
|
-
),
|
201
|
-
),
|
202
|
-
]
|
203
|
-
for generated_relation in generated_relations:
|
204
|
-
assert generated_relation in pb.relations
|
205
|
-
|
206
|
-
|
207
|
-
@pytest.mark.asyncio
|
208
|
-
async def test_ingest_field_relations_relation_extraction(
|
209
|
-
fake_node, local_files, storage, knowledgebox_ingest, processor
|
210
|
-
):
|
211
|
-
rid = str(uuid.uuid4())
|
212
|
-
bm = BrokerMessage(
|
213
|
-
kbid=knowledgebox_ingest, uuid=rid, slug="slug-1", type=BrokerMessage.AUTOCOMMIT
|
214
|
-
)
|
215
|
-
|
216
|
-
relationnode = RelationNode(
|
217
|
-
value=rid, ntype=RelationNode.NodeType.RESOURCE, subtype="subtype-1"
|
218
|
-
)
|
219
|
-
test_relations = [
|
220
|
-
Relation(
|
221
|
-
relation=Relation.RelationType.CHILD,
|
222
|
-
source=relationnode,
|
223
|
-
to=RelationNode(
|
224
|
-
value="document",
|
225
|
-
ntype=RelationNode.NodeType.RESOURCE,
|
226
|
-
),
|
227
|
-
),
|
228
|
-
Relation(
|
229
|
-
relation=Relation.RelationType.ABOUT,
|
230
|
-
source=relationnode,
|
231
|
-
to=RelationNode(
|
232
|
-
value="label",
|
233
|
-
ntype=RelationNode.NodeType.LABEL,
|
234
|
-
),
|
235
|
-
),
|
236
|
-
Relation(
|
237
|
-
relation=Relation.RelationType.ENTITY,
|
238
|
-
source=relationnode,
|
239
|
-
to=RelationNode(
|
240
|
-
value="entity",
|
241
|
-
ntype=RelationNode.NodeType.ENTITY,
|
242
|
-
),
|
243
|
-
),
|
244
|
-
Relation(
|
245
|
-
relation=Relation.RelationType.COLAB,
|
246
|
-
source=relationnode,
|
247
|
-
to=RelationNode(
|
248
|
-
value="user",
|
249
|
-
ntype=RelationNode.NodeType.USER,
|
250
|
-
),
|
251
|
-
),
|
252
|
-
Relation(
|
253
|
-
relation=Relation.RelationType.OTHER,
|
254
|
-
source=relationnode,
|
255
|
-
to=RelationNode(
|
256
|
-
value="other",
|
257
|
-
ntype=RelationNode.NodeType.RESOURCE,
|
258
|
-
),
|
259
|
-
),
|
260
|
-
]
|
261
|
-
bm.relations.extend(test_relations)
|
262
|
-
|
263
|
-
await processor.process(message=bm, seqid=1)
|
264
|
-
|
265
|
-
index = get_indexing()
|
266
|
-
storage = await get_storage(service_name=SERVICE_NAME)
|
267
|
-
|
268
|
-
pb = await storage.get_indexing(index._calls[0][1])
|
269
|
-
|
270
|
-
assert len(pb.relations) == len(test_relations)
|
271
|
-
for relation in test_relations:
|
272
|
-
assert relation in pb.relations
|
@@ -1,18 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
@@ -1,139 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
|
21
|
-
import asyncio
|
22
|
-
from unittest.mock import AsyncMock, MagicMock, patch
|
23
|
-
|
24
|
-
import pytest
|
25
|
-
from nucliadb_protos.audit_pb2 import AuditKBCounter, AuditRequest
|
26
|
-
from nucliadb_protos.writer_pb2 import Audit, BrokerMessage, Notification, ShardObject
|
27
|
-
|
28
|
-
from nucliadb.ingest.consumer import auditing
|
29
|
-
from nucliadb_protos import nodereader_pb2
|
30
|
-
|
31
|
-
pytestmark = pytest.mark.asyncio
|
32
|
-
|
33
|
-
|
34
|
-
@pytest.fixture()
|
35
|
-
def pubsub():
|
36
|
-
mock = AsyncMock()
|
37
|
-
mock.parse = lambda x: x
|
38
|
-
yield mock
|
39
|
-
|
40
|
-
|
41
|
-
@pytest.fixture()
|
42
|
-
def reader():
|
43
|
-
yield AsyncMock()
|
44
|
-
|
45
|
-
|
46
|
-
@pytest.fixture()
|
47
|
-
def shard_manager(reader):
|
48
|
-
nm = MagicMock()
|
49
|
-
node = MagicMock(reader=reader)
|
50
|
-
nm.get_shards_by_kbid = AsyncMock(return_value=[ShardObject()])
|
51
|
-
with (
|
52
|
-
patch("nucliadb.ingest.consumer.auditing.get_shard_manager", return_value=nm),
|
53
|
-
patch(
|
54
|
-
"nucliadb.ingest.consumer.auditing.choose_node",
|
55
|
-
return_value=(node, "shard_id"),
|
56
|
-
),
|
57
|
-
):
|
58
|
-
yield nm
|
59
|
-
|
60
|
-
|
61
|
-
@pytest.fixture()
|
62
|
-
def audit():
|
63
|
-
yield AsyncMock()
|
64
|
-
|
65
|
-
|
66
|
-
@pytest.fixture()
|
67
|
-
async def index_audit_handler(pubsub, audit, shard_manager):
|
68
|
-
iah = auditing.IndexAuditHandler(
|
69
|
-
audit=audit,
|
70
|
-
pubsub=pubsub,
|
71
|
-
check_delay=0.05,
|
72
|
-
)
|
73
|
-
await iah.initialize()
|
74
|
-
yield iah
|
75
|
-
await iah.finalize()
|
76
|
-
|
77
|
-
|
78
|
-
@pytest.fixture()
|
79
|
-
async def writes_audit_handler(pubsub, audit, shard_manager):
|
80
|
-
rwah = auditing.ResourceWritesAuditHandler(
|
81
|
-
storage=AsyncMock(),
|
82
|
-
audit=audit,
|
83
|
-
pubsub=pubsub,
|
84
|
-
)
|
85
|
-
await rwah.initialize()
|
86
|
-
yield rwah
|
87
|
-
await rwah.finalize()
|
88
|
-
|
89
|
-
|
90
|
-
async def test_handle_message(
|
91
|
-
index_audit_handler: auditing.IndexAuditHandler, reader, audit
|
92
|
-
):
|
93
|
-
reader.GetShard.return_value = nodereader_pb2.Shard(fields=5, paragraphs=6)
|
94
|
-
|
95
|
-
notif = Notification(
|
96
|
-
kbid="kbid",
|
97
|
-
action=Notification.Action.INDEXED,
|
98
|
-
)
|
99
|
-
await index_audit_handler.handle_message(notif.SerializeToString())
|
100
|
-
|
101
|
-
await asyncio.sleep(0.06)
|
102
|
-
|
103
|
-
audit.report.assert_called_with(
|
104
|
-
kbid="kbid",
|
105
|
-
audit_type=AuditRequest.AuditType.INDEXED,
|
106
|
-
kb_counter=AuditKBCounter(fields=5, paragraphs=6),
|
107
|
-
)
|
108
|
-
|
109
|
-
|
110
|
-
async def test_handle_message_ignore_not_indexed(
|
111
|
-
index_audit_handler: auditing.IndexAuditHandler, audit
|
112
|
-
):
|
113
|
-
notif = Notification(
|
114
|
-
kbid="kbid",
|
115
|
-
action=Notification.Action.COMMIT,
|
116
|
-
)
|
117
|
-
await index_audit_handler.handle_message(notif.SerializeToString())
|
118
|
-
|
119
|
-
await index_audit_handler.finalize()
|
120
|
-
|
121
|
-
audit.report.assert_not_called()
|
122
|
-
|
123
|
-
|
124
|
-
async def test_resource_handle_message_processor_messages_are_not_audited(
|
125
|
-
writes_audit_handler: auditing.ResourceWritesAuditHandler, audit
|
126
|
-
):
|
127
|
-
message_audit = Audit()
|
128
|
-
message_audit.message_source = BrokerMessage.MessageSource.PROCESSOR
|
129
|
-
notif = Notification(
|
130
|
-
kbid="kbid",
|
131
|
-
action=Notification.Action.COMMIT,
|
132
|
-
write_type=Notification.WriteType.MODIFIED,
|
133
|
-
message_audit=message_audit,
|
134
|
-
)
|
135
|
-
await writes_audit_handler.handle_message(notif.SerializeToString())
|
136
|
-
|
137
|
-
await writes_audit_handler.finalize()
|
138
|
-
|
139
|
-
audit.report.assert_not_called()
|
@@ -1,69 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from unittest.mock import AsyncMock, MagicMock, Mock
|
21
|
-
|
22
|
-
import pytest
|
23
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage, BrokerMessageBlobReference
|
24
|
-
|
25
|
-
from nucliadb.ingest.consumer.consumer import IngestConsumer
|
26
|
-
|
27
|
-
|
28
|
-
@pytest.fixture()
|
29
|
-
def storage():
|
30
|
-
mock = MagicMock()
|
31
|
-
mock.get_stream_message = AsyncMock()
|
32
|
-
mock.del_stream_message = AsyncMock()
|
33
|
-
yield mock
|
34
|
-
|
35
|
-
|
36
|
-
@pytest.fixture()
|
37
|
-
def consumer(storage):
|
38
|
-
yield IngestConsumer(None, "partition", storage, None)
|
39
|
-
|
40
|
-
|
41
|
-
@pytest.mark.asyncio
|
42
|
-
async def test_get_broker_message(consumer: IngestConsumer, storage):
|
43
|
-
bm = BrokerMessage(kbid="kbid")
|
44
|
-
msg = Mock(data=bm.SerializeToString(), headers={})
|
45
|
-
assert bm == await consumer.get_broker_message(msg)
|
46
|
-
storage.get_stream_message.assert_not_called()
|
47
|
-
|
48
|
-
|
49
|
-
@pytest.mark.asyncio
|
50
|
-
async def test_get_broker_message_proxied(consumer: IngestConsumer, storage):
|
51
|
-
bm = BrokerMessage(kbid="kbid")
|
52
|
-
bmr = BrokerMessageBlobReference(kbid="kbid", storage_key="storage_key")
|
53
|
-
msg = Mock(data=bmr.SerializeToString(), headers={"X-MESSAGE-TYPE": "PROXY"})
|
54
|
-
|
55
|
-
storage.get_stream_message.return_value = bm.SerializeToString()
|
56
|
-
|
57
|
-
assert bm == await consumer.get_broker_message(msg)
|
58
|
-
|
59
|
-
storage.get_stream_message.assert_awaited_once_with("storage_key")
|
60
|
-
|
61
|
-
|
62
|
-
@pytest.mark.asyncio
|
63
|
-
async def test_clean_broker_message_proxied(consumer: IngestConsumer, storage):
|
64
|
-
bmr = BrokerMessageBlobReference(kbid="kbid", storage_key="storage_key")
|
65
|
-
msg = Mock(data=bmr.SerializeToString(), headers={"X-MESSAGE-TYPE": "PROXY"})
|
66
|
-
|
67
|
-
await consumer.clean_broker_message(msg)
|
68
|
-
|
69
|
-
storage.del_stream_message.assert_awaited_once_with("storage_key")
|
@@ -1,60 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
from unittest.mock import AsyncMock, MagicMock, patch
|
20
|
-
|
21
|
-
import pytest
|
22
|
-
|
23
|
-
from nucliadb.ingest.consumer.pull import PullWorker
|
24
|
-
|
25
|
-
|
26
|
-
class TestPullWorker:
|
27
|
-
"""
|
28
|
-
It's a complex class so this might get a little messy with mocks
|
29
|
-
|
30
|
-
It should be refactor at some point and these tests be rewritten/removed
|
31
|
-
"""
|
32
|
-
|
33
|
-
@pytest.fixture()
|
34
|
-
def processor(self):
|
35
|
-
processor = AsyncMock()
|
36
|
-
with patch("nucliadb.ingest.consumer.pull.Processor", return_value=processor):
|
37
|
-
yield processor
|
38
|
-
|
39
|
-
@pytest.fixture()
|
40
|
-
def nats_conn(self):
|
41
|
-
conn = MagicMock()
|
42
|
-
conn.jetstream.return_value = AsyncMock()
|
43
|
-
conn.drain = AsyncMock()
|
44
|
-
conn.close = AsyncMock()
|
45
|
-
with patch("nucliadb.ingest.consumer.pull.nats.connect", return_value=conn):
|
46
|
-
yield conn
|
47
|
-
|
48
|
-
@pytest.fixture()
|
49
|
-
def worker(self, processor):
|
50
|
-
yield PullWorker(
|
51
|
-
driver=AsyncMock(),
|
52
|
-
partition="1",
|
53
|
-
storage=AsyncMock(),
|
54
|
-
pull_time_error_backoff=100,
|
55
|
-
zone="zone",
|
56
|
-
nuclia_processing_cluster_url="nuclia_processing_cluster_url",
|
57
|
-
nuclia_public_url="nuclia_public_url",
|
58
|
-
audit=None,
|
59
|
-
onprem=False,
|
60
|
-
)
|