nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -403
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +70 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +143 -117
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +37 -128
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -1,136 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
|
20
|
-
import asyncio
|
21
|
-
|
22
|
-
import aiohttp
|
23
|
-
import pytest
|
24
|
-
from nucliadb_protos.dataset_pb2 import TaskType, TokenClassificationBatch, TrainSet
|
25
|
-
from nucliadb_protos.resources_pb2 import Position
|
26
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage
|
27
|
-
from nucliadb_protos.writer_pb2_grpc import WriterStub
|
28
|
-
|
29
|
-
from nucliadb.tests.utils import inject_message
|
30
|
-
from nucliadb.tests.utils.broker_messages import BrokerMessageBuilder, FieldBuilder
|
31
|
-
from nucliadb.train import API_PREFIX
|
32
|
-
from nucliadb.train.api.v1.router import KB_PREFIX
|
33
|
-
from nucliadb.train.tests.utils import get_batches_from_train_response_stream
|
34
|
-
from nucliadb_protos import resources_pb2 as rpb
|
35
|
-
|
36
|
-
|
37
|
-
@pytest.mark.asyncio
|
38
|
-
@pytest.mark.parametrize("knowledgebox", ["STABLE", "EXPERIMENTAL"], indirect=True)
|
39
|
-
async def test_generator_token_classification(
|
40
|
-
train_rest_api: aiohttp.ClientSession,
|
41
|
-
knowledgebox_with_entities: str,
|
42
|
-
nucliadb_grpc: WriterStub,
|
43
|
-
):
|
44
|
-
kbid = knowledgebox_with_entities
|
45
|
-
|
46
|
-
await inject_resource_with_token_classification(kbid, nucliadb_grpc)
|
47
|
-
|
48
|
-
async with train_rest_api.get(
|
49
|
-
f"/{API_PREFIX}/v1/{KB_PREFIX}/{kbid}/trainset"
|
50
|
-
) as partitions:
|
51
|
-
assert partitions.status == 200
|
52
|
-
data = await partitions.json()
|
53
|
-
assert len(data["partitions"]) == 1
|
54
|
-
partition_id = data["partitions"][0]
|
55
|
-
|
56
|
-
trainset = TrainSet()
|
57
|
-
trainset.type = TaskType.TOKEN_CLASSIFICATION
|
58
|
-
trainset.batch_size = 2
|
59
|
-
trainset.filter.labels.append("PERSON")
|
60
|
-
trainset.filter.labels.append("ORG")
|
61
|
-
async with train_rest_api.post(
|
62
|
-
f"/{API_PREFIX}/v1/{KB_PREFIX}/{kbid}/trainset/{partition_id}",
|
63
|
-
data=trainset.SerializeToString(),
|
64
|
-
) as response:
|
65
|
-
assert response.status == 200
|
66
|
-
batches: list[TokenClassificationBatch] = []
|
67
|
-
async for batch in get_batches_from_train_response_stream(
|
68
|
-
response, TokenClassificationBatch
|
69
|
-
):
|
70
|
-
batches.append(batch)
|
71
|
-
|
72
|
-
for batch in batches:
|
73
|
-
if batch.data[0].token == "Eudald":
|
74
|
-
assert batch.data[0].label == "B-PERSON"
|
75
|
-
assert batch.data[1].label == "I-PERSON"
|
76
|
-
assert batch.data[2].label == "O"
|
77
|
-
if batch.data[0].token == "This":
|
78
|
-
assert batch.data[4].label == "B-PERSON"
|
79
|
-
assert batch.data[5].label == "I-PERSON"
|
80
|
-
if batch.data[0].token == "Where":
|
81
|
-
assert batch.data[3].label == "B-ORG"
|
82
|
-
assert batch.data[4].label == "I-ORG"
|
83
|
-
assert batch.data[5].label == "I-ORG"
|
84
|
-
if batch.data[0].token == "Summary":
|
85
|
-
assert batch.data[2].label == "B-ORG"
|
86
|
-
assert batch.data[4].label == "B-ORG"
|
87
|
-
if batch.data[0].token == "My":
|
88
|
-
assert batch.data[3].label == "B-PERSON"
|
89
|
-
assert batch.data[12].label == "B-ORG"
|
90
|
-
|
91
|
-
|
92
|
-
async def inject_resource_with_token_classification(knowledgebox, writer):
|
93
|
-
bm = broker_resource(knowledgebox)
|
94
|
-
await inject_message(writer, bm)
|
95
|
-
await asyncio.sleep(0.1)
|
96
|
-
return bm.uuid
|
97
|
-
|
98
|
-
|
99
|
-
def broker_resource(knowledgebox: str) -> BrokerMessage:
|
100
|
-
bmb = BrokerMessageBuilder(kbid=knowledgebox)
|
101
|
-
|
102
|
-
bmb.with_title("This is a bird, its a plane, no, its el Super Fran")
|
103
|
-
title_field = bmb.field_builder("title", rpb.FieldType.GENERIC)
|
104
|
-
title_field.with_extracted_entity(
|
105
|
-
"PERSON", "el Super Fran", positions=[Position(start=37, end=50)]
|
106
|
-
)
|
107
|
-
|
108
|
-
bmb.with_summary("Summary of Nuclia using Debian")
|
109
|
-
summary_field = bmb.field_builder("summary", rpb.FieldType.GENERIC)
|
110
|
-
summary_field.with_extracted_entity(
|
111
|
-
"ORG", "Nuclia", positions=[Position(start=11, end=17)]
|
112
|
-
)
|
113
|
-
summary_field.with_extracted_entity(
|
114
|
-
"ORG", "Debian", positions=[Position(start=24, end=30)]
|
115
|
-
)
|
116
|
-
|
117
|
-
file_field = FieldBuilder("file", rpb.FieldType.FILE)
|
118
|
-
file_field.with_extracted_text(
|
119
|
-
"My own text Ramon. This is great to be at Nuclia. \n Where is the Generalitat de Catalunya? Eudald Camprubi, do you want to go shooping? This is a test Carmen Iniesta!" # noqa
|
120
|
-
)
|
121
|
-
file_field.with_extracted_paragraph_metadata(rpb.Paragraph(start=0, end=49))
|
122
|
-
file_field.with_extracted_paragraph_metadata(rpb.Paragraph(start=50, end=90))
|
123
|
-
file_field.with_extracted_paragraph_metadata(rpb.Paragraph(start=91, end=135))
|
124
|
-
file_field.with_extracted_paragraph_metadata(rpb.Paragraph(start=136, end=166))
|
125
|
-
|
126
|
-
file_field.with_user_entity("PERSON", "Ramon", start=12, end=17)
|
127
|
-
file_field.with_user_entity("ORG", "Nuclia", start=42, end=48)
|
128
|
-
file_field.with_user_entity("ORG", "Generalitat de Catalunya", start=65, end=89)
|
129
|
-
file_field.with_user_entity("PERSON", "Eudald", start=91, end=106)
|
130
|
-
file_field.with_user_entity("PERSON", "Carmen Iniesta", start=151, end=165)
|
131
|
-
|
132
|
-
bmb.add_field_builder(file_field)
|
133
|
-
|
134
|
-
bm = bmb.build()
|
135
|
-
|
136
|
-
return bm
|
nucliadb/train/tests/utils.py
DELETED
@@ -1,101 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
|
20
|
-
from typing import AsyncGenerator, overload
|
21
|
-
|
22
|
-
import aiohttp
|
23
|
-
from nucliadb_protos.dataset_pb2 import (
|
24
|
-
FieldClassificationBatch,
|
25
|
-
ImageClassificationBatch,
|
26
|
-
ParagraphClassificationBatch,
|
27
|
-
ParagraphStreamingBatch,
|
28
|
-
QuestionAnswerStreamingBatch,
|
29
|
-
SentenceClassificationBatch,
|
30
|
-
TokenClassificationBatch,
|
31
|
-
)
|
32
|
-
|
33
|
-
from nucliadb.train.types import TrainBatch, TrainBatchType
|
34
|
-
|
35
|
-
# NOTE: we use def instead of async def to make mypy happy. Otherwise, it
|
36
|
-
# considers the overloaded functions as corountines returning async iterators
|
37
|
-
# instead of async iterators themselves and complains about it
|
38
|
-
|
39
|
-
|
40
|
-
@overload
|
41
|
-
def get_batches_from_train_response_stream(
|
42
|
-
response: aiohttp.ClientResponse,
|
43
|
-
pb_klass: type[FieldClassificationBatch],
|
44
|
-
) -> AsyncGenerator[FieldClassificationBatch, None]: ...
|
45
|
-
|
46
|
-
|
47
|
-
@overload
|
48
|
-
def get_batches_from_train_response_stream(
|
49
|
-
response: aiohttp.ClientResponse,
|
50
|
-
pb_klass: type[ImageClassificationBatch],
|
51
|
-
) -> AsyncGenerator[ImageClassificationBatch, None]: ...
|
52
|
-
|
53
|
-
|
54
|
-
@overload
|
55
|
-
def get_batches_from_train_response_stream(
|
56
|
-
response: aiohttp.ClientResponse,
|
57
|
-
pb_klass: type[ParagraphClassificationBatch],
|
58
|
-
) -> AsyncGenerator[ParagraphClassificationBatch, None]: ...
|
59
|
-
|
60
|
-
|
61
|
-
@overload
|
62
|
-
def get_batches_from_train_response_stream(
|
63
|
-
response: aiohttp.ClientResponse,
|
64
|
-
pb_klass: type[ParagraphStreamingBatch],
|
65
|
-
) -> AsyncGenerator[ParagraphStreamingBatch, None]: ...
|
66
|
-
|
67
|
-
|
68
|
-
@overload
|
69
|
-
def get_batches_from_train_response_stream(
|
70
|
-
response: aiohttp.ClientResponse,
|
71
|
-
pb_klass: type[QuestionAnswerStreamingBatch],
|
72
|
-
) -> AsyncGenerator[QuestionAnswerStreamingBatch, None]: ...
|
73
|
-
|
74
|
-
|
75
|
-
@overload
|
76
|
-
def get_batches_from_train_response_stream(
|
77
|
-
response: aiohttp.ClientResponse,
|
78
|
-
pb_klass: type[SentenceClassificationBatch],
|
79
|
-
) -> AsyncGenerator[SentenceClassificationBatch, None]: ...
|
80
|
-
|
81
|
-
|
82
|
-
@overload
|
83
|
-
def get_batches_from_train_response_stream(
|
84
|
-
response: aiohttp.ClientResponse,
|
85
|
-
pb_klass: type[TokenClassificationBatch],
|
86
|
-
) -> AsyncGenerator[TokenClassificationBatch, None]: ...
|
87
|
-
|
88
|
-
|
89
|
-
async def get_batches_from_train_response_stream(
|
90
|
-
response: aiohttp.ClientResponse,
|
91
|
-
pb_klass: TrainBatchType,
|
92
|
-
) -> AsyncGenerator[TrainBatch, None]:
|
93
|
-
while True:
|
94
|
-
header = await response.content.read(4)
|
95
|
-
if header == b"":
|
96
|
-
break
|
97
|
-
payload_size = int.from_bytes(header, byteorder="big", signed=False)
|
98
|
-
payload = await response.content.read(payload_size)
|
99
|
-
batch = pb_klass()
|
100
|
-
batch.ParseFromString(payload)
|
101
|
-
yield batch
|
@@ -1,51 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from typing import Any, Callable, Coroutine
|
21
|
-
|
22
|
-
from nucliadb_protos.resources_pb2 import FieldLayout
|
23
|
-
|
24
|
-
import nucliadb_models as models
|
25
|
-
from nucliadb_utils.storages.storage import Storage
|
26
|
-
|
27
|
-
# Registry of layout serializers, keyed by layout format. Each value is a
# coroutine function taking (layout_field, kbid, uuid, field, storage) and
# producing a FieldLayout protobuf. It starts empty here; entries are added by
# the serializer modules when they are imported.
# NOTE(review): the key is annotated as ``int`` while serialize_blocks looks
# entries up by ``layout_field.format`` -- confirm the intended key type.
VERSION: dict[
    int,
    Callable[
        [models.InputLayoutField, str, str, str, Storage],
        Coroutine[Any, Any, FieldLayout],
    ],
] = {}
|
34
|
-
|
35
|
-
import nucliadb.writer.layouts.v1 # noqa isort:skip
|
36
|
-
|
37
|
-
|
38
|
-
async def serialize_blocks(
    layout_field: models.InputLayoutField,
    kbid: str,
    uuid: str,
    field: str,
    storage: Storage,
) -> FieldLayout:
    """Serialize a layout field with the serializer registered for its format.

    The serializer is looked up in ``VERSION`` under ``layout_field.format``
    and awaited with all five arguments, unchanged.

    Raises:
        KeyError: If no serializer is registered for ``layout_field.format``.
    """
    requested_format = layout_field.format
    # Guard clause: reject unknown formats before doing any work.
    if requested_format not in VERSION:
        raise KeyError("Invalid version")

    serializer = VERSION[requested_format]
    return await serializer(layout_field, kbid, uuid, field, storage)
|
nucliadb/writer/layouts/v1.py
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from nucliadb_protos.resources_pb2 import Block as PBBlock
|
21
|
-
from nucliadb_protos.resources_pb2 import FieldLayout
|
22
|
-
|
23
|
-
import nucliadb_models as models
|
24
|
-
from nucliadb.writer.layouts import VERSION
|
25
|
-
from nucliadb_utils.storages.storage import Storage
|
26
|
-
|
27
|
-
|
28
|
-
async def serialize_block(
    layout_field: models.InputLayoutField,
    kbid: str,
    uuid: str,
    field: str,
    storage: Storage,
) -> FieldLayout:
    """Build a ``FieldLayout`` protobuf from an input layout field.

    For every entry of ``layout_field.body.blocks`` a protobuf block is filled
    from the input block, the block's attached file is uploaded through
    ``storage``, and the protobuf block is copied into the result under the
    same key. Finally the layout format is set from ``layout_field.format``.
    """
    result = FieldLayout()
    for block_id, source in layout_field.body.blocks.items():
        message = PBBlock()
        message.x = source.x
        message.y = source.y
        message.cols = source.cols
        message.rows = source.rows
        message.type = PBBlock.TypeBlock.Value(source.type)
        # Fall back to the mapping key when the block carries no identifier.
        message.ident = source.ident or block_id
        message.payload = source.payload

        # Upload the attached file before the block is added to the result.
        destination = storage.layout_field(kbid, uuid, field, block_id)
        attachment = source.file
        await storage.upload_b64file_to_cloudfile(
            destination,
            attachment.payload.encode(),
            attachment.filename,
            attachment.content_type,
            attachment.md5,
        )
        result.body.blocks[block_id].CopyFrom(message)

    result.format = FieldLayout.Format.Value(layout_field.format.value)
    return result


# Register this serializer for the NUCLIAv1 layout format.
VERSION[models.LayoutFormat.NUCLIAv1] = serialize_block
|
@@ -1,19 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
@@ -1,31 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
# Modules that pytest loads as plugins for this test package; they provide the
# shared fixtures used by the tests.
pytest_plugins = [
    "pytest_mock",
    "pytest_docker_fixtures",
    "nucliadb_utils.tests.nats",
    "nucliadb.tests.fixtures",
    "nucliadb.tests.tikv",
    "nucliadb.ingest.tests.fixtures",  # should be refactored out
    "nucliadb.writer.tests.fixtures",
    "nucliadb_utils.tests.conftest",
    "nucliadb_utils.tests.gcs",
    "nucliadb_utils.tests.s3",
]
|
@@ -1,191 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from enum import Enum
|
21
|
-
from typing import AsyncIterator, Callable, Optional
|
22
|
-
from unittest import mock
|
23
|
-
|
24
|
-
import pytest
|
25
|
-
from httpx import AsyncClient
|
26
|
-
from pytest_lazy_fixtures import lazy_fixture
|
27
|
-
from redis import asyncio as aioredis
|
28
|
-
|
29
|
-
from nucliadb.ingest.tests.fixtures import IngestFixture
|
30
|
-
from nucliadb.writer import API_PREFIX
|
31
|
-
from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX
|
32
|
-
from nucliadb.writer.settings import settings
|
33
|
-
from nucliadb.writer.tus import clear_storage
|
34
|
-
from nucliadb_models.resource import NucliaDBRoles
|
35
|
-
from nucliadb_utils.settings import (
|
36
|
-
FileBackendConfig,
|
37
|
-
nuclia_settings,
|
38
|
-
nucliadb_settings,
|
39
|
-
storage_settings,
|
40
|
-
)
|
41
|
-
from nucliadb_utils.tests.conftest import get_testing_storage_backend
|
42
|
-
from nucliadb_utils.utilities import Utility, clean_utility, set_utility
|
43
|
-
|
44
|
-
|
45
|
-
@pytest.fixture(scope="function")
def disabled_back_pressure():
    """Patch the writer's back-pressure check so it returns ``False``.

    Yields the mock that replaces ``is_back_pressure_enabled``; the patch is
    undone when the fixture is torn down.
    """
    patcher = mock.patch(
        "nucliadb.writer.back_pressure.is_back_pressure_enabled", return_value=False
    )
    with patcher as patched_check:
        yield patched_check
|
51
|
-
|
52
|
-
|
53
|
-
@pytest.fixture(scope="function")
async def writer_api(
    disabled_back_pressure,
    redis,
    storage_writer,
    grpc_servicer: IngestFixture,
    transaction_utility,
    processing_utility,
    tus_manager,
) -> AsyncIterator[Callable[[list[Enum], str, str], AsyncClient]]:
    """Start the writer application and yield a factory of HTTP test clients.

    The yielded callable accepts ``roles``, ``user`` and ``version`` and
    returns an ``AsyncClient`` bound to the application, with the
    ``X-NUCLIADB-ROLES`` and ``X-NUCLIADB-USER`` headers already set. Redis is
    flushed before the application starts and again after it shuts down.
    """
    # Set before the application module is imported and built.
    # NOTE(review): presumably the app reads this setting at creation -- confirm.
    nucliadb_settings.nucliadb_ingest = grpc_servicer.host
    from nucliadb.writer.app import create_application

    application = create_application()

    def make_client_fixture(
        roles: Optional[list[Enum]] = None,
        user: str = "",
        version: str = "1",
    ) -> AsyncClient:
        host = "http://test"
        base_url = f"{host}/{API_PREFIX}/v{version}"
        role_values = [role.value for role in (roles or [])]

        client = AsyncClient(app=application, base_url=base_url)  # type: ignore
        client.headers["X-NUCLIADB-ROLES"] = ";".join(role_values)
        client.headers["X-NUCLIADB-USER"] = user
        return client

    # Start from an empty redis so state from other tests cannot leak in.
    driver = aioredis.from_url(f"redis://{redis[0]}:{redis[1]}")
    await driver.flushall()
    await application.router.startup()

    yield make_client_fixture

    # Teardown: stop the app, drop upload state, then clean and close redis.
    await application.router.shutdown()
    clear_storage()
    await driver.flushall()
    await driver.close(close_connection_pool=True)
|
97
|
-
|
98
|
-
|
99
|
-
@pytest.fixture(scope="function")
def gcs_storage_writer(gcs):
    """Configure the global storage settings to use the GCS file backend.

    The endpoint URL is the value of the ``gcs`` fixture and the bucket name
    template is ``test_{kbid}``. The settings are not restored afterwards and
    the fixture itself returns nothing.
    """
    storage_settings.file_backend = FileBackendConfig.GCS
    storage_settings.gcs_endpoint_url = gcs
    storage_settings.gcs_bucket = "test_{kbid}"
|
104
|
-
|
105
|
-
|
106
|
-
@pytest.fixture(scope="function")
def s3_storage_writer(s3):
    """Configure the global storage settings to use the S3 file backend.

    The endpoint is the value of the ``s3`` fixture, the client id and secret
    are set to empty strings and the bucket name template is ``test-{kbid}``.
    The settings are not restored afterwards and the fixture returns nothing.
    """
    storage_settings.file_backend = FileBackendConfig.S3
    storage_settings.s3_endpoint = s3
    storage_settings.s3_client_id = ""
    storage_settings.s3_client_secret = ""
    storage_settings.s3_bucket = "test-{kbid}"
|
113
|
-
|
114
|
-
|
115
|
-
@pytest.fixture(scope="function")
def pg_storage_writer(pg):
    """Configure the global storage settings to use the PG file backend.

    The connection URL is built from the ``pg`` fixture (host at index 0, port
    at index 1) with the ``postgres`` user, password and database. The
    settings are not restored afterwards and the fixture returns nothing.
    """
    storage_settings.file_backend = FileBackendConfig.PG
    url = f"postgresql://postgres:postgres@{pg[0]}:{pg[1]}/postgres"
    storage_settings.driver_pg_url = url
|
120
|
-
|
121
|
-
|
122
|
-
def lazy_storage_writer_fixture():
    """Return the lazy fixture list matching the configured storage backend.

    The backend name comes from ``get_testing_storage_backend()``. Known names
    (``gcs``, ``s3``, ``pg``) map to their ``*_storage_writer`` fixture; any
    other value is reported on stdout and falls back to the GCS fixture.
    """
    fixtures_by_backend = (
        ("gcs", "gcs_storage_writer"),
        ("s3", "s3_storage_writer"),
        ("pg", "pg_storage_writer"),
    )
    backend = get_testing_storage_backend()
    for backend_name, fixture_name in fixtures_by_backend:
        if backend == backend_name:
            return [lazy_fixture.lf(fixture_name)]

    print(f"Unknown storage backend {backend}, using gcs")
    return [lazy_fixture.lf("gcs_storage_writer")]
|
133
|
-
|
134
|
-
|
135
|
-
@pytest.fixture(scope="function", params=lazy_storage_writer_fixture())
async def storage_writer(request):
    """
    Generic storage fixture that allows us to run the same tests for different storage backends.

    The parameter selected by ``lazy_storage_writer_fixture()`` is registered
    as the ``Utility.STORAGE`` utility for the duration of the test and
    removed again on teardown.
    """
    # NOTE(review): the *_storage_writer fixtures in this file only mutate
    # settings and return nothing, so this value is presumably None -- confirm.
    storage_driver = request.param
    set_utility(Utility.STORAGE, storage_driver)

    yield storage_driver

    clean_utility(Utility.STORAGE)
|
146
|
-
|
147
|
-
|
148
|
-
@pytest.fixture(scope="function")
async def knowledgebox_writer(writer_api):
    """Create a knowledge box through the writer API and yield its uuid.

    A client with the MANAGER role posts slug ``kbid1`` and title
    ``My Knowledge Box``; the request must answer 201 and the response must
    include a ``uuid``.
    """
    # NOTE(review): indentation was lost in the source text; the extent of the
    # ``async with`` body below is reconstructed -- confirm against the file.
    async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
        resp = await client.post(
            f"/{KBS_PREFIX}",
            json={
                "slug": "kbid1",
                "title": "My Knowledge Box",
            },
        )
        assert resp.status_code == 201
    kbid = resp.json().get("uuid")
    assert kbid is not None
    yield kbid
|
162
|
-
|
163
|
-
|
164
|
-
@pytest.fixture(scope="function")
async def resource(redis, writer_api, knowledgebox_writer):
    """Create a resource in the test knowledge box and return its uuid.

    A client with the WRITER role posts slug ``resource1`` and title
    ``Resource 1`` to the knowledge box created by ``knowledgebox_writer``;
    the request must answer 201. ``redis`` is requested but not used in the
    body.
    """
    # NOTE(review): indentation was lost in the source text; the extent of the
    # ``async with`` body below is reconstructed -- confirm against the file.
    async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
        resp = await client.post(
            f"/{KB_PREFIX}/{knowledgebox_writer}/resources",
            json={
                "slug": "resource1",
                "title": "Resource 1",
            },
        )
        assert resp.status_code == 201
        uuid = resp.json()["uuid"]

    return uuid
|
178
|
-
|
179
|
-
|
180
|
-
@pytest.fixture(scope="function")
async def processing_utility():
    """Configure ``nuclia_settings`` for tests.

    Enables ``dummy_processing`` and ``onprem`` and sets a fixed JWT key. The
    previous values are not restored afterwards.
    """
    nuclia_settings.dummy_processing = True
    nuclia_settings.onprem = True
    nuclia_settings.nuclia_jwt_key = "foobarkey"
|
185
|
-
|
186
|
-
|
187
|
-
@pytest.fixture(scope="function")
async def tus_manager(redis):
    """Point the writer settings' ``dm_redis_*`` options at the ``redis`` fixture.

    ``redis[0]`` is used as the host and ``redis[1]`` as the port. Nothing is
    yielded and the previous values are not restored on teardown.
    """
    settings.dm_redis_host = redis[0]
    settings.dm_redis_port = redis[1]
    yield
|