nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,49 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import pytest
|
21
|
-
|
22
|
-
from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX
|
23
|
-
from nucliadb_models.resource import NucliaDBRoles
|
24
|
-
|
25
|
-
|
26
|
-
@pytest.mark.asyncio
|
27
|
-
async def test_knowledgebox_lifecycle(writer_api):
|
28
|
-
async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
|
29
|
-
resp = await client.post(
|
30
|
-
f"/{KBS_PREFIX}",
|
31
|
-
json={
|
32
|
-
"slug": "kbid1",
|
33
|
-
"title": "My Knowledge Box",
|
34
|
-
"description": "My lovely knowledgebox",
|
35
|
-
},
|
36
|
-
)
|
37
|
-
assert resp.status_code == 201
|
38
|
-
data = resp.json()
|
39
|
-
assert data["slug"] == "kbid1"
|
40
|
-
kbid = data["uuid"]
|
41
|
-
|
42
|
-
resp = await client.patch(
|
43
|
-
f"/{KB_PREFIX}/{kbid}",
|
44
|
-
json={
|
45
|
-
"slug": "kbid2",
|
46
|
-
"description": "My lovely knowledgebox2",
|
47
|
-
},
|
48
|
-
)
|
49
|
-
assert resp.status_code == 200
|
@@ -1,133 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from typing import AsyncIterator
|
21
|
-
from unittest.mock import AsyncMock
|
22
|
-
|
23
|
-
import pytest
|
24
|
-
|
25
|
-
from nucliadb.common import datamanagers
|
26
|
-
from nucliadb.ingest.processing import ProcessingInfo
|
27
|
-
from nucliadb.writer.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RESOURCES_PREFIX
|
28
|
-
from nucliadb.writer.tests.utils import load_file_as_FileB64_payload
|
29
|
-
from nucliadb.writer.utilities import get_processing
|
30
|
-
from nucliadb_models.resource import NucliaDBRoles, QueueType
|
31
|
-
|
32
|
-
|
33
|
-
@pytest.fixture(scope="function")
|
34
|
-
def processing_mock(mocker):
|
35
|
-
processing = get_processing()
|
36
|
-
mocker.patch.object(
|
37
|
-
processing,
|
38
|
-
"send_to_process",
|
39
|
-
AsyncMock(
|
40
|
-
return_value=ProcessingInfo(seqid=0, account_seq=0, queue=QueueType.SHARED)
|
41
|
-
),
|
42
|
-
)
|
43
|
-
yield processing
|
44
|
-
|
45
|
-
|
46
|
-
@pytest.fixture(scope="function")
|
47
|
-
@pytest.mark.asyncio
|
48
|
-
async def file_field(
|
49
|
-
writer_api, knowledgebox_writer: str
|
50
|
-
) -> AsyncIterator[tuple[str, str, str]]:
|
51
|
-
kbid = knowledgebox_writer
|
52
|
-
field_id = "myfile"
|
53
|
-
|
54
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
55
|
-
resp = await client.post(
|
56
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCES_PREFIX}",
|
57
|
-
json={
|
58
|
-
"slug": "resource",
|
59
|
-
"title": "My resource",
|
60
|
-
"files": {
|
61
|
-
field_id: {
|
62
|
-
"language": "en",
|
63
|
-
"password": "xxxxxx",
|
64
|
-
"file": load_file_as_FileB64_payload(
|
65
|
-
"assets/text001.txt", "text/plain"
|
66
|
-
),
|
67
|
-
}
|
68
|
-
},
|
69
|
-
},
|
70
|
-
)
|
71
|
-
assert resp.status_code == 201
|
72
|
-
rid = resp.json()["uuid"]
|
73
|
-
|
74
|
-
assert (
|
75
|
-
await datamanagers.atomic.resources.resource_exists(kbid=kbid, rid=rid)
|
76
|
-
) is True
|
77
|
-
|
78
|
-
yield kbid, rid, field_id
|
79
|
-
|
80
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
81
|
-
resp = await client.delete(
|
82
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}",
|
83
|
-
)
|
84
|
-
assert resp.status_code == 204
|
85
|
-
|
86
|
-
|
87
|
-
@pytest.mark.asyncio
|
88
|
-
async def test_reprocess_nonexistent_file_field(
|
89
|
-
writer_api, knowledgebox_writer: str, resource: str
|
90
|
-
):
|
91
|
-
kbid = knowledgebox_writer
|
92
|
-
rid = resource
|
93
|
-
field_id = "nonexistent-field"
|
94
|
-
|
95
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
96
|
-
resp = await client.post(
|
97
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/reprocess",
|
98
|
-
)
|
99
|
-
assert resp.status_code == 404
|
100
|
-
|
101
|
-
|
102
|
-
@pytest.mark.asyncio
|
103
|
-
async def test_reprocess_file_field_with_password(
|
104
|
-
writer_api, file_field: tuple[str, str, str], processing_mock
|
105
|
-
):
|
106
|
-
kbid, rid, field_id = file_field
|
107
|
-
password = "secret-password"
|
108
|
-
|
109
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
110
|
-
resp = await client.post(
|
111
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/reprocess",
|
112
|
-
headers={
|
113
|
-
"X-FILE-PASSWORD": password,
|
114
|
-
},
|
115
|
-
)
|
116
|
-
assert resp.status_code == 202
|
117
|
-
|
118
|
-
assert processing_mock.send_to_process.await_count == 1
|
119
|
-
|
120
|
-
|
121
|
-
@pytest.mark.asyncio
|
122
|
-
async def test_reprocess_file_field_without_password(
|
123
|
-
writer_api, file_field: tuple[str, str, str], processing_mock
|
124
|
-
):
|
125
|
-
kbid, rid, field_id = file_field
|
126
|
-
|
127
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
128
|
-
resp = await client.post(
|
129
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/reprocess",
|
130
|
-
)
|
131
|
-
assert resp.status_code == 202
|
132
|
-
|
133
|
-
assert processing_mock.send_to_process.await_count == 1
|
@@ -1,476 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from datetime import datetime
|
21
|
-
from typing import Any, Callable, Optional
|
22
|
-
from unittest.mock import AsyncMock # type: ignore
|
23
|
-
|
24
|
-
import pytest
|
25
|
-
from httpx import AsyncClient
|
26
|
-
|
27
|
-
import nucliadb_models
|
28
|
-
from nucliadb.common import datamanagers
|
29
|
-
from nucliadb.common.maindb.local import LocalDriver
|
30
|
-
from nucliadb.common.maindb.redis import RedisDriver
|
31
|
-
from nucliadb.ingest.orm.resource import Resource
|
32
|
-
from nucliadb.ingest.processing import PushPayload
|
33
|
-
from nucliadb.writer.api.v1.router import (
|
34
|
-
KB_PREFIX,
|
35
|
-
RESOURCE_PREFIX,
|
36
|
-
RESOURCES_PREFIX,
|
37
|
-
RSLUG_PREFIX,
|
38
|
-
)
|
39
|
-
from nucliadb.writer.tests.test_fields import (
|
40
|
-
TEST_CONVERSATION_PAYLOAD,
|
41
|
-
TEST_DATETIMES_PAYLOAD,
|
42
|
-
TEST_EXTERNAL_FILE_PAYLOAD,
|
43
|
-
TEST_FILE_PAYLOAD,
|
44
|
-
TEST_KEYWORDSETS_PAYLOAD,
|
45
|
-
TEST_LAYOUT_PAYLOAD,
|
46
|
-
TEST_LINK_PAYLOAD,
|
47
|
-
TEST_TEXT_PAYLOAD,
|
48
|
-
)
|
49
|
-
from nucliadb_models.resource import NucliaDBRoles
|
50
|
-
|
51
|
-
|
52
|
-
@pytest.mark.asyncio
|
53
|
-
async def test_resource_crud(
|
54
|
-
writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str
|
55
|
-
):
|
56
|
-
knowledgebox_id = knowledgebox_writer
|
57
|
-
async with writer_api([NucliaDBRoles.WRITER]) as client:
|
58
|
-
# Test create resource
|
59
|
-
resp = await client.post(
|
60
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
|
61
|
-
json={
|
62
|
-
"slug": "resource1",
|
63
|
-
"title": "My resource",
|
64
|
-
"summary": "Some summary",
|
65
|
-
"icon": "image/png",
|
66
|
-
"layout": "layout",
|
67
|
-
"metadata": {
|
68
|
-
"language": "en",
|
69
|
-
"metadata": {"key1": "value1", "key2": "value2"},
|
70
|
-
},
|
71
|
-
"fieldmetadata": [
|
72
|
-
{
|
73
|
-
"paragraphs": [
|
74
|
-
{
|
75
|
-
"key": "paragraph1",
|
76
|
-
"classifications": [
|
77
|
-
{"labelset": "ls1", "label": "label1"}
|
78
|
-
],
|
79
|
-
}
|
80
|
-
],
|
81
|
-
"token": [
|
82
|
-
{"token": "token1", "klass": "klass1", "start": 1, "end": 2}
|
83
|
-
],
|
84
|
-
"field": {"field": "text1", "field_type": "text"},
|
85
|
-
}
|
86
|
-
],
|
87
|
-
"usermetadata": {
|
88
|
-
"classifications": [{"labelset": "ls1", "label": "label1"}],
|
89
|
-
"relations": [
|
90
|
-
{
|
91
|
-
"relation": "CHILD",
|
92
|
-
"to": {
|
93
|
-
"type": "resource",
|
94
|
-
"value": "resource_uuid",
|
95
|
-
},
|
96
|
-
}
|
97
|
-
],
|
98
|
-
},
|
99
|
-
"origin": {
|
100
|
-
"source_id": "source_id",
|
101
|
-
"url": "http://some_source",
|
102
|
-
"created": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
|
103
|
-
"modified": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
|
104
|
-
"metadata": {"key1": "value1", "key2": "value2"},
|
105
|
-
"tags": ["tag1", "tag2"],
|
106
|
-
"collaborators": ["col1", "col2"],
|
107
|
-
"filename": "file.pdf",
|
108
|
-
"related": ["related1"],
|
109
|
-
},
|
110
|
-
"texts": {"text1": TEST_TEXT_PAYLOAD},
|
111
|
-
"links": {"link1": TEST_LINK_PAYLOAD},
|
112
|
-
"files": {
|
113
|
-
"file1": TEST_FILE_PAYLOAD,
|
114
|
-
"external1": TEST_EXTERNAL_FILE_PAYLOAD,
|
115
|
-
},
|
116
|
-
"layouts": {"layout1": TEST_LAYOUT_PAYLOAD},
|
117
|
-
"conversations": {"conv1": TEST_CONVERSATION_PAYLOAD},
|
118
|
-
"keywordsets": {"keywordset1": TEST_KEYWORDSETS_PAYLOAD},
|
119
|
-
"datetimes": {"datetime1": TEST_DATETIMES_PAYLOAD},
|
120
|
-
},
|
121
|
-
)
|
122
|
-
|
123
|
-
assert resp.status_code == 201
|
124
|
-
data = resp.json()
|
125
|
-
assert "uuid" in data
|
126
|
-
assert "seqid" in data
|
127
|
-
rid = data["uuid"]
|
128
|
-
|
129
|
-
# Test update resource
|
130
|
-
resp = await client.patch(
|
131
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
|
132
|
-
json={},
|
133
|
-
)
|
134
|
-
assert resp.status_code == 200
|
135
|
-
|
136
|
-
data = resp.json()
|
137
|
-
|
138
|
-
assert "seqid" in data
|
139
|
-
|
140
|
-
# Test delete resource
|
141
|
-
resp = await client.delete(
|
142
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
|
143
|
-
)
|
144
|
-
assert resp.status_code == 204
|
145
|
-
|
146
|
-
|
147
|
-
@pytest.mark.asyncio
|
148
|
-
async def test_resource_crud_sync(
|
149
|
-
writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str
|
150
|
-
):
|
151
|
-
knowledgebox_id = knowledgebox_writer
|
152
|
-
async with writer_api([NucliaDBRoles.WRITER]) as client:
|
153
|
-
# Test create resource
|
154
|
-
resp = await client.post(
|
155
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
|
156
|
-
json={
|
157
|
-
"slug": "resource1",
|
158
|
-
"title": "My resource",
|
159
|
-
"summary": "Some summary",
|
160
|
-
"icon": "image/png",
|
161
|
-
"layout": "layout",
|
162
|
-
"metadata": {
|
163
|
-
"language": "en",
|
164
|
-
"metadata": {"key1": "value1", "key2": "value2"},
|
165
|
-
},
|
166
|
-
"fieldmetadata": [
|
167
|
-
{
|
168
|
-
"paragraphs": [
|
169
|
-
{
|
170
|
-
"key": "paragraph1",
|
171
|
-
"classifications": [
|
172
|
-
{"labelset": "ls1", "label": "label1"}
|
173
|
-
],
|
174
|
-
}
|
175
|
-
],
|
176
|
-
"token": [
|
177
|
-
{"token": "token1", "klass": "klass1", "start": 1, "end": 2}
|
178
|
-
],
|
179
|
-
"field": {"field": "text1", "field_type": "text"},
|
180
|
-
}
|
181
|
-
],
|
182
|
-
"usermetadata": {
|
183
|
-
"classifications": [{"labelset": "ls1", "label": "label1"}],
|
184
|
-
"relations": [
|
185
|
-
{
|
186
|
-
"relation": "CHILD",
|
187
|
-
"to": {
|
188
|
-
"type": "resource",
|
189
|
-
"value": "resource_uuid",
|
190
|
-
},
|
191
|
-
}
|
192
|
-
],
|
193
|
-
},
|
194
|
-
"origin": {
|
195
|
-
"source_id": "source_id",
|
196
|
-
"url": "http://some_source",
|
197
|
-
"created": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
|
198
|
-
"modified": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
|
199
|
-
"metadata": {"key1": "value1", "key2": "value2"},
|
200
|
-
"tags": ["tag1", "tag2"],
|
201
|
-
"collaborators": ["col1", "col2"],
|
202
|
-
"filename": "file.pdf",
|
203
|
-
"related": ["related1"],
|
204
|
-
},
|
205
|
-
"texts": {"text1": TEST_TEXT_PAYLOAD},
|
206
|
-
"links": {"link1": TEST_LINK_PAYLOAD},
|
207
|
-
"files": {"file1": TEST_FILE_PAYLOAD},
|
208
|
-
"layouts": {"layout1": TEST_LAYOUT_PAYLOAD},
|
209
|
-
"conversations": {"conv1": TEST_CONVERSATION_PAYLOAD},
|
210
|
-
"keywordsets": {"keywordset1": TEST_KEYWORDSETS_PAYLOAD},
|
211
|
-
"datetimes": {"datetime1": TEST_DATETIMES_PAYLOAD},
|
212
|
-
},
|
213
|
-
)
|
214
|
-
|
215
|
-
assert resp.status_code == 201
|
216
|
-
data = resp.json()
|
217
|
-
assert "uuid" in data
|
218
|
-
assert "seqid" in data
|
219
|
-
assert "elapsed" in data
|
220
|
-
rid = data["uuid"]
|
221
|
-
|
222
|
-
assert (
|
223
|
-
await datamanagers.atomic.resources.resource_exists(
|
224
|
-
kbid=knowledgebox_id, rid=rid
|
225
|
-
)
|
226
|
-
) is True
|
227
|
-
|
228
|
-
# Test update resource
|
229
|
-
resp = await client.patch(
|
230
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
|
231
|
-
json={},
|
232
|
-
)
|
233
|
-
assert resp.status_code == 200
|
234
|
-
|
235
|
-
# Test delete resource
|
236
|
-
|
237
|
-
resp = await client.delete(
|
238
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/resource1",
|
239
|
-
)
|
240
|
-
|
241
|
-
assert resp.status_code == 404
|
242
|
-
|
243
|
-
resp = await client.delete(
|
244
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
|
245
|
-
)
|
246
|
-
assert resp.status_code == 204
|
247
|
-
|
248
|
-
assert (
|
249
|
-
await datamanagers.atomic.resources.resource_exists(
|
250
|
-
kbid=knowledgebox_id, rid=rid
|
251
|
-
)
|
252
|
-
) is False
|
253
|
-
|
254
|
-
|
255
|
-
@pytest.mark.asyncio
|
256
|
-
async def test_reprocess_resource(
|
257
|
-
writer_api: Callable[..., AsyncClient],
|
258
|
-
test_resource: Resource,
|
259
|
-
mocker,
|
260
|
-
maindb_driver,
|
261
|
-
) -> None:
|
262
|
-
if isinstance(maindb_driver, (LocalDriver, RedisDriver)):
|
263
|
-
pytest.skip("Keys might not be ordered correctly in this driver")
|
264
|
-
|
265
|
-
rsc = test_resource
|
266
|
-
kbid = rsc.kb.kbid
|
267
|
-
rid = rsc.uuid
|
268
|
-
|
269
|
-
from nucliadb.writer.utilities import get_processing
|
270
|
-
|
271
|
-
processing = get_processing()
|
272
|
-
processing.values.clear() # type: ignore
|
273
|
-
|
274
|
-
original = processing.send_to_process
|
275
|
-
mocker.patch.object(processing, "send_to_process", AsyncMock(side_effect=original))
|
276
|
-
|
277
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
278
|
-
resp = await client.post(
|
279
|
-
f"/{KB_PREFIX}/{kbid}/resource/{rid}/reprocess",
|
280
|
-
)
|
281
|
-
assert resp.status_code == 202
|
282
|
-
|
283
|
-
assert processing.send_to_process.call_count == 1 # type: ignore
|
284
|
-
payload = processing.send_to_process.call_args[0][0] # type: ignore
|
285
|
-
assert isinstance(payload, PushPayload)
|
286
|
-
assert payload.uuid == rid
|
287
|
-
assert payload.kbid == kbid
|
288
|
-
|
289
|
-
assert isinstance(payload.filefield.get("file1"), str)
|
290
|
-
assert payload.filefield["file1"] == "convert_internal_filefield_to_str,0"
|
291
|
-
assert isinstance(payload.linkfield.get("link1"), nucliadb_models.LinkUpload)
|
292
|
-
assert isinstance(payload.textfield.get("text1"), nucliadb_models.Text)
|
293
|
-
assert isinstance(
|
294
|
-
payload.layoutfield.get("layout1"), nucliadb_models.LayoutDiff
|
295
|
-
)
|
296
|
-
assert (
|
297
|
-
payload.layoutfield["layout1"].blocks["field1"].file
|
298
|
-
== "convert_internal_cf_to_str,2"
|
299
|
-
)
|
300
|
-
assert isinstance(
|
301
|
-
payload.conversationfield.get("conv1"), nucliadb_models.PushConversation
|
302
|
-
)
|
303
|
-
assert (
|
304
|
-
payload.conversationfield["conv1"].messages[33].content.attachments[0]
|
305
|
-
== "convert_internal_cf_to_str,0"
|
306
|
-
)
|
307
|
-
assert (
|
308
|
-
payload.conversationfield["conv1"].messages[33].content.attachments[1]
|
309
|
-
== "convert_internal_cf_to_str,1"
|
310
|
-
)
|
311
|
-
|
312
|
-
|
313
|
-
@pytest.mark.asyncio
|
314
|
-
@pytest.mark.parametrize(
|
315
|
-
"method,endpoint,payload",
|
316
|
-
[
|
317
|
-
["patch", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", {}],
|
318
|
-
["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reprocess", None],
|
319
|
-
["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reindex", None],
|
320
|
-
["delete", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", None],
|
321
|
-
],
|
322
|
-
)
|
323
|
-
async def test_resource_endpoints_by_slug(
|
324
|
-
writer_api: Callable[[list[str]], AsyncClient],
|
325
|
-
knowledgebox_ingest: str,
|
326
|
-
method: str,
|
327
|
-
endpoint: str,
|
328
|
-
payload: Optional[dict[Any, Any]],
|
329
|
-
):
|
330
|
-
async with writer_api([NucliaDBRoles.WRITER]) as client:
|
331
|
-
slug = "my-resource"
|
332
|
-
resp = await client.post(
|
333
|
-
f"/{KB_PREFIX}/{knowledgebox_ingest}/{RESOURCES_PREFIX}",
|
334
|
-
json={
|
335
|
-
"slug": slug,
|
336
|
-
"texts": {"text1": {"body": "test1", "format": "PLAIN"}},
|
337
|
-
},
|
338
|
-
)
|
339
|
-
assert resp.status_code == 201
|
340
|
-
|
341
|
-
endpoint = endpoint.format(
|
342
|
-
KB_PREFIX=KB_PREFIX,
|
343
|
-
kb=knowledgebox_ingest,
|
344
|
-
RSLUG_PREFIX=RSLUG_PREFIX,
|
345
|
-
slug=slug,
|
346
|
-
)
|
347
|
-
extra_params = {}
|
348
|
-
if payload is not None:
|
349
|
-
extra_params["json"] = payload
|
350
|
-
|
351
|
-
op = getattr(client, method)
|
352
|
-
resp = await op(endpoint, **extra_params)
|
353
|
-
|
354
|
-
assert resp.status_code in (200, 202, 204)
|
355
|
-
|
356
|
-
|
357
|
-
@pytest.mark.asyncio
|
358
|
-
@pytest.mark.parametrize(
|
359
|
-
"method,endpoint,payload",
|
360
|
-
[
|
361
|
-
["patch", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", {}],
|
362
|
-
["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reprocess", None],
|
363
|
-
["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reindex", None],
|
364
|
-
["delete", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", None],
|
365
|
-
],
|
366
|
-
)
|
367
|
-
async def test_resource_endpoints_by_slug_404(
|
368
|
-
writer_api,
|
369
|
-
knowledgebox_ingest,
|
370
|
-
method,
|
371
|
-
endpoint,
|
372
|
-
payload,
|
373
|
-
):
|
374
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
375
|
-
endpoint = endpoint.format(
|
376
|
-
KB_PREFIX=KB_PREFIX,
|
377
|
-
kb=knowledgebox_ingest,
|
378
|
-
RSLUG_PREFIX=RSLUG_PREFIX,
|
379
|
-
slug="idonotexist",
|
380
|
-
)
|
381
|
-
extra_params = {}
|
382
|
-
if payload is not None:
|
383
|
-
extra_params["json"] = payload
|
384
|
-
|
385
|
-
op = getattr(client, method)
|
386
|
-
resp = await op(endpoint, **extra_params)
|
387
|
-
|
388
|
-
assert resp.status_code == 404
|
389
|
-
assert resp.json()["detail"] == "Resource does not exist"
|
390
|
-
|
391
|
-
|
392
|
-
@pytest.mark.asyncio
|
393
|
-
async def test_reindex(writer_api, test_resource):
|
394
|
-
rsc = test_resource
|
395
|
-
kbid = rsc.kb.kbid
|
396
|
-
rid = rsc.uuid
|
397
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
398
|
-
resp = await client.post(
|
399
|
-
f"/{KB_PREFIX}/{kbid}/resource/{rid}/reindex",
|
400
|
-
)
|
401
|
-
assert resp.status_code == 200
|
402
|
-
|
403
|
-
resp = await client.post(
|
404
|
-
f"/{KB_PREFIX}/{kbid}/resource/{rid}/reindex?reindex_vectors=True",
|
405
|
-
)
|
406
|
-
assert resp.status_code == 200
|
407
|
-
|
408
|
-
|
409
|
-
@pytest.mark.asyncio
|
410
|
-
async def test_paragraph_annotations(writer_api, knowledgebox_writer):
|
411
|
-
kbid = knowledgebox_writer
|
412
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
413
|
-
# Must have at least one classification
|
414
|
-
resp = await client.post(
|
415
|
-
f"/{KB_PREFIX}/{kbid}/resources",
|
416
|
-
json={
|
417
|
-
"texts": {"text1": TEST_TEXT_PAYLOAD},
|
418
|
-
"fieldmetadata": [
|
419
|
-
{
|
420
|
-
"paragraphs": [
|
421
|
-
{
|
422
|
-
"key": "paragraph1",
|
423
|
-
"classifications": [],
|
424
|
-
}
|
425
|
-
],
|
426
|
-
"field": {"field": "text1", "field_type": "text"},
|
427
|
-
}
|
428
|
-
],
|
429
|
-
},
|
430
|
-
)
|
431
|
-
assert resp.status_code == 422
|
432
|
-
body = resp.json()
|
433
|
-
assert body["detail"] == "ensure classifications has at least 1 items"
|
434
|
-
|
435
|
-
classification = {"label": "label", "labelset": "ls"}
|
436
|
-
|
437
|
-
resp = await client.post(
|
438
|
-
f"/{KB_PREFIX}/{kbid}/resources",
|
439
|
-
json={
|
440
|
-
"texts": {"text1": TEST_TEXT_PAYLOAD},
|
441
|
-
"fieldmetadata": [
|
442
|
-
{
|
443
|
-
"paragraphs": [
|
444
|
-
{
|
445
|
-
"key": "paragraph1",
|
446
|
-
"classifications": [classification],
|
447
|
-
}
|
448
|
-
],
|
449
|
-
"field": {"field": "text1", "field_type": "text"},
|
450
|
-
}
|
451
|
-
],
|
452
|
-
},
|
453
|
-
)
|
454
|
-
assert resp.status_code == 201
|
455
|
-
rid = resp.json()["uuid"]
|
456
|
-
|
457
|
-
# Classifications need to be unique
|
458
|
-
resp = await client.patch(
|
459
|
-
f"/{KB_PREFIX}/{kbid}/resource/{rid}",
|
460
|
-
json={
|
461
|
-
"fieldmetadata": [
|
462
|
-
{
|
463
|
-
"paragraphs": [
|
464
|
-
{
|
465
|
-
"key": "paragraph1",
|
466
|
-
"classifications": [classification, classification],
|
467
|
-
}
|
468
|
-
],
|
469
|
-
"field": {"field": "text1", "field_type": "text"},
|
470
|
-
}
|
471
|
-
],
|
472
|
-
},
|
473
|
-
)
|
474
|
-
assert resp.status_code == 422
|
475
|
-
body = resp.json()
|
476
|
-
assert body["detail"] == "Paragraph classifications need to be unique"
|