nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,546 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from datetime import datetime
|
21
|
-
from typing import Any, Callable, Optional
|
22
|
-
from unittest.mock import AsyncMock # type: ignore
|
23
|
-
|
24
|
-
import pytest
|
25
|
-
from httpx import AsyncClient
|
26
|
-
from nucliadb_protos.writer_pb2 import ResourceFieldId
|
27
|
-
|
28
|
-
import nucliadb_models
|
29
|
-
from nucliadb.common.maindb.local import LocalDriver
|
30
|
-
from nucliadb.common.maindb.redis import RedisDriver
|
31
|
-
from nucliadb.ingest.orm.resource import Resource
|
32
|
-
from nucliadb.ingest.processing import PushPayload
|
33
|
-
from nucliadb.writer.api.v1.router import (
|
34
|
-
KB_PREFIX,
|
35
|
-
RESOURCE_PREFIX,
|
36
|
-
RESOURCES_PREFIX,
|
37
|
-
RSLUG_PREFIX,
|
38
|
-
)
|
39
|
-
from nucliadb.writer.tests.test_fields import (
|
40
|
-
TEST_CONVERSATION_PAYLOAD,
|
41
|
-
TEST_DATETIMES_PAYLOAD,
|
42
|
-
TEST_EXTERNAL_FILE_PAYLOAD,
|
43
|
-
TEST_FILE_PAYLOAD,
|
44
|
-
TEST_KEYWORDSETS_PAYLOAD,
|
45
|
-
TEST_LAYOUT_PAYLOAD,
|
46
|
-
TEST_LINK_PAYLOAD,
|
47
|
-
TEST_TEXT_PAYLOAD,
|
48
|
-
)
|
49
|
-
from nucliadb_models.resource import NucliaDBRoles
|
50
|
-
from nucliadb_utils.utilities import get_ingest
|
51
|
-
|
52
|
-
|
53
|
-
@pytest.mark.asyncio
|
54
|
-
async def test_resource_crud_min(
|
55
|
-
writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str
|
56
|
-
):
|
57
|
-
knowledgebox_id = knowledgebox_writer
|
58
|
-
async with writer_api([NucliaDBRoles.WRITER]) as client:
|
59
|
-
resp = await client.post(
|
60
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/vectorset/base",
|
61
|
-
json={"dimension": 3, "similarity": "dot"},
|
62
|
-
)
|
63
|
-
assert resp.status_code == 200
|
64
|
-
# Test create resource
|
65
|
-
resp = await client.post(
|
66
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
|
67
|
-
json={
|
68
|
-
"uservectors": [
|
69
|
-
{
|
70
|
-
"vectors": {
|
71
|
-
"base": {
|
72
|
-
"vector1": {
|
73
|
-
"vector": [4.0, 2.0, 3.0],
|
74
|
-
"positions": [0, 0],
|
75
|
-
}
|
76
|
-
}
|
77
|
-
},
|
78
|
-
"field": {"field_type": "file", "field": "field1"},
|
79
|
-
}
|
80
|
-
]
|
81
|
-
},
|
82
|
-
)
|
83
|
-
assert resp.status_code == 201
|
84
|
-
|
85
|
-
|
86
|
-
@pytest.mark.asyncio
|
87
|
-
async def test_resource_crud_min_no_vectorset(
|
88
|
-
writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str
|
89
|
-
):
|
90
|
-
knowledgebox_id = knowledgebox_writer
|
91
|
-
async with writer_api([NucliaDBRoles.WRITER]) as client:
|
92
|
-
# Test create resource
|
93
|
-
resp = await client.post(
|
94
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
|
95
|
-
json={
|
96
|
-
"uservectors": [
|
97
|
-
{
|
98
|
-
"vectors": {
|
99
|
-
"base": {
|
100
|
-
"vector1": {
|
101
|
-
"vector": [4.0, 2.0, 3.0],
|
102
|
-
"positions": [0, 0],
|
103
|
-
}
|
104
|
-
}
|
105
|
-
},
|
106
|
-
"field": {"field_type": "file", "field": "field1"},
|
107
|
-
}
|
108
|
-
]
|
109
|
-
},
|
110
|
-
)
|
111
|
-
assert resp.status_code == 201
|
112
|
-
|
113
|
-
|
114
|
-
@pytest.mark.asyncio
|
115
|
-
async def test_resource_crud(
|
116
|
-
writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str
|
117
|
-
):
|
118
|
-
knowledgebox_id = knowledgebox_writer
|
119
|
-
async with writer_api([NucliaDBRoles.WRITER]) as client:
|
120
|
-
# Test create resource
|
121
|
-
resp = await client.post(
|
122
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
|
123
|
-
headers={"X-SYNCHRONOUS": "True"},
|
124
|
-
json={
|
125
|
-
"slug": "resource1",
|
126
|
-
"title": "My resource",
|
127
|
-
"summary": "Some summary",
|
128
|
-
"icon": "image/png",
|
129
|
-
"layout": "layout",
|
130
|
-
"metadata": {
|
131
|
-
"language": "en",
|
132
|
-
"metadata": {"key1": "value1", "key2": "value2"},
|
133
|
-
},
|
134
|
-
"fieldmetadata": [
|
135
|
-
{
|
136
|
-
"paragraphs": [
|
137
|
-
{
|
138
|
-
"key": "paragraph1",
|
139
|
-
"classifications": [
|
140
|
-
{"labelset": "ls1", "label": "label1"}
|
141
|
-
],
|
142
|
-
}
|
143
|
-
],
|
144
|
-
"token": [
|
145
|
-
{"token": "token1", "klass": "klass1", "start": 1, "end": 2}
|
146
|
-
],
|
147
|
-
"field": {"field": "text1", "field_type": "text"},
|
148
|
-
}
|
149
|
-
],
|
150
|
-
"usermetadata": {
|
151
|
-
"classifications": [{"labelset": "ls1", "label": "label1"}],
|
152
|
-
"relations": [
|
153
|
-
{
|
154
|
-
"relation": "CHILD",
|
155
|
-
"to": {
|
156
|
-
"type": "resource",
|
157
|
-
"value": "resource_uuid",
|
158
|
-
},
|
159
|
-
}
|
160
|
-
],
|
161
|
-
},
|
162
|
-
"origin": {
|
163
|
-
"source_id": "source_id",
|
164
|
-
"url": "http://some_source",
|
165
|
-
"created": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
|
166
|
-
"modified": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
|
167
|
-
"metadata": {"key1": "value1", "key2": "value2"},
|
168
|
-
"tags": ["tag1", "tag2"],
|
169
|
-
"collaborators": ["col1", "col2"],
|
170
|
-
"filename": "file.pdf",
|
171
|
-
"related": ["related1"],
|
172
|
-
},
|
173
|
-
"texts": {"text1": TEST_TEXT_PAYLOAD},
|
174
|
-
"links": {"link1": TEST_LINK_PAYLOAD},
|
175
|
-
"files": {
|
176
|
-
"file1": TEST_FILE_PAYLOAD,
|
177
|
-
"external1": TEST_EXTERNAL_FILE_PAYLOAD,
|
178
|
-
},
|
179
|
-
"layouts": {"layout1": TEST_LAYOUT_PAYLOAD},
|
180
|
-
"conversations": {"conv1": TEST_CONVERSATION_PAYLOAD},
|
181
|
-
"keywordsets": {"keywordset1": TEST_KEYWORDSETS_PAYLOAD},
|
182
|
-
"datetimes": {"datetime1": TEST_DATETIMES_PAYLOAD},
|
183
|
-
},
|
184
|
-
)
|
185
|
-
|
186
|
-
assert resp.status_code == 201
|
187
|
-
data = resp.json()
|
188
|
-
assert "uuid" in data
|
189
|
-
assert "seqid" in data
|
190
|
-
rid = data["uuid"]
|
191
|
-
|
192
|
-
# Test update resource
|
193
|
-
resp = await client.patch(
|
194
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
|
195
|
-
json={},
|
196
|
-
)
|
197
|
-
assert resp.status_code == 200
|
198
|
-
|
199
|
-
data = resp.json()
|
200
|
-
|
201
|
-
assert "seqid" in data
|
202
|
-
|
203
|
-
# Test delete resource
|
204
|
-
resp = await client.delete(
|
205
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
|
206
|
-
)
|
207
|
-
assert resp.status_code == 204
|
208
|
-
|
209
|
-
|
210
|
-
@pytest.mark.asyncio
|
211
|
-
async def test_resource_crud_sync(
|
212
|
-
writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str
|
213
|
-
):
|
214
|
-
knowledgebox_id = knowledgebox_writer
|
215
|
-
async with writer_api([NucliaDBRoles.WRITER]) as client:
|
216
|
-
# Test create resource
|
217
|
-
resp = await client.post(
|
218
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
|
219
|
-
headers={"X-SYNCHRONOUS": "True"},
|
220
|
-
json={
|
221
|
-
"slug": "resource1",
|
222
|
-
"title": "My resource",
|
223
|
-
"summary": "Some summary",
|
224
|
-
"icon": "image/png",
|
225
|
-
"layout": "layout",
|
226
|
-
"metadata": {
|
227
|
-
"language": "en",
|
228
|
-
"metadata": {"key1": "value1", "key2": "value2"},
|
229
|
-
},
|
230
|
-
"fieldmetadata": [
|
231
|
-
{
|
232
|
-
"paragraphs": [
|
233
|
-
{
|
234
|
-
"key": "paragraph1",
|
235
|
-
"classifications": [
|
236
|
-
{"labelset": "ls1", "label": "label1"}
|
237
|
-
],
|
238
|
-
}
|
239
|
-
],
|
240
|
-
"token": [
|
241
|
-
{"token": "token1", "klass": "klass1", "start": 1, "end": 2}
|
242
|
-
],
|
243
|
-
"field": {"field": "text1", "field_type": "text"},
|
244
|
-
}
|
245
|
-
],
|
246
|
-
"usermetadata": {
|
247
|
-
"classifications": [{"labelset": "ls1", "label": "label1"}],
|
248
|
-
"relations": [
|
249
|
-
{
|
250
|
-
"relation": "CHILD",
|
251
|
-
"to": {
|
252
|
-
"type": "resource",
|
253
|
-
"value": "resource_uuid",
|
254
|
-
},
|
255
|
-
}
|
256
|
-
],
|
257
|
-
},
|
258
|
-
"origin": {
|
259
|
-
"source_id": "source_id",
|
260
|
-
"url": "http://some_source",
|
261
|
-
"created": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
|
262
|
-
"modified": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
|
263
|
-
"metadata": {"key1": "value1", "key2": "value2"},
|
264
|
-
"tags": ["tag1", "tag2"],
|
265
|
-
"collaborators": ["col1", "col2"],
|
266
|
-
"filename": "file.pdf",
|
267
|
-
"related": ["related1"],
|
268
|
-
},
|
269
|
-
"texts": {"text1": TEST_TEXT_PAYLOAD},
|
270
|
-
"links": {"link1": TEST_LINK_PAYLOAD},
|
271
|
-
"files": {"file1": TEST_FILE_PAYLOAD},
|
272
|
-
"layouts": {"layout1": TEST_LAYOUT_PAYLOAD},
|
273
|
-
"conversations": {"conv1": TEST_CONVERSATION_PAYLOAD},
|
274
|
-
"keywordsets": {"keywordset1": TEST_KEYWORDSETS_PAYLOAD},
|
275
|
-
"datetimes": {"datetime1": TEST_DATETIMES_PAYLOAD},
|
276
|
-
},
|
277
|
-
)
|
278
|
-
|
279
|
-
assert resp.status_code == 201
|
280
|
-
data = resp.json()
|
281
|
-
assert "uuid" in data
|
282
|
-
assert "seqid" in data
|
283
|
-
assert "elapsed" in data
|
284
|
-
rid = data["uuid"]
|
285
|
-
|
286
|
-
ingest = get_ingest()
|
287
|
-
pbrequest = ResourceFieldId()
|
288
|
-
pbrequest.kbid = knowledgebox_id
|
289
|
-
pbrequest.rid = rid
|
290
|
-
|
291
|
-
res = await ingest.ResourceFieldExists(pbrequest) # type: ignore
|
292
|
-
assert res.found
|
293
|
-
|
294
|
-
# Test update resource
|
295
|
-
resp = await client.patch(
|
296
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
|
297
|
-
headers={"X-SYNCHRONOUS": "True"},
|
298
|
-
json={},
|
299
|
-
)
|
300
|
-
assert resp.status_code == 200
|
301
|
-
|
302
|
-
# Test delete resource
|
303
|
-
|
304
|
-
resp = await client.delete(
|
305
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/resource1",
|
306
|
-
headers={"X-SYNCHRONOUS": "True"},
|
307
|
-
)
|
308
|
-
|
309
|
-
assert resp.status_code == 404
|
310
|
-
|
311
|
-
resp = await client.delete(
|
312
|
-
f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
|
313
|
-
headers={"X-SYNCHRONOUS": "True"},
|
314
|
-
)
|
315
|
-
assert resp.status_code == 204
|
316
|
-
|
317
|
-
res = await ingest.ResourceFieldExists(pbrequest) # type: ignore
|
318
|
-
assert not res.found
|
319
|
-
|
320
|
-
|
321
|
-
@pytest.mark.asyncio
|
322
|
-
async def test_reprocess_resource(
|
323
|
-
writer_api: Callable[..., AsyncClient],
|
324
|
-
test_resource: Resource,
|
325
|
-
mocker,
|
326
|
-
maindb_driver,
|
327
|
-
) -> None:
|
328
|
-
if isinstance(maindb_driver, (LocalDriver, RedisDriver)):
|
329
|
-
pytest.skip("Keys might not be ordered correctly in this driver")
|
330
|
-
|
331
|
-
rsc = test_resource
|
332
|
-
kbid = rsc.kb.kbid
|
333
|
-
rid = rsc.uuid
|
334
|
-
|
335
|
-
from nucliadb.writer.utilities import get_processing
|
336
|
-
|
337
|
-
processing = get_processing()
|
338
|
-
processing.values.clear() # type: ignore
|
339
|
-
|
340
|
-
original = processing.send_to_process
|
341
|
-
mocker.patch.object(processing, "send_to_process", AsyncMock(side_effect=original))
|
342
|
-
|
343
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
344
|
-
resp = await client.post(
|
345
|
-
f"/{KB_PREFIX}/{kbid}/resource/{rid}/reprocess",
|
346
|
-
)
|
347
|
-
assert resp.status_code == 202
|
348
|
-
|
349
|
-
assert processing.send_to_process.call_count == 1 # type: ignore
|
350
|
-
payload = processing.send_to_process.call_args[0][0] # type: ignore
|
351
|
-
assert isinstance(payload, PushPayload)
|
352
|
-
assert payload.uuid == rid
|
353
|
-
assert payload.kbid == kbid
|
354
|
-
|
355
|
-
assert isinstance(payload.filefield.get("file1"), str)
|
356
|
-
assert payload.filefield["file1"] == "convert_internal_filefield_to_str,0"
|
357
|
-
assert isinstance(payload.linkfield.get("link1"), nucliadb_models.LinkUpload)
|
358
|
-
assert isinstance(payload.textfield.get("text1"), nucliadb_models.Text)
|
359
|
-
assert isinstance(
|
360
|
-
payload.layoutfield.get("layout1"), nucliadb_models.LayoutDiff
|
361
|
-
)
|
362
|
-
assert (
|
363
|
-
payload.layoutfield["layout1"].blocks["field1"].file
|
364
|
-
== "convert_internal_cf_to_str,2"
|
365
|
-
)
|
366
|
-
assert isinstance(
|
367
|
-
payload.conversationfield.get("conv1"), nucliadb_models.PushConversation
|
368
|
-
)
|
369
|
-
assert (
|
370
|
-
payload.conversationfield["conv1"].messages[33].content.attachments[0]
|
371
|
-
== "convert_internal_cf_to_str,0"
|
372
|
-
)
|
373
|
-
assert (
|
374
|
-
payload.conversationfield["conv1"].messages[33].content.attachments[1]
|
375
|
-
== "convert_internal_cf_to_str,1"
|
376
|
-
)
|
377
|
-
|
378
|
-
|
379
|
-
@pytest.mark.asyncio
|
380
|
-
@pytest.mark.parametrize(
|
381
|
-
"method,endpoint,payload",
|
382
|
-
[
|
383
|
-
["patch", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", {}],
|
384
|
-
["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reprocess", None],
|
385
|
-
["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reindex", None],
|
386
|
-
["delete", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", None],
|
387
|
-
],
|
388
|
-
)
|
389
|
-
async def test_resource_endpoints_by_slug(
|
390
|
-
writer_api: Callable[[list[str]], AsyncClient],
|
391
|
-
knowledgebox_ingest: str,
|
392
|
-
method: str,
|
393
|
-
endpoint: str,
|
394
|
-
payload: Optional[dict[Any, Any]],
|
395
|
-
):
|
396
|
-
async with writer_api([NucliaDBRoles.WRITER]) as client:
|
397
|
-
slug = "my-resource"
|
398
|
-
resp = await client.post(
|
399
|
-
f"/{KB_PREFIX}/{knowledgebox_ingest}/{RESOURCES_PREFIX}",
|
400
|
-
headers={"X-SYNCHRONOUS": "True"},
|
401
|
-
json={
|
402
|
-
"slug": slug,
|
403
|
-
"texts": {"text1": {"body": "test1", "format": "PLAIN"}},
|
404
|
-
},
|
405
|
-
)
|
406
|
-
assert resp.status_code == 201
|
407
|
-
|
408
|
-
endpoint = endpoint.format(
|
409
|
-
KB_PREFIX=KB_PREFIX,
|
410
|
-
kb=knowledgebox_ingest,
|
411
|
-
RSLUG_PREFIX=RSLUG_PREFIX,
|
412
|
-
slug=slug,
|
413
|
-
)
|
414
|
-
extra_params = {}
|
415
|
-
if payload is not None:
|
416
|
-
extra_params["json"] = payload
|
417
|
-
|
418
|
-
op = getattr(client, method)
|
419
|
-
resp = await op(endpoint, **extra_params)
|
420
|
-
|
421
|
-
assert resp.status_code in (200, 202, 204)
|
422
|
-
|
423
|
-
|
424
|
-
@pytest.mark.asyncio
|
425
|
-
@pytest.mark.parametrize(
|
426
|
-
"method,endpoint,payload",
|
427
|
-
[
|
428
|
-
["patch", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", {}],
|
429
|
-
["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reprocess", None],
|
430
|
-
["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reindex", None],
|
431
|
-
["delete", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", None],
|
432
|
-
],
|
433
|
-
)
|
434
|
-
async def test_resource_endpoints_by_slug_404(
|
435
|
-
writer_api,
|
436
|
-
knowledgebox_ingest,
|
437
|
-
method,
|
438
|
-
endpoint,
|
439
|
-
payload,
|
440
|
-
):
|
441
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
442
|
-
endpoint = endpoint.format(
|
443
|
-
KB_PREFIX=KB_PREFIX,
|
444
|
-
kb=knowledgebox_ingest,
|
445
|
-
RSLUG_PREFIX=RSLUG_PREFIX,
|
446
|
-
slug="idonotexist",
|
447
|
-
)
|
448
|
-
extra_params = {}
|
449
|
-
if payload is not None:
|
450
|
-
extra_params["json"] = payload
|
451
|
-
|
452
|
-
op = getattr(client, method)
|
453
|
-
resp = await op(endpoint, **extra_params)
|
454
|
-
|
455
|
-
assert resp.status_code == 404
|
456
|
-
assert resp.json()["detail"] == "Resource does not exist"
|
457
|
-
|
458
|
-
|
459
|
-
@pytest.mark.asyncio
|
460
|
-
async def test_reindex(writer_api, test_resource):
|
461
|
-
rsc = test_resource
|
462
|
-
kbid = rsc.kb.kbid
|
463
|
-
rid = rsc.uuid
|
464
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
465
|
-
resp = await client.post(
|
466
|
-
f"/{KB_PREFIX}/{kbid}/resource/{rid}/reindex",
|
467
|
-
)
|
468
|
-
assert resp.status_code == 200
|
469
|
-
|
470
|
-
resp = await client.post(
|
471
|
-
f"/{KB_PREFIX}/{kbid}/resource/{rid}/reindex?reindex_vectors=True",
|
472
|
-
)
|
473
|
-
assert resp.status_code == 200
|
474
|
-
|
475
|
-
|
476
|
-
@pytest.mark.asyncio
|
477
|
-
async def test_paragraph_annotations(writer_api, knowledgebox_writer):
|
478
|
-
kbid = knowledgebox_writer
|
479
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
480
|
-
# Must have at least one classification
|
481
|
-
resp = await client.post(
|
482
|
-
f"/{KB_PREFIX}/{kbid}/resources",
|
483
|
-
headers={"X-SYNCHRONOUS": "True"},
|
484
|
-
json={
|
485
|
-
"texts": {"text1": TEST_TEXT_PAYLOAD},
|
486
|
-
"fieldmetadata": [
|
487
|
-
{
|
488
|
-
"paragraphs": [
|
489
|
-
{
|
490
|
-
"key": "paragraph1",
|
491
|
-
"classifications": [],
|
492
|
-
}
|
493
|
-
],
|
494
|
-
"field": {"field": "text1", "field_type": "text"},
|
495
|
-
}
|
496
|
-
],
|
497
|
-
},
|
498
|
-
)
|
499
|
-
assert resp.status_code == 422
|
500
|
-
body = resp.json()
|
501
|
-
assert body["detail"] == "ensure classifications has at least 1 items"
|
502
|
-
|
503
|
-
classification = {"label": "label", "labelset": "ls"}
|
504
|
-
|
505
|
-
resp = await client.post(
|
506
|
-
f"/{KB_PREFIX}/{kbid}/resources",
|
507
|
-
headers={"X-SYNCHRONOUS": "True"},
|
508
|
-
json={
|
509
|
-
"texts": {"text1": TEST_TEXT_PAYLOAD},
|
510
|
-
"fieldmetadata": [
|
511
|
-
{
|
512
|
-
"paragraphs": [
|
513
|
-
{
|
514
|
-
"key": "paragraph1",
|
515
|
-
"classifications": [classification],
|
516
|
-
}
|
517
|
-
],
|
518
|
-
"field": {"field": "text1", "field_type": "text"},
|
519
|
-
}
|
520
|
-
],
|
521
|
-
},
|
522
|
-
)
|
523
|
-
assert resp.status_code == 201
|
524
|
-
rid = resp.json()["uuid"]
|
525
|
-
|
526
|
-
# Classifications need to be unique
|
527
|
-
resp = await client.patch(
|
528
|
-
f"/{KB_PREFIX}/{kbid}/resource/{rid}",
|
529
|
-
headers={"X-SYNCHRONOUS": "True"},
|
530
|
-
json={
|
531
|
-
"fieldmetadata": [
|
532
|
-
{
|
533
|
-
"paragraphs": [
|
534
|
-
{
|
535
|
-
"key": "paragraph1",
|
536
|
-
"classifications": [classification, classification],
|
537
|
-
}
|
538
|
-
],
|
539
|
-
"field": {"field": "text1", "field_type": "text"},
|
540
|
-
}
|
541
|
-
],
|
542
|
-
},
|
543
|
-
)
|
544
|
-
assert resp.status_code == 422
|
545
|
-
body = resp.json()
|
546
|
-
assert body["detail"] == "Paragraph classifications need to be unique"
|
@@ -1,137 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import pytest
|
21
|
-
|
22
|
-
from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX
|
23
|
-
from nucliadb_models.entities import CreateEntitiesGroupPayload, Entity
|
24
|
-
from nucliadb_models.labels import Label, LabelSet
|
25
|
-
from nucliadb_models.resource import NucliaDBRoles
|
26
|
-
from nucliadb_protos import knowledgebox_pb2, writer_pb2
|
27
|
-
from nucliadb_utils.utilities import get_ingest
|
28
|
-
|
29
|
-
|
30
|
-
@pytest.mark.asyncio
|
31
|
-
async def test_service_lifecycle_entities(writer_api, entities_manager_mock):
|
32
|
-
async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
|
33
|
-
resp = await client.post(
|
34
|
-
f"/{KBS_PREFIX}",
|
35
|
-
json={
|
36
|
-
"slug": "kbid1",
|
37
|
-
"title": "My Knowledge Box",
|
38
|
-
},
|
39
|
-
)
|
40
|
-
assert resp.status_code == 201
|
41
|
-
data = resp.json()
|
42
|
-
assert data["slug"] == "kbid1"
|
43
|
-
kbid = data["uuid"]
|
44
|
-
|
45
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
46
|
-
eg = CreateEntitiesGroupPayload(
|
47
|
-
group="0",
|
48
|
-
title="My group",
|
49
|
-
color="#0000000",
|
50
|
-
entities={
|
51
|
-
"ent1": Entity(value="asd", merged=False),
|
52
|
-
"ent2": Entity(value="asd", merged=False),
|
53
|
-
"ent3": Entity(value="asd", merged=False),
|
54
|
-
},
|
55
|
-
)
|
56
|
-
|
57
|
-
resp = await client.post(f"/{KB_PREFIX}/{kbid}/entitiesgroups", json=eg.dict())
|
58
|
-
assert resp.status_code == 200
|
59
|
-
|
60
|
-
ingest = get_ingest()
|
61
|
-
result = await ingest.GetEntities(
|
62
|
-
writer_pb2.GetEntitiesRequest(kb=knowledgebox_pb2.KnowledgeBoxID(uuid=kbid))
|
63
|
-
)
|
64
|
-
assert set(result.groups.keys()) == {"0"}
|
65
|
-
assert result.groups["0"].title == eg.title
|
66
|
-
assert result.groups["0"].color == eg.color
|
67
|
-
assert set(result.groups["0"].entities.keys()) == {"ent1", "ent2", "ent3"}
|
68
|
-
assert result.groups["0"].entities["ent1"].value == "asd"
|
69
|
-
|
70
|
-
eg.group = "1"
|
71
|
-
resp = await client.post(f"/{KB_PREFIX}/{kbid}/entitiesgroups", json=eg.dict())
|
72
|
-
assert resp.status_code == 200
|
73
|
-
result = await ingest.GetEntities(
|
74
|
-
writer_pb2.GetEntitiesRequest(kb=knowledgebox_pb2.KnowledgeBoxID(uuid=kbid))
|
75
|
-
)
|
76
|
-
assert set(result.groups.keys()) == {"0", "1"}
|
77
|
-
|
78
|
-
|
79
|
-
@pytest.mark.asyncio
|
80
|
-
async def test_entities_custom_field_for_user_defined_groups(
|
81
|
-
writer_api, entities_manager_mock
|
82
|
-
):
|
83
|
-
"""
|
84
|
-
Test description:
|
85
|
-
|
86
|
-
- Create an entity group and check that the default value for the `custom`
|
87
|
-
field is True
|
88
|
-
"""
|
89
|
-
async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
|
90
|
-
resp = await client.post(
|
91
|
-
f"/{KBS_PREFIX}",
|
92
|
-
json={
|
93
|
-
"slug": "kbid1",
|
94
|
-
"title": "My Knowledge Box",
|
95
|
-
},
|
96
|
-
)
|
97
|
-
assert resp.status_code == 201
|
98
|
-
data = resp.json()
|
99
|
-
kbid = data["uuid"]
|
100
|
-
|
101
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
102
|
-
eg = CreateEntitiesGroupPayload(group="0")
|
103
|
-
resp = await client.post(f"/{KB_PREFIX}/{kbid}/entitiesgroups", json=eg.dict())
|
104
|
-
assert resp.status_code == 200
|
105
|
-
|
106
|
-
ingest = get_ingest()
|
107
|
-
result = await ingest.GetEntities(
|
108
|
-
writer_pb2.GetEntitiesRequest(kb=knowledgebox_pb2.KnowledgeBoxID(uuid=kbid))
|
109
|
-
)
|
110
|
-
assert result.groups["0"].custom is True
|
111
|
-
|
112
|
-
|
113
|
-
@pytest.mark.asyncio
|
114
|
-
async def test_service_lifecycle_labels(writer_api):
|
115
|
-
async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
|
116
|
-
resp = await client.post(
|
117
|
-
f"/{KBS_PREFIX}",
|
118
|
-
json={
|
119
|
-
"slug": "kbid1",
|
120
|
-
"title": "My Knowledge Box",
|
121
|
-
},
|
122
|
-
)
|
123
|
-
assert resp.status_code == 201
|
124
|
-
data = resp.json()
|
125
|
-
assert data["slug"] == "kbid1"
|
126
|
-
kbid = data["uuid"]
|
127
|
-
|
128
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
129
|
-
ls = LabelSet(
|
130
|
-
title="My labelset", color="#0000000", multiple=False, kind=["RESOURCES"]
|
131
|
-
)
|
132
|
-
ls.labels.append(Label(title="asd"))
|
133
|
-
ls.labels.append(Label(title="asd"))
|
134
|
-
resp = await client.post(f"/{KB_PREFIX}/{kbid}/labelset/ls1", json=ls.dict())
|
135
|
-
assert resp.status_code == 200
|
136
|
-
resp = await client.post(f"/{KB_PREFIX}/{kbid}/labelset/ls2", json=ls.dict())
|
137
|
-
assert resp.status_code == 200
|