nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,584 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import asyncio
|
21
|
-
from unittest.mock import AsyncMock, MagicMock, Mock
|
22
|
-
|
23
|
-
import aiohttp
|
24
|
-
import pytest
|
25
|
-
from yarl import URL
|
26
|
-
|
27
|
-
from nucliadb.search.predict import (
|
28
|
-
DummyPredictEngine,
|
29
|
-
PredictEngine,
|
30
|
-
PredictVectorMissing,
|
31
|
-
ProxiedPredictAPIError,
|
32
|
-
RephraseError,
|
33
|
-
RephraseMissingContextError,
|
34
|
-
SendToPredictError,
|
35
|
-
_parse_rephrase_response,
|
36
|
-
get_answer_generator,
|
37
|
-
)
|
38
|
-
from nucliadb.tests.utils.aiohttp_session import get_mocked_session
|
39
|
-
from nucliadb_models.search import (
|
40
|
-
AskDocumentModel,
|
41
|
-
ChatModel,
|
42
|
-
FeedbackRequest,
|
43
|
-
FeedbackTasks,
|
44
|
-
RephraseModel,
|
45
|
-
SummarizedResource,
|
46
|
-
SummarizedResponse,
|
47
|
-
SummarizeModel,
|
48
|
-
SummarizeResourceModel,
|
49
|
-
)
|
50
|
-
from nucliadb_utils.exceptions import LimitsExceededError
|
51
|
-
|
52
|
-
|
53
|
-
@pytest.mark.asyncio
|
54
|
-
async def test_dummy_predict_engine():
|
55
|
-
pe = DummyPredictEngine()
|
56
|
-
await pe.initialize()
|
57
|
-
await pe.finalize()
|
58
|
-
await pe.send_feedback("kbid", Mock(), "", "", "")
|
59
|
-
assert await pe.rephrase_query("kbid", Mock())
|
60
|
-
assert await pe.chat_query("kbid", Mock())
|
61
|
-
assert await pe.convert_sentence_to_vector("kbid", "some sentence")
|
62
|
-
assert await pe.detect_entities("kbid", "some sentence")
|
63
|
-
assert await pe.ask_document("kbid", "query", [["footext"]], "userid")
|
64
|
-
assert await pe.summarize("kbid", Mock(resources={}))
|
65
|
-
|
66
|
-
|
67
|
-
@pytest.mark.asyncio
|
68
|
-
@pytest.mark.parametrize(
|
69
|
-
"onprem,expected_url,expected_header,expected_header_value",
|
70
|
-
[
|
71
|
-
(
|
72
|
-
True,
|
73
|
-
"{public_url}/api/v1/predict/sentence/kbid",
|
74
|
-
"X-STF-NUAKEY",
|
75
|
-
"Bearer {service_account}",
|
76
|
-
),
|
77
|
-
(False, "{cluster}/api/v1/internal/predict/sentence", "X-STF-KBID", "{kbid}"),
|
78
|
-
],
|
79
|
-
)
|
80
|
-
async def test_convert_sentence_ok(
|
81
|
-
onprem, expected_url, expected_header, expected_header_value
|
82
|
-
):
|
83
|
-
service_account = "service-account"
|
84
|
-
|
85
|
-
pe = PredictEngine(
|
86
|
-
"cluster",
|
87
|
-
"public-{zone}",
|
88
|
-
service_account,
|
89
|
-
zone="zone1",
|
90
|
-
onprem=onprem,
|
91
|
-
)
|
92
|
-
|
93
|
-
pe.session = get_mocked_session(
|
94
|
-
"GET", 200, json={"data": [0.0, 0.1]}, context_manager=False
|
95
|
-
)
|
96
|
-
|
97
|
-
kbid = "kbid"
|
98
|
-
sentence = "some sentence"
|
99
|
-
|
100
|
-
assert await pe.convert_sentence_to_vector(kbid, sentence) == [0.0, 0.1]
|
101
|
-
|
102
|
-
path = expected_url.format(public_url=pe.public_url, cluster=pe.cluster_url)
|
103
|
-
|
104
|
-
headers = {
|
105
|
-
expected_header: expected_header_value.format(
|
106
|
-
kbid=kbid, service_account=service_account
|
107
|
-
)
|
108
|
-
}
|
109
|
-
pe.session.get.assert_awaited_once_with(
|
110
|
-
url=path,
|
111
|
-
params={"text": sentence},
|
112
|
-
headers=headers,
|
113
|
-
)
|
114
|
-
|
115
|
-
|
116
|
-
@pytest.mark.asyncio
|
117
|
-
@pytest.mark.parametrize("onprem", [True, False])
|
118
|
-
async def test_convert_sentence_error(onprem):
|
119
|
-
pe = PredictEngine(
|
120
|
-
"cluster",
|
121
|
-
"public-{zone}",
|
122
|
-
"service-account",
|
123
|
-
onprem=onprem,
|
124
|
-
)
|
125
|
-
pe.session = get_mocked_session("GET", 400, json="uops!", context_manager=False)
|
126
|
-
with pytest.raises(ProxiedPredictAPIError):
|
127
|
-
await pe.convert_sentence_to_vector("kbid", "some sentence")
|
128
|
-
|
129
|
-
|
130
|
-
@pytest.mark.asyncio
|
131
|
-
@pytest.mark.parametrize(
|
132
|
-
"onprem,expected_url,expected_header,expected_header_value",
|
133
|
-
[
|
134
|
-
(
|
135
|
-
True,
|
136
|
-
"{public_url}/api/v1/predict/tokens/kbid",
|
137
|
-
"X-STF-NUAKEY",
|
138
|
-
"Bearer {service_account}",
|
139
|
-
),
|
140
|
-
(False, "{cluster}/api/v1/internal/predict/tokens", "X-STF-KBID", "{kbid}"),
|
141
|
-
],
|
142
|
-
)
|
143
|
-
async def test_detect_entities_ok(
|
144
|
-
onprem, expected_url, expected_header, expected_header_value
|
145
|
-
):
|
146
|
-
cluster_url = "cluster"
|
147
|
-
public_url = "public-{zone}"
|
148
|
-
service_account = "service-account"
|
149
|
-
zone = "zone1"
|
150
|
-
|
151
|
-
pe = PredictEngine(
|
152
|
-
cluster_url,
|
153
|
-
public_url,
|
154
|
-
service_account,
|
155
|
-
zone=zone,
|
156
|
-
onprem=onprem,
|
157
|
-
)
|
158
|
-
pe.session = get_mocked_session(
|
159
|
-
"GET",
|
160
|
-
200,
|
161
|
-
json={"tokens": [{"text": "foo", "ner": "bar"}]},
|
162
|
-
context_manager=False,
|
163
|
-
)
|
164
|
-
|
165
|
-
kbid = "kbid"
|
166
|
-
sentence = "some sentence"
|
167
|
-
assert len(await pe.detect_entities(kbid, sentence)) > 0
|
168
|
-
|
169
|
-
path = expected_url.format(public_url=pe.public_url, cluster=pe.cluster_url)
|
170
|
-
|
171
|
-
headers = {
|
172
|
-
expected_header: expected_header_value.format(
|
173
|
-
kbid=kbid, service_account=service_account
|
174
|
-
)
|
175
|
-
}
|
176
|
-
pe.session.get.assert_awaited_once_with(
|
177
|
-
url=path,
|
178
|
-
params={"text": sentence},
|
179
|
-
headers=headers,
|
180
|
-
)
|
181
|
-
|
182
|
-
|
183
|
-
@pytest.mark.asyncio
|
184
|
-
@pytest.mark.parametrize("onprem", [True, False])
|
185
|
-
async def test_detect_entities_error(onprem):
|
186
|
-
pe = PredictEngine(
|
187
|
-
"cluster",
|
188
|
-
"public-{zone}",
|
189
|
-
"service-account",
|
190
|
-
onprem=onprem,
|
191
|
-
)
|
192
|
-
pe.session = get_mocked_session("GET", 500, json="error", context_manager=False)
|
193
|
-
with pytest.raises(ProxiedPredictAPIError):
|
194
|
-
await pe.detect_entities("kbid", "some sentence")
|
195
|
-
|
196
|
-
|
197
|
-
@pytest.fixture(scope="function")
|
198
|
-
def session_limits_exceeded():
|
199
|
-
session = AsyncMock()
|
200
|
-
resp = Mock(status=402)
|
201
|
-
resp.json = AsyncMock(return_value={"detail": "limits exceeded"})
|
202
|
-
resp.read = AsyncMock(return_value="something went wrong")
|
203
|
-
session.post.return_value = resp
|
204
|
-
session.get.return_value = resp
|
205
|
-
return session
|
206
|
-
|
207
|
-
|
208
|
-
@pytest.mark.asyncio
|
209
|
-
@pytest.mark.parametrize(
|
210
|
-
"method,args",
|
211
|
-
[
|
212
|
-
("convert_sentence_to_vector", ["kbid", "sentence"]),
|
213
|
-
("detect_entities", ["kbid", "sentence"]),
|
214
|
-
("chat_query", ["kbid", ChatModel(question="foo", user_id="bar")]),
|
215
|
-
(
|
216
|
-
"send_feedback",
|
217
|
-
[
|
218
|
-
"kbid",
|
219
|
-
FeedbackRequest(ident="foo", good=True, task=FeedbackTasks.CHAT),
|
220
|
-
"",
|
221
|
-
"",
|
222
|
-
"",
|
223
|
-
],
|
224
|
-
),
|
225
|
-
("rephrase_query", ["kbid", RephraseModel(question="foo", user_id="bar")]),
|
226
|
-
("ask_document", ["kbid", "query", [["footext"]], "userid"]),
|
227
|
-
],
|
228
|
-
)
|
229
|
-
async def test_predict_engine_handles_limits_exceeded_error(
|
230
|
-
session_limits_exceeded, method, args
|
231
|
-
):
|
232
|
-
pe = PredictEngine(
|
233
|
-
"cluster",
|
234
|
-
"public-{zone}",
|
235
|
-
"service-account",
|
236
|
-
onprem=True,
|
237
|
-
)
|
238
|
-
pe.session = session_limits_exceeded
|
239
|
-
with pytest.raises(LimitsExceededError):
|
240
|
-
await getattr(pe, method)(*args)
|
241
|
-
|
242
|
-
|
243
|
-
@pytest.mark.parametrize(
|
244
|
-
"method,args,exception,output",
|
245
|
-
[
|
246
|
-
("chat_query", ["kbid", Mock()], True, None),
|
247
|
-
("rephrase_query", ["kbid", Mock()], True, None),
|
248
|
-
("send_feedback", ["kbid", MagicMock(), "", "", ""], False, None),
|
249
|
-
("convert_sentence_to_vector", ["kbid", "sentence"], False, []),
|
250
|
-
("detect_entities", ["kbid", "sentence"], False, []),
|
251
|
-
("ask_document", ["kbid", "query", [["footext"]], "userid"], True, None),
|
252
|
-
("summarize", ["kbid", Mock(resources={})], True, None),
|
253
|
-
],
|
254
|
-
)
|
255
|
-
async def test_onprem_nuclia_service_account_not_configured(
|
256
|
-
method, args, exception, output
|
257
|
-
):
|
258
|
-
pe = PredictEngine(
|
259
|
-
"cluster",
|
260
|
-
"public-{zone}",
|
261
|
-
nuclia_service_account=None,
|
262
|
-
onprem=True,
|
263
|
-
)
|
264
|
-
if exception:
|
265
|
-
with pytest.raises(SendToPredictError):
|
266
|
-
await getattr(pe, method)(*args)
|
267
|
-
else:
|
268
|
-
assert await getattr(pe, method)(*args) == output
|
269
|
-
|
270
|
-
|
271
|
-
async def test_convert_sentence_to_vector_empty_vectors():
|
272
|
-
pe = PredictEngine(
|
273
|
-
"cluster",
|
274
|
-
"public-{zone}",
|
275
|
-
nuclia_service_account="foo",
|
276
|
-
onprem=True,
|
277
|
-
)
|
278
|
-
pe.session = get_mocked_session(
|
279
|
-
"GET", 200, json={"data": []}, context_manager=False
|
280
|
-
)
|
281
|
-
with pytest.raises(PredictVectorMissing):
|
282
|
-
await pe.convert_sentence_to_vector("kbid", "sentence")
|
283
|
-
|
284
|
-
|
285
|
-
async def test_ask_document_onprem():
|
286
|
-
pe = PredictEngine(
|
287
|
-
"cluster",
|
288
|
-
"public-{zone}",
|
289
|
-
nuclia_service_account="foo",
|
290
|
-
zone="europe1",
|
291
|
-
onprem=True,
|
292
|
-
)
|
293
|
-
pe.session = get_mocked_session(
|
294
|
-
"POST", 200, text="The answer", context_manager=False
|
295
|
-
)
|
296
|
-
|
297
|
-
assert (
|
298
|
-
await pe.ask_document("kbid", "query", [["footext"]], "userid") == "The answer"
|
299
|
-
)
|
300
|
-
|
301
|
-
pe.session.post.assert_awaited_once_with(
|
302
|
-
url="public-europe1/api/v1/predict/ask_document/kbid",
|
303
|
-
json=AskDocumentModel(
|
304
|
-
question="query", blocks=[["footext"]], user_id="userid"
|
305
|
-
).dict(),
|
306
|
-
headers={"X-STF-NUAKEY": "Bearer foo"},
|
307
|
-
timeout=None,
|
308
|
-
)
|
309
|
-
|
310
|
-
|
311
|
-
async def test_ask_document_cloud():
|
312
|
-
pe = PredictEngine(
|
313
|
-
"cluster",
|
314
|
-
"public-{zone}",
|
315
|
-
zone="europe1",
|
316
|
-
onprem=False,
|
317
|
-
)
|
318
|
-
pe.session = get_mocked_session(
|
319
|
-
"POST", 200, text="The answer", context_manager=False
|
320
|
-
)
|
321
|
-
|
322
|
-
assert (
|
323
|
-
await pe.ask_document("kbid", "query", [["footext"]], "userid") == "The answer"
|
324
|
-
)
|
325
|
-
|
326
|
-
pe.session.post.assert_awaited_once_with(
|
327
|
-
url="cluster/api/v1/internal/predict/ask_document",
|
328
|
-
json=AskDocumentModel(
|
329
|
-
question="query", blocks=[["footext"]], user_id="userid"
|
330
|
-
).dict(),
|
331
|
-
headers={"X-STF-KBID": "kbid"},
|
332
|
-
timeout=None,
|
333
|
-
)
|
334
|
-
|
335
|
-
|
336
|
-
async def test_rephrase():
|
337
|
-
pe = PredictEngine(
|
338
|
-
"cluster",
|
339
|
-
"public-{zone}",
|
340
|
-
zone="europe1",
|
341
|
-
onprem=False,
|
342
|
-
)
|
343
|
-
pe.session = get_mocked_session(
|
344
|
-
"POST", 200, json="rephrased", context_manager=False
|
345
|
-
)
|
346
|
-
|
347
|
-
item = RephraseModel(
|
348
|
-
question="question", chat_history=[], user_id="foo", user_context=["foo"]
|
349
|
-
)
|
350
|
-
rephrased_query = await pe.rephrase_query("kbid", item)
|
351
|
-
# The rephrase query should not be wrapped in quotes, otherwise it will trigger an exact match query to the index
|
352
|
-
assert rephrased_query.strip('"') == rephrased_query
|
353
|
-
assert rephrased_query == "rephrased"
|
354
|
-
|
355
|
-
pe.session.post.assert_awaited_once_with(
|
356
|
-
url="cluster/api/v1/internal/predict/rephrase",
|
357
|
-
json=item.dict(),
|
358
|
-
headers={"X-STF-KBID": "kbid"},
|
359
|
-
)
|
360
|
-
|
361
|
-
|
362
|
-
async def test_rephrase_onprem():
|
363
|
-
pe = PredictEngine(
|
364
|
-
"cluster",
|
365
|
-
"public-{zone}",
|
366
|
-
zone="europe1",
|
367
|
-
onprem=True,
|
368
|
-
nuclia_service_account="nuakey",
|
369
|
-
)
|
370
|
-
pe.session = get_mocked_session(
|
371
|
-
"POST", 200, json="rephrased", context_manager=False
|
372
|
-
)
|
373
|
-
|
374
|
-
item = RephraseModel(
|
375
|
-
question="question", chat_history=[], user_id="foo", user_context=["foo"]
|
376
|
-
)
|
377
|
-
rephrased_query = await pe.rephrase_query("kbid", item)
|
378
|
-
# The rephrase query should not be wrapped in quotes, otherwise it will trigger an exact match query to the index
|
379
|
-
assert rephrased_query.strip('"') == rephrased_query
|
380
|
-
assert rephrased_query == "rephrased"
|
381
|
-
|
382
|
-
pe.session.post.assert_awaited_once_with(
|
383
|
-
url="public-europe1/api/v1/predict/rephrase/kbid",
|
384
|
-
json=item.dict(),
|
385
|
-
headers={"X-STF-NUAKEY": "Bearer nuakey"},
|
386
|
-
)
|
387
|
-
|
388
|
-
|
389
|
-
async def test_feedback():
|
390
|
-
pe = PredictEngine(
|
391
|
-
"cluster",
|
392
|
-
"public-{zone}",
|
393
|
-
zone="europe1",
|
394
|
-
onprem=False,
|
395
|
-
)
|
396
|
-
pe.session = get_mocked_session("POST", 204, json="", context_manager=False)
|
397
|
-
|
398
|
-
x_nucliadb_user = "user"
|
399
|
-
x_ndb_client = "client"
|
400
|
-
x_forwarded_for = "fwfor"
|
401
|
-
item = FeedbackRequest(ident="foo", good=True, task=FeedbackTasks.CHAT)
|
402
|
-
await pe.send_feedback("kbid", item, x_nucliadb_user, x_ndb_client, x_forwarded_for)
|
403
|
-
|
404
|
-
json_data = item.dict()
|
405
|
-
json_data["user_id"] = x_nucliadb_user
|
406
|
-
json_data["client"] = x_ndb_client
|
407
|
-
json_data["forwarded"] = x_forwarded_for
|
408
|
-
|
409
|
-
pe.session.post.assert_awaited_once_with(
|
410
|
-
url="cluster/api/v1/internal/predict/feedback",
|
411
|
-
json=json_data,
|
412
|
-
headers={"X-STF-KBID": "kbid"},
|
413
|
-
)
|
414
|
-
|
415
|
-
|
416
|
-
async def test_feedback_onprem():
|
417
|
-
pe = PredictEngine(
|
418
|
-
"cluster",
|
419
|
-
"public-{zone}",
|
420
|
-
zone="europe1",
|
421
|
-
onprem=True,
|
422
|
-
nuclia_service_account="nuakey",
|
423
|
-
)
|
424
|
-
|
425
|
-
pe.session = get_mocked_session("POST", 204, json="", context_manager=False)
|
426
|
-
|
427
|
-
x_nucliadb_user = "user"
|
428
|
-
x_ndb_client = "client"
|
429
|
-
x_forwarded_for = "fwfor"
|
430
|
-
item = FeedbackRequest(ident="foo", good=True, task=FeedbackTasks.CHAT)
|
431
|
-
await pe.send_feedback("kbid", item, x_nucliadb_user, x_ndb_client, x_forwarded_for)
|
432
|
-
|
433
|
-
json_data = item.dict()
|
434
|
-
json_data["user_id"] = x_nucliadb_user
|
435
|
-
json_data["client"] = x_ndb_client
|
436
|
-
json_data["forwarded"] = x_forwarded_for
|
437
|
-
|
438
|
-
pe.session.post.assert_awaited_once_with(
|
439
|
-
url="public-europe1/api/v1/predict/feedback/kbid",
|
440
|
-
json=json_data,
|
441
|
-
headers={"X-STF-NUAKEY": "Bearer nuakey"},
|
442
|
-
)
|
443
|
-
|
444
|
-
|
445
|
-
@pytest.mark.parametrize(
|
446
|
-
"content,exception",
|
447
|
-
[
|
448
|
-
("foobar", None),
|
449
|
-
("foobar0", None),
|
450
|
-
("foobar-1", RephraseError),
|
451
|
-
("foobar-2", RephraseMissingContextError),
|
452
|
-
],
|
453
|
-
)
|
454
|
-
async def test_parse_rephrase_response(content, exception):
|
455
|
-
resp = Mock()
|
456
|
-
resp.json = AsyncMock(return_value=content)
|
457
|
-
if exception:
|
458
|
-
with pytest.raises(exception):
|
459
|
-
await _parse_rephrase_response(resp)
|
460
|
-
else:
|
461
|
-
assert await _parse_rephrase_response(resp) == content.rstrip("0")
|
462
|
-
|
463
|
-
|
464
|
-
async def test_check_response_error():
|
465
|
-
response = aiohttp.ClientResponse(
|
466
|
-
"GET",
|
467
|
-
URL("http://predict:8080/api/v1/chat"),
|
468
|
-
writer=None,
|
469
|
-
continue100=Mock(),
|
470
|
-
timer=Mock(),
|
471
|
-
request_info=Mock(),
|
472
|
-
traces=[],
|
473
|
-
loop=Mock(),
|
474
|
-
session=Mock(),
|
475
|
-
)
|
476
|
-
response.status = 503
|
477
|
-
response._body = b"some error"
|
478
|
-
response._headers = {"Content-Type": "text/plain; charset=utf-8"}
|
479
|
-
|
480
|
-
with pytest.raises(ProxiedPredictAPIError) as ex:
|
481
|
-
await PredictEngine().check_response(response, expected_status=200)
|
482
|
-
assert ex.value.status == 503
|
483
|
-
assert ex.value.detail == "some error"
|
484
|
-
|
485
|
-
|
486
|
-
async def test_summarize():
|
487
|
-
pe = PredictEngine(
|
488
|
-
"cluster",
|
489
|
-
"public-{zone}",
|
490
|
-
zone="europe1",
|
491
|
-
onprem=False,
|
492
|
-
)
|
493
|
-
|
494
|
-
summarized = SummarizedResponse(
|
495
|
-
resources={"r1": SummarizedResource(summary="resource summary", tokens=10)}
|
496
|
-
)
|
497
|
-
pe.session = get_mocked_session(
|
498
|
-
"POST", 200, json=summarized.dict(), context_manager=False
|
499
|
-
)
|
500
|
-
|
501
|
-
item = SummarizeModel(
|
502
|
-
resources={"r1": SummarizeResourceModel(fields={"f1": "field extracted text"})}
|
503
|
-
)
|
504
|
-
summarize_response = await pe.summarize("kbid", item)
|
505
|
-
|
506
|
-
assert summarize_response == summarized
|
507
|
-
|
508
|
-
pe.session.post.assert_awaited_once_with(
|
509
|
-
url="cluster/api/v1/internal/predict/summarize",
|
510
|
-
json=item.dict(),
|
511
|
-
headers={"X-STF-KBID": "kbid"},
|
512
|
-
timeout=None,
|
513
|
-
)
|
514
|
-
|
515
|
-
|
516
|
-
async def test_summarize_onprem():
|
517
|
-
pe = PredictEngine(
|
518
|
-
"cluster",
|
519
|
-
"public-{zone}",
|
520
|
-
zone="europe1",
|
521
|
-
onprem=True,
|
522
|
-
nuclia_service_account="nuakey",
|
523
|
-
)
|
524
|
-
|
525
|
-
summarized = SummarizedResponse(
|
526
|
-
resources={"r1": SummarizedResource(summary="resource summary", tokens=10)}
|
527
|
-
)
|
528
|
-
pe.session = get_mocked_session(
|
529
|
-
"POST", 200, json=summarized.dict(), context_manager=False
|
530
|
-
)
|
531
|
-
|
532
|
-
item = SummarizeModel(
|
533
|
-
resources={"r1": SummarizeResourceModel(fields={"f1": "field extracted text"})}
|
534
|
-
)
|
535
|
-
summarize_response = await pe.summarize("kbid", item)
|
536
|
-
|
537
|
-
assert summarize_response == summarized
|
538
|
-
|
539
|
-
pe.session.post.assert_awaited_once_with(
|
540
|
-
url="public-europe1/api/v1/predict/summarize/kbid",
|
541
|
-
json=item.dict(),
|
542
|
-
headers={"X-STF-NUAKEY": "Bearer nuakey"},
|
543
|
-
timeout=None,
|
544
|
-
)
|
545
|
-
|
546
|
-
|
547
|
-
async def test_get_predict_headers_onprem():
|
548
|
-
nua_service_account = "nua-service-account"
|
549
|
-
pe = PredictEngine(
|
550
|
-
"cluster",
|
551
|
-
"public-{zone}",
|
552
|
-
zone="europe1",
|
553
|
-
onprem=True,
|
554
|
-
nuclia_service_account=nua_service_account,
|
555
|
-
)
|
556
|
-
assert pe.get_predict_headers("kbid") == {
|
557
|
-
"X-STF-NUAKEY": f"Bearer {nua_service_account}"
|
558
|
-
}
|
559
|
-
|
560
|
-
|
561
|
-
async def test_get_predict_headers_hosterd():
|
562
|
-
pe = PredictEngine(
|
563
|
-
"cluster",
|
564
|
-
"public-{zone}",
|
565
|
-
zone="europe1",
|
566
|
-
onprem=False,
|
567
|
-
)
|
568
|
-
assert pe.get_predict_headers("kbid") == {"X-STF-KBID": "kbid"}
|
569
|
-
|
570
|
-
|
571
|
-
async def test_get_answer_generator():
|
572
|
-
async def _iter_chunks():
|
573
|
-
await asyncio.sleep(0.1)
|
574
|
-
# Chunk, end_of_chunk
|
575
|
-
yield b"foo", False
|
576
|
-
yield b"bar", True
|
577
|
-
yield b"baz", True
|
578
|
-
|
579
|
-
resp = Mock()
|
580
|
-
resp.content.iter_chunks = Mock(return_value=_iter_chunks())
|
581
|
-
get_answer_generator(resp)
|
582
|
-
|
583
|
-
answer_chunks = [chunk async for chunk in get_answer_generator(resp)]
|
584
|
-
assert answer_chunks == [b"foobar", b"baz"]
|
@@ -1,19 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
@@ -1,33 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
pytest_plugins = [
|
21
|
-
"pytest_docker_fixtures",
|
22
|
-
"nucliadb.tests.fixtures",
|
23
|
-
"nucliadb.tests.tikv",
|
24
|
-
"nucliadb.ingest.tests.fixtures", # should be refactored out
|
25
|
-
"nucliadb.search.tests.fixtures",
|
26
|
-
"nucliadb.search.tests.node",
|
27
|
-
"nucliadb.standalone.tests.fixtures",
|
28
|
-
"nucliadb_utils.tests.conftest",
|
29
|
-
"nucliadb_utils.tests.gcs",
|
30
|
-
"nucliadb_utils.tests.nats",
|
31
|
-
"nucliadb_utils.tests.s3",
|
32
|
-
"nucliadb_utils.tests.indexing",
|
33
|
-
]
|
@@ -1,38 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import uuid
|
21
|
-
|
22
|
-
import pytest
|
23
|
-
|
24
|
-
from nucliadb.search.api.v1.router import KB_PREFIX, KBS_PREFIX
|
25
|
-
|
26
|
-
|
27
|
-
@pytest.fixture(scope="function")
|
28
|
-
async def knowledgebox_one(nucliadb_manager):
|
29
|
-
kbslug = str(uuid.uuid4())
|
30
|
-
data = {"slug": kbslug}
|
31
|
-
resp = await nucliadb_manager.post(f"/{KBS_PREFIX}", json=data)
|
32
|
-
assert resp.status_code == 201
|
33
|
-
kbid = resp.json()["uuid"]
|
34
|
-
|
35
|
-
yield kbid
|
36
|
-
|
37
|
-
resp = await nucliadb_manager.delete(f"/{KB_PREFIX}/{kbid}")
|
38
|
-
assert resp.status_code == 200
|
@@ -1,18 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|