nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -403
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +70 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +143 -117
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +37 -128
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -1,525 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import asyncio
|
21
|
-
from unittest import mock
|
22
|
-
from unittest.mock import AsyncMock, MagicMock, Mock
|
23
|
-
|
24
|
-
import aiohttp
|
25
|
-
import pytest
|
26
|
-
from yarl import URL
|
27
|
-
|
28
|
-
from nucliadb.search.predict import (
|
29
|
-
CitationsGenerativeResponse,
|
30
|
-
DummyPredictEngine,
|
31
|
-
GenerativeChunk,
|
32
|
-
MetaGenerativeResponse,
|
33
|
-
PredictEngine,
|
34
|
-
ProxiedPredictAPIError,
|
35
|
-
RephraseError,
|
36
|
-
RephraseMissingContextError,
|
37
|
-
SendToPredictError,
|
38
|
-
StatusGenerativeResponse,
|
39
|
-
TextGenerativeResponse,
|
40
|
-
_parse_rephrase_response,
|
41
|
-
get_answer_generator,
|
42
|
-
get_chat_ndjson_generator,
|
43
|
-
)
|
44
|
-
from nucliadb.tests.utils.aiohttp_session import get_mocked_session
|
45
|
-
from nucliadb_models.search import (
|
46
|
-
ChatModel,
|
47
|
-
FeedbackRequest,
|
48
|
-
FeedbackTasks,
|
49
|
-
RephraseModel,
|
50
|
-
SummarizedResource,
|
51
|
-
SummarizedResponse,
|
52
|
-
SummarizeModel,
|
53
|
-
SummarizeResourceModel,
|
54
|
-
)
|
55
|
-
from nucliadb_utils.exceptions import LimitsExceededError
|
56
|
-
|
57
|
-
|
58
|
-
@pytest.mark.asyncio
|
59
|
-
async def test_dummy_predict_engine():
|
60
|
-
pe = DummyPredictEngine()
|
61
|
-
await pe.initialize()
|
62
|
-
await pe.finalize()
|
63
|
-
await pe.send_feedback("kbid", Mock(), "", "", "")
|
64
|
-
assert await pe.rephrase_query("kbid", Mock())
|
65
|
-
assert await pe.chat_query("kbid", Mock())
|
66
|
-
assert await pe.detect_entities("kbid", "some sentence")
|
67
|
-
assert await pe.summarize("kbid", Mock(resources={}))
|
68
|
-
|
69
|
-
|
70
|
-
@pytest.mark.asyncio
|
71
|
-
@pytest.mark.parametrize(
|
72
|
-
"onprem,expected_url,expected_header,expected_header_value",
|
73
|
-
[
|
74
|
-
(
|
75
|
-
True,
|
76
|
-
"{public_url}/api/v1/predict/tokens/kbid",
|
77
|
-
"X-STF-NUAKEY",
|
78
|
-
"Bearer {service_account}",
|
79
|
-
),
|
80
|
-
(False, "{cluster}/api/v1/internal/predict/tokens", "X-STF-KBID", "{kbid}"),
|
81
|
-
],
|
82
|
-
)
|
83
|
-
async def test_detect_entities_ok(
|
84
|
-
onprem, expected_url, expected_header, expected_header_value
|
85
|
-
):
|
86
|
-
cluster_url = "cluster"
|
87
|
-
public_url = "public-{zone}"
|
88
|
-
service_account = "service-account"
|
89
|
-
zone = "zone1"
|
90
|
-
|
91
|
-
pe = PredictEngine(
|
92
|
-
cluster_url,
|
93
|
-
public_url,
|
94
|
-
service_account,
|
95
|
-
zone=zone,
|
96
|
-
onprem=onprem,
|
97
|
-
)
|
98
|
-
pe.session = get_mocked_session(
|
99
|
-
"GET",
|
100
|
-
200,
|
101
|
-
json={"tokens": [{"text": "foo", "ner": "bar"}]},
|
102
|
-
context_manager=False,
|
103
|
-
)
|
104
|
-
|
105
|
-
kbid = "kbid"
|
106
|
-
sentence = "some sentence"
|
107
|
-
assert len(await pe.detect_entities(kbid, sentence)) > 0
|
108
|
-
|
109
|
-
path = expected_url.format(public_url=pe.public_url, cluster=pe.cluster_url)
|
110
|
-
|
111
|
-
headers = {
|
112
|
-
expected_header: expected_header_value.format(
|
113
|
-
kbid=kbid, service_account=service_account
|
114
|
-
)
|
115
|
-
}
|
116
|
-
pe.session.get.assert_awaited_once_with(
|
117
|
-
url=path,
|
118
|
-
params={"text": sentence},
|
119
|
-
headers=headers,
|
120
|
-
)
|
121
|
-
|
122
|
-
|
123
|
-
@pytest.mark.asyncio
|
124
|
-
@pytest.mark.parametrize("onprem", [True, False])
|
125
|
-
async def test_detect_entities_error(onprem):
|
126
|
-
pe = PredictEngine(
|
127
|
-
"cluster",
|
128
|
-
"public-{zone}",
|
129
|
-
"service-account",
|
130
|
-
onprem=onprem,
|
131
|
-
)
|
132
|
-
pe.session = get_mocked_session("GET", 500, json="error", context_manager=False)
|
133
|
-
with pytest.raises(ProxiedPredictAPIError):
|
134
|
-
await pe.detect_entities("kbid", "some sentence")
|
135
|
-
|
136
|
-
|
137
|
-
@pytest.fixture(scope="function")
|
138
|
-
def session_limits_exceeded():
|
139
|
-
session = AsyncMock()
|
140
|
-
resp = Mock(status=402)
|
141
|
-
resp.json = AsyncMock(return_value={"detail": "limits exceeded"})
|
142
|
-
resp.read = AsyncMock(return_value="something went wrong")
|
143
|
-
session.post.return_value = resp
|
144
|
-
session.get.return_value = resp
|
145
|
-
return session
|
146
|
-
|
147
|
-
|
148
|
-
@pytest.mark.asyncio
|
149
|
-
@pytest.mark.parametrize(
|
150
|
-
"method,args",
|
151
|
-
[
|
152
|
-
("detect_entities", ["kbid", "sentence"]),
|
153
|
-
("chat_query", ["kbid", ChatModel(question="foo", user_id="bar")]),
|
154
|
-
(
|
155
|
-
"send_feedback",
|
156
|
-
[
|
157
|
-
"kbid",
|
158
|
-
FeedbackRequest(ident="foo", good=True, task=FeedbackTasks.CHAT),
|
159
|
-
"",
|
160
|
-
"",
|
161
|
-
"",
|
162
|
-
],
|
163
|
-
),
|
164
|
-
("rephrase_query", ["kbid", RephraseModel(question="foo", user_id="bar")]),
|
165
|
-
],
|
166
|
-
)
|
167
|
-
async def test_predict_engine_handles_limits_exceeded_error(
|
168
|
-
session_limits_exceeded, method, args
|
169
|
-
):
|
170
|
-
pe = PredictEngine(
|
171
|
-
"cluster",
|
172
|
-
"public-{zone}",
|
173
|
-
"service-account",
|
174
|
-
onprem=True,
|
175
|
-
)
|
176
|
-
pe.session = session_limits_exceeded
|
177
|
-
with pytest.raises(LimitsExceededError):
|
178
|
-
await getattr(pe, method)(*args)
|
179
|
-
|
180
|
-
|
181
|
-
@pytest.mark.parametrize(
|
182
|
-
"method,args,exception,output",
|
183
|
-
[
|
184
|
-
("chat_query", ["kbid", Mock()], True, None),
|
185
|
-
("rephrase_query", ["kbid", Mock()], True, None),
|
186
|
-
("send_feedback", ["kbid", MagicMock(), "", "", ""], False, None),
|
187
|
-
("detect_entities", ["kbid", "sentence"], False, []),
|
188
|
-
("summarize", ["kbid", Mock(resources={})], True, None),
|
189
|
-
],
|
190
|
-
)
|
191
|
-
async def test_onprem_nuclia_service_account_not_configured(
|
192
|
-
method, args, exception, output
|
193
|
-
):
|
194
|
-
pe = PredictEngine(
|
195
|
-
"cluster",
|
196
|
-
"public-{zone}",
|
197
|
-
nuclia_service_account=None,
|
198
|
-
onprem=True,
|
199
|
-
)
|
200
|
-
if exception:
|
201
|
-
with pytest.raises(SendToPredictError):
|
202
|
-
await getattr(pe, method)(*args)
|
203
|
-
else:
|
204
|
-
assert await getattr(pe, method)(*args) == output
|
205
|
-
|
206
|
-
|
207
|
-
async def test_rephrase():
|
208
|
-
pe = PredictEngine(
|
209
|
-
"cluster",
|
210
|
-
"public-{zone}",
|
211
|
-
zone="europe1",
|
212
|
-
onprem=False,
|
213
|
-
)
|
214
|
-
pe.session = get_mocked_session(
|
215
|
-
"POST", 200, json="rephrased", context_manager=False
|
216
|
-
)
|
217
|
-
|
218
|
-
item = RephraseModel(
|
219
|
-
question="question", chat_history=[], user_id="foo", user_context=["foo"]
|
220
|
-
)
|
221
|
-
rephrased_query = await pe.rephrase_query("kbid", item)
|
222
|
-
# The rephrase query should not be wrapped in quotes, otherwise it will trigger an exact match query to the index
|
223
|
-
assert rephrased_query.strip('"') == rephrased_query
|
224
|
-
assert rephrased_query == "rephrased"
|
225
|
-
|
226
|
-
pe.session.post.assert_awaited_once_with(
|
227
|
-
url="cluster/api/v1/internal/predict/rephrase",
|
228
|
-
json=item.dict(),
|
229
|
-
headers={"X-STF-KBID": "kbid"},
|
230
|
-
)
|
231
|
-
|
232
|
-
|
233
|
-
async def test_rephrase_onprem():
|
234
|
-
pe = PredictEngine(
|
235
|
-
"cluster",
|
236
|
-
"public-{zone}",
|
237
|
-
zone="europe1",
|
238
|
-
onprem=True,
|
239
|
-
nuclia_service_account="nuakey",
|
240
|
-
)
|
241
|
-
pe.session = get_mocked_session(
|
242
|
-
"POST", 200, json="rephrased", context_manager=False
|
243
|
-
)
|
244
|
-
|
245
|
-
item = RephraseModel(
|
246
|
-
question="question", chat_history=[], user_id="foo", user_context=["foo"]
|
247
|
-
)
|
248
|
-
rephrased_query = await pe.rephrase_query("kbid", item)
|
249
|
-
# The rephrase query should not be wrapped in quotes, otherwise it will trigger an exact match query to the index
|
250
|
-
assert rephrased_query.strip('"') == rephrased_query
|
251
|
-
assert rephrased_query == "rephrased"
|
252
|
-
|
253
|
-
pe.session.post.assert_awaited_once_with(
|
254
|
-
url="public-europe1/api/v1/predict/rephrase/kbid",
|
255
|
-
json=item.dict(),
|
256
|
-
headers={"X-STF-NUAKEY": "Bearer nuakey"},
|
257
|
-
)
|
258
|
-
|
259
|
-
|
260
|
-
async def test_feedback():
|
261
|
-
pe = PredictEngine(
|
262
|
-
"cluster",
|
263
|
-
"public-{zone}",
|
264
|
-
zone="europe1",
|
265
|
-
onprem=False,
|
266
|
-
)
|
267
|
-
pe.session = get_mocked_session("POST", 204, json="", context_manager=False)
|
268
|
-
|
269
|
-
x_nucliadb_user = "user"
|
270
|
-
x_ndb_client = "client"
|
271
|
-
x_forwarded_for = "fwfor"
|
272
|
-
item = FeedbackRequest(ident="foo", good=True, task=FeedbackTasks.CHAT)
|
273
|
-
await pe.send_feedback("kbid", item, x_nucliadb_user, x_ndb_client, x_forwarded_for)
|
274
|
-
|
275
|
-
json_data = item.dict()
|
276
|
-
json_data["user_id"] = x_nucliadb_user
|
277
|
-
json_data["client"] = x_ndb_client
|
278
|
-
json_data["forwarded"] = x_forwarded_for
|
279
|
-
|
280
|
-
pe.session.post.assert_awaited_once_with(
|
281
|
-
url="cluster/api/v1/internal/predict/feedback",
|
282
|
-
json=json_data,
|
283
|
-
headers={"X-STF-KBID": "kbid"},
|
284
|
-
)
|
285
|
-
|
286
|
-
|
287
|
-
async def test_feedback_onprem():
|
288
|
-
pe = PredictEngine(
|
289
|
-
"cluster",
|
290
|
-
"public-{zone}",
|
291
|
-
zone="europe1",
|
292
|
-
onprem=True,
|
293
|
-
nuclia_service_account="nuakey",
|
294
|
-
)
|
295
|
-
|
296
|
-
pe.session = get_mocked_session("POST", 204, json="", context_manager=False)
|
297
|
-
|
298
|
-
x_nucliadb_user = "user"
|
299
|
-
x_ndb_client = "client"
|
300
|
-
x_forwarded_for = "fwfor"
|
301
|
-
item = FeedbackRequest(ident="foo", good=True, task=FeedbackTasks.CHAT)
|
302
|
-
await pe.send_feedback("kbid", item, x_nucliadb_user, x_ndb_client, x_forwarded_for)
|
303
|
-
|
304
|
-
json_data = item.dict()
|
305
|
-
json_data["user_id"] = x_nucliadb_user
|
306
|
-
json_data["client"] = x_ndb_client
|
307
|
-
json_data["forwarded"] = x_forwarded_for
|
308
|
-
|
309
|
-
pe.session.post.assert_awaited_once_with(
|
310
|
-
url="public-europe1/api/v1/predict/feedback/kbid",
|
311
|
-
json=json_data,
|
312
|
-
headers={"X-STF-NUAKEY": "Bearer nuakey"},
|
313
|
-
)
|
314
|
-
|
315
|
-
|
316
|
-
@pytest.mark.parametrize(
|
317
|
-
"content,exception",
|
318
|
-
[
|
319
|
-
("foobar", None),
|
320
|
-
("foobar0", None),
|
321
|
-
("foobar-1", RephraseError),
|
322
|
-
("foobar-2", RephraseMissingContextError),
|
323
|
-
],
|
324
|
-
)
|
325
|
-
async def test_parse_rephrase_response(content, exception):
|
326
|
-
resp = Mock()
|
327
|
-
resp.json = AsyncMock(return_value=content)
|
328
|
-
if exception:
|
329
|
-
with pytest.raises(exception):
|
330
|
-
await _parse_rephrase_response(resp)
|
331
|
-
else:
|
332
|
-
assert await _parse_rephrase_response(resp) == content.rstrip("0")
|
333
|
-
|
334
|
-
|
335
|
-
async def test_check_response_error():
|
336
|
-
response = aiohttp.ClientResponse(
|
337
|
-
"GET",
|
338
|
-
URL("http://predict:8080/api/v1/chat"),
|
339
|
-
writer=None,
|
340
|
-
continue100=Mock(),
|
341
|
-
timer=Mock(),
|
342
|
-
request_info=Mock(),
|
343
|
-
traces=[],
|
344
|
-
loop=Mock(),
|
345
|
-
session=Mock(),
|
346
|
-
)
|
347
|
-
response.status = 503
|
348
|
-
response._body = b"some error"
|
349
|
-
response._headers = {"Content-Type": "text/plain; charset=utf-8"}
|
350
|
-
|
351
|
-
with pytest.raises(ProxiedPredictAPIError) as ex:
|
352
|
-
await PredictEngine().check_response(response, expected_status=200)
|
353
|
-
assert ex.value.status == 503
|
354
|
-
assert ex.value.detail == "some error"
|
355
|
-
|
356
|
-
|
357
|
-
async def test_summarize():
|
358
|
-
pe = PredictEngine(
|
359
|
-
"cluster",
|
360
|
-
"public-{zone}",
|
361
|
-
zone="europe1",
|
362
|
-
onprem=False,
|
363
|
-
)
|
364
|
-
|
365
|
-
summarized = SummarizedResponse(
|
366
|
-
resources={"r1": SummarizedResource(summary="resource summary", tokens=10)}
|
367
|
-
)
|
368
|
-
pe.session = get_mocked_session(
|
369
|
-
"POST", 200, json=summarized.dict(), context_manager=False
|
370
|
-
)
|
371
|
-
|
372
|
-
item = SummarizeModel(
|
373
|
-
resources={"r1": SummarizeResourceModel(fields={"f1": "field extracted text"})}
|
374
|
-
)
|
375
|
-
summarize_response = await pe.summarize("kbid", item)
|
376
|
-
|
377
|
-
assert summarize_response == summarized
|
378
|
-
|
379
|
-
pe.session.post.assert_awaited_once_with(
|
380
|
-
url="cluster/api/v1/internal/predict/summarize",
|
381
|
-
json=item.dict(),
|
382
|
-
headers={"X-STF-KBID": "kbid"},
|
383
|
-
timeout=None,
|
384
|
-
)
|
385
|
-
|
386
|
-
|
387
|
-
async def test_summarize_onprem():
|
388
|
-
pe = PredictEngine(
|
389
|
-
"cluster",
|
390
|
-
"public-{zone}",
|
391
|
-
zone="europe1",
|
392
|
-
onprem=True,
|
393
|
-
nuclia_service_account="nuakey",
|
394
|
-
)
|
395
|
-
|
396
|
-
summarized = SummarizedResponse(
|
397
|
-
resources={"r1": SummarizedResource(summary="resource summary", tokens=10)}
|
398
|
-
)
|
399
|
-
pe.session = get_mocked_session(
|
400
|
-
"POST", 200, json=summarized.dict(), context_manager=False
|
401
|
-
)
|
402
|
-
|
403
|
-
item = SummarizeModel(
|
404
|
-
resources={"r1": SummarizeResourceModel(fields={"f1": "field extracted text"})}
|
405
|
-
)
|
406
|
-
summarize_response = await pe.summarize("kbid", item)
|
407
|
-
|
408
|
-
assert summarize_response == summarized
|
409
|
-
|
410
|
-
pe.session.post.assert_awaited_once_with(
|
411
|
-
url="public-europe1/api/v1/predict/summarize/kbid",
|
412
|
-
json=item.dict(),
|
413
|
-
headers={"X-STF-NUAKEY": "Bearer nuakey"},
|
414
|
-
timeout=None,
|
415
|
-
)
|
416
|
-
|
417
|
-
|
418
|
-
async def test_get_predict_headers_onprem():
|
419
|
-
nua_service_account = "nua-service-account"
|
420
|
-
pe = PredictEngine(
|
421
|
-
"cluster",
|
422
|
-
"public-{zone}",
|
423
|
-
zone="europe1",
|
424
|
-
onprem=True,
|
425
|
-
nuclia_service_account=nua_service_account,
|
426
|
-
)
|
427
|
-
assert pe.get_predict_headers("kbid") == {
|
428
|
-
"X-STF-NUAKEY": f"Bearer {nua_service_account}"
|
429
|
-
}
|
430
|
-
|
431
|
-
|
432
|
-
async def test_get_predict_headers_hosterd():
|
433
|
-
pe = PredictEngine(
|
434
|
-
"cluster",
|
435
|
-
"public-{zone}",
|
436
|
-
zone="europe1",
|
437
|
-
onprem=False,
|
438
|
-
)
|
439
|
-
assert pe.get_predict_headers("kbid") == {"X-STF-KBID": "kbid"}
|
440
|
-
|
441
|
-
|
442
|
-
async def test_get_answer_generator():
|
443
|
-
async def _iter_chunks():
|
444
|
-
await asyncio.sleep(0.1)
|
445
|
-
# Chunk, end_of_chunk
|
446
|
-
yield b"foo", False
|
447
|
-
yield b"bar", True
|
448
|
-
yield b"baz", True
|
449
|
-
|
450
|
-
resp = Mock()
|
451
|
-
resp.content.iter_chunks = Mock(return_value=_iter_chunks())
|
452
|
-
get_answer_generator(resp)
|
453
|
-
|
454
|
-
answer_chunks = [chunk async for chunk in get_answer_generator(resp)]
|
455
|
-
assert answer_chunks == [b"foobar", b"baz"]
|
456
|
-
|
457
|
-
|
458
|
-
async def test_get_chat_ndjson_generator():
|
459
|
-
streamed_elements = [
|
460
|
-
TextGenerativeResponse(text="foo"),
|
461
|
-
MetaGenerativeResponse(input_tokens=1, output_tokens=1, timings={"foo": 1}),
|
462
|
-
CitationsGenerativeResponse(citations={"foo": "bar"}),
|
463
|
-
StatusGenerativeResponse(code="-1", details="foo"),
|
464
|
-
]
|
465
|
-
|
466
|
-
async def _content():
|
467
|
-
for element in streamed_elements:
|
468
|
-
gen_chunk = GenerativeChunk(chunk=element)
|
469
|
-
yield gen_chunk.json() + "\n"
|
470
|
-
# yield an unknown chunk, to make sure it is ignored
|
471
|
-
yield '{"unknown": "chunk"}\n'
|
472
|
-
|
473
|
-
response = mock.Mock()
|
474
|
-
response.content = _content()
|
475
|
-
|
476
|
-
gen = get_chat_ndjson_generator(response)
|
477
|
-
|
478
|
-
parsed = [line async for line in gen]
|
479
|
-
assert len(parsed) == 4
|
480
|
-
assert parsed[0].chunk == TextGenerativeResponse(text="foo")
|
481
|
-
assert parsed[1].chunk == MetaGenerativeResponse(
|
482
|
-
input_tokens=1, output_tokens=1, timings={"foo": 1}
|
483
|
-
)
|
484
|
-
assert parsed[2].chunk == CitationsGenerativeResponse(citations={"foo": "bar"})
|
485
|
-
assert parsed[3].chunk == StatusGenerativeResponse(code="-1", details="foo")
|
486
|
-
|
487
|
-
|
488
|
-
async def test_chat_query_ndjson():
|
489
|
-
pe = PredictEngine(
|
490
|
-
"cluster",
|
491
|
-
"public-{zone}",
|
492
|
-
zone="europe1",
|
493
|
-
onprem=False,
|
494
|
-
)
|
495
|
-
streamed_elements = [
|
496
|
-
TextGenerativeResponse(text="foo"),
|
497
|
-
StatusGenerativeResponse(code="-1", details="foo"),
|
498
|
-
]
|
499
|
-
|
500
|
-
async def _content():
|
501
|
-
for element in streamed_elements:
|
502
|
-
gen_chunk = GenerativeChunk(chunk=element)
|
503
|
-
yield gen_chunk.json() + "\n"
|
504
|
-
|
505
|
-
chat_query_response = Mock()
|
506
|
-
chat_query_response.status = 200
|
507
|
-
chat_query_response.headers = {"NUCLIA-LEARNING-ID": "learning-id"}
|
508
|
-
chat_query_response.content = _content()
|
509
|
-
pe.session = mock.Mock()
|
510
|
-
pe.session.post = AsyncMock(return_value=chat_query_response)
|
511
|
-
|
512
|
-
item = ChatModel(question="foo", user_id="bar")
|
513
|
-
|
514
|
-
learning_id, generator = await pe.chat_query_ndjson("kbid", item)
|
515
|
-
|
516
|
-
assert learning_id == "learning-id"
|
517
|
-
parsed = [line async for line in generator]
|
518
|
-
assert len(parsed) == 2
|
519
|
-
assert parsed[0].chunk == TextGenerativeResponse(text="foo")
|
520
|
-
assert parsed[1].chunk == StatusGenerativeResponse(code="-1", details="foo")
|
521
|
-
|
522
|
-
# Make sure the request was made with the correct headers
|
523
|
-
pe.session.post.call_args_list[0].kwargs["headers"][
|
524
|
-
"Accept"
|
525
|
-
] == "application/ndjson"
|
@@ -1,19 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
@@ -1,33 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
pytest_plugins = [
|
21
|
-
"pytest_docker_fixtures",
|
22
|
-
"nucliadb.tests.fixtures",
|
23
|
-
"nucliadb.tests.tikv",
|
24
|
-
"nucliadb.ingest.tests.fixtures", # should be refactored out
|
25
|
-
"nucliadb.search.tests.fixtures",
|
26
|
-
"nucliadb.search.tests.node",
|
27
|
-
"nucliadb.standalone.tests.fixtures",
|
28
|
-
"nucliadb_utils.tests.conftest",
|
29
|
-
"nucliadb_utils.tests.gcs",
|
30
|
-
"nucliadb_utils.tests.nats",
|
31
|
-
"nucliadb_utils.tests.s3",
|
32
|
-
"nucliadb_utils.tests.indexing",
|
33
|
-
]
|
@@ -1,38 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import uuid
|
21
|
-
|
22
|
-
import pytest
|
23
|
-
|
24
|
-
from nucliadb.search.api.v1.router import KB_PREFIX, KBS_PREFIX
|
25
|
-
|
26
|
-
|
27
|
-
@pytest.fixture(scope="function")
|
28
|
-
async def knowledgebox_one(nucliadb_manager):
|
29
|
-
kbslug = str(uuid.uuid4())
|
30
|
-
data = {"slug": kbslug}
|
31
|
-
resp = await nucliadb_manager.post(f"/{KBS_PREFIX}", json=data)
|
32
|
-
assert resp.status_code == 201
|
33
|
-
kbid = resp.json()["uuid"]
|
34
|
-
|
35
|
-
yield kbid
|
36
|
-
|
37
|
-
resp = await nucliadb_manager.delete(f"/{KB_PREFIX}/{kbid}")
|
38
|
-
assert resp.status_code == 200
|
@@ -1,18 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
@@ -1,61 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
|
20
|
-
from unittest.mock import Mock
|
21
|
-
|
22
|
-
import orjson
|
23
|
-
import pytest
|
24
|
-
from fastapi import Request
|
25
|
-
|
26
|
-
from nucliadb.standalone import api_router
|
27
|
-
from nucliadb.standalone.settings import Settings
|
28
|
-
|
29
|
-
pytestmark = pytest.mark.asyncio
|
30
|
-
|
31
|
-
|
32
|
-
class DummyTestRequest(Request):
|
33
|
-
@property
|
34
|
-
def auth(self):
|
35
|
-
return Mock(scopes=["READER"])
|
36
|
-
|
37
|
-
@property
|
38
|
-
def user(self):
|
39
|
-
return Mock(display_name="username")
|
40
|
-
|
41
|
-
@property
|
42
|
-
def app(self):
|
43
|
-
return Mock(settings=Settings(jwk_key=None))
|
44
|
-
|
45
|
-
|
46
|
-
@pytest.fixture
|
47
|
-
def http_request():
|
48
|
-
request = DummyTestRequest(
|
49
|
-
scope={
|
50
|
-
"type": "http",
|
51
|
-
"http_version": "1.1",
|
52
|
-
"method": "GET",
|
53
|
-
"headers": [],
|
54
|
-
}
|
55
|
-
)
|
56
|
-
yield request
|
57
|
-
|
58
|
-
|
59
|
-
async def test_get_temp_access_token(http_request):
|
60
|
-
resp = api_router.get_temp_access_token(http_request)
|
61
|
-
assert "token" in orjson.loads(resp.body)
|