nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -403
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +70 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +143 -117
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +37 -128
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -22,26 +22,17 @@ from typing import Optional, Union
|
|
22
22
|
|
23
23
|
from fastapi import HTTPException
|
24
24
|
from fastapi.responses import StreamingResponse
|
25
|
-
from fastapi_versioning import version
|
25
|
+
from fastapi_versioning import version
|
26
26
|
from google.protobuf.json_format import MessageToDict
|
27
|
-
from nucliadb_protos.knowledgebox_pb2 import Synonyms
|
28
|
-
from nucliadb_protos.writer_pb2 import (
|
29
|
-
GetEntitiesGroupRequest,
|
30
|
-
GetEntitiesGroupResponse,
|
31
|
-
GetLabelSetRequest,
|
32
|
-
GetLabelSetResponse,
|
33
|
-
GetLabelsRequest,
|
34
|
-
GetLabelsResponse,
|
35
|
-
ListEntitiesGroupsRequest,
|
36
|
-
ListEntitiesGroupsResponse,
|
37
|
-
)
|
38
27
|
from starlette.requests import Request
|
39
28
|
|
40
29
|
from nucliadb.common import datamanagers
|
41
30
|
from nucliadb.common.cluster.settings import in_standalone_mode
|
42
31
|
from nucliadb.common.context.fastapi import get_app_context
|
32
|
+
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
43
33
|
from nucliadb.common.http_clients import processing
|
44
34
|
from nucliadb.common.maindb.utils import get_driver
|
35
|
+
from nucliadb.common.models_utils import from_proto
|
45
36
|
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
|
46
37
|
from nucliadb.models.responses import HTTPClientError
|
47
38
|
from nucliadb.reader import SERVICE_NAME
|
@@ -49,12 +40,19 @@ from nucliadb.reader.api.v1.router import KB_PREFIX, api
|
|
49
40
|
from nucliadb.reader.reader.notifications import kb_notifications_stream
|
50
41
|
from nucliadb_models.entities import (
|
51
42
|
EntitiesGroup,
|
52
|
-
EntitiesGroupSummary,
|
53
43
|
KnowledgeBoxEntities,
|
54
44
|
)
|
55
45
|
from nucliadb_models.labels import KnowledgeBoxLabels, LabelSet
|
56
46
|
from nucliadb_models.resource import NucliaDBRoles
|
57
47
|
from nucliadb_models.synonyms import KnowledgeBoxSynonyms
|
48
|
+
from nucliadb_protos import writer_pb2
|
49
|
+
from nucliadb_protos.knowledgebox_pb2 import Synonyms
|
50
|
+
from nucliadb_protos.writer_pb2 import (
|
51
|
+
GetEntitiesGroupRequest,
|
52
|
+
GetEntitiesGroupResponse,
|
53
|
+
ListEntitiesGroupsRequest,
|
54
|
+
ListEntitiesGroupsResponse,
|
55
|
+
)
|
58
56
|
from nucliadb_utils.authentication import requires
|
59
57
|
from nucliadb_utils.utilities import get_ingest, get_storage
|
60
58
|
|
@@ -88,19 +86,15 @@ async def list_entities_groups(kbid: str):
|
|
88
86
|
if entities_groups.status == ListEntitiesGroupsResponse.Status.OK:
|
89
87
|
response = KnowledgeBoxEntities(uuid=kbid)
|
90
88
|
for key, eg_summary in entities_groups.groups.items():
|
91
|
-
entities_group =
|
89
|
+
entities_group = from_proto.entities_group_summary(eg_summary)
|
92
90
|
response.groups[key] = entities_group
|
93
91
|
return response
|
94
92
|
elif entities_groups.status == ListEntitiesGroupsResponse.Status.NOTFOUND:
|
95
93
|
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
96
94
|
elif entities_groups.status == ListEntitiesGroupsResponse.Status.ERROR:
|
97
|
-
raise HTTPException(
|
98
|
-
status_code=500, detail="Error while listing entities groups"
|
99
|
-
)
|
95
|
+
raise HTTPException(status_code=500, detail="Error while listing entities groups")
|
100
96
|
else:
|
101
|
-
raise HTTPException(
|
102
|
-
status_code=500, detail="Error on listing Knowledge box entities"
|
103
|
-
)
|
97
|
+
raise HTTPException(status_code=500, detail="Error on listing Knowledge box entities")
|
104
98
|
|
105
99
|
|
106
100
|
@api.get(
|
@@ -120,20 +114,14 @@ async def get_entity(request: Request, kbid: str, group: str) -> EntitiesGroup:
|
|
120
114
|
|
121
115
|
kbobj: GetEntitiesGroupResponse = await ingest.GetEntitiesGroup(l_request) # type: ignore
|
122
116
|
if kbobj.status == GetEntitiesGroupResponse.Status.OK:
|
123
|
-
response =
|
117
|
+
response = from_proto.entities_group(kbobj.group)
|
124
118
|
return response
|
125
119
|
elif kbobj.status == GetEntitiesGroupResponse.Status.KB_NOT_FOUND:
|
126
|
-
raise HTTPException(
|
127
|
-
status_code=404, detail=f"Knowledge Box '{kbid}' does not exist"
|
128
|
-
)
|
120
|
+
raise HTTPException(status_code=404, detail=f"Knowledge Box '{kbid}' does not exist")
|
129
121
|
elif kbobj.status == GetEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND:
|
130
|
-
raise HTTPException(
|
131
|
-
status_code=404, detail=f"Entities group '{group}' does not exist"
|
132
|
-
)
|
122
|
+
raise HTTPException(status_code=404, detail=f"Entities group '{group}' does not exist")
|
133
123
|
else:
|
134
|
-
raise HTTPException(
|
135
|
-
status_code=500, detail="Error on getting entities group on a Knowledge box"
|
136
|
-
)
|
124
|
+
raise HTTPException(status_code=500, detail="Error on getting entities group on a Knowledge box")
|
137
125
|
|
138
126
|
|
139
127
|
@api.get(
|
@@ -145,30 +133,29 @@ async def get_entity(request: Request, kbid: str, group: str) -> EntitiesGroup:
|
|
145
133
|
)
|
146
134
|
@requires(NucliaDBRoles.READER)
|
147
135
|
@version(1)
|
148
|
-
async def
|
149
|
-
|
150
|
-
|
151
|
-
|
136
|
+
async def get_labelsets_endoint(request: Request, kbid: str) -> KnowledgeBoxLabels:
|
137
|
+
try:
|
138
|
+
return await get_labelsets(kbid)
|
139
|
+
except KnowledgeBoxNotFound:
|
140
|
+
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
141
|
+
|
152
142
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
143
|
+
async def get_labelsets(kbid: str) -> KnowledgeBoxLabels:
|
144
|
+
kb_exists = await datamanagers.atomic.kb.exists_kb(kbid=kbid)
|
145
|
+
if not kb_exists:
|
146
|
+
raise KnowledgeBoxNotFound()
|
147
|
+
labelsets: writer_pb2.Labels = await datamanagers.atomic.labelset.get_all(kbid=kbid)
|
148
|
+
response = KnowledgeBoxLabels(uuid=kbid)
|
149
|
+
for labelset, labelset_data in labelsets.labelset.items():
|
150
|
+
labelset_response = LabelSet(
|
151
|
+
**MessageToDict(
|
152
|
+
labelset_data,
|
153
|
+
preserving_proto_field_name=True,
|
154
|
+
including_default_value_fields=True,
|
163
155
|
)
|
164
|
-
response.labelsets[labelset] = labelset_response
|
165
|
-
return response
|
166
|
-
elif kbobj.status == GetLabelsResponse.Status.NOTFOUND:
|
167
|
-
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
168
|
-
else:
|
169
|
-
raise HTTPException(
|
170
|
-
status_code=500, detail="Error on getting Knowledge box labels"
|
171
156
|
)
|
157
|
+
response.labelsets[labelset] = labelset_response
|
158
|
+
return response
|
172
159
|
|
173
160
|
|
174
161
|
@api.get(
|
@@ -180,28 +167,31 @@ async def get_labelsets(request: Request, kbid: str) -> KnowledgeBoxLabels:
|
|
180
167
|
)
|
181
168
|
@requires(NucliaDBRoles.READER)
|
182
169
|
@version(1)
|
183
|
-
async def
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
170
|
+
async def get_labelset_endpoint(request: Request, kbid: str, labelset: str) -> LabelSet:
|
171
|
+
try:
|
172
|
+
return await get_labelset(kbid, labelset)
|
173
|
+
except KnowledgeBoxNotFound:
|
174
|
+
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
175
|
+
|
188
176
|
|
189
|
-
|
190
|
-
|
177
|
+
async def get_labelset(kbid: str, labelset_id: str) -> LabelSet:
|
178
|
+
kb_exists = await datamanagers.atomic.kb.exists_kb(kbid=kbid)
|
179
|
+
if not kb_exists:
|
180
|
+
raise KnowledgeBoxNotFound()
|
181
|
+
labelset: Optional[writer_pb2.LabelSet] = await datamanagers.atomic.labelset.get(
|
182
|
+
kbid=kbid, labelset_id=labelset_id
|
183
|
+
)
|
184
|
+
if labelset is None:
|
185
|
+
response = LabelSet()
|
186
|
+
else:
|
191
187
|
response = LabelSet(
|
192
188
|
**MessageToDict(
|
193
|
-
|
189
|
+
labelset,
|
194
190
|
preserving_proto_field_name=True,
|
195
191
|
including_default_value_fields=True,
|
196
192
|
)
|
197
193
|
)
|
198
|
-
|
199
|
-
elif kbobj.status == GetLabelSetResponse.Status.NOTFOUND:
|
200
|
-
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
201
|
-
else:
|
202
|
-
raise HTTPException(
|
203
|
-
status_code=500, detail="Error on getting labelset on a Knowledge box"
|
204
|
-
)
|
194
|
+
return response
|
205
195
|
|
206
196
|
|
207
197
|
@api.get(
|
@@ -215,15 +205,10 @@ async def get_labelset(request: Request, kbid: str, labelset: str) -> LabelSet:
|
|
215
205
|
@requires(NucliaDBRoles.READER)
|
216
206
|
@version(1)
|
217
207
|
async def get_custom_synonyms(request: Request, kbid: str):
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
if synonyms is None:
|
224
|
-
synonyms = Synonyms()
|
225
|
-
|
226
|
-
return KnowledgeBoxSynonyms.from_message(synonyms)
|
208
|
+
if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
|
209
|
+
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
210
|
+
synonyms = await datamanagers.atomic.synonyms.get(kbid=kbid) or Synonyms()
|
211
|
+
return from_proto.kb_synonyms(synonyms)
|
227
212
|
|
228
213
|
|
229
214
|
@api.get(
|
@@ -262,7 +247,7 @@ async def notifications_endpoint(
|
|
262
247
|
|
263
248
|
|
264
249
|
async def exists_kb(kbid: str) -> bool:
|
265
|
-
async with datamanagers.
|
250
|
+
async with datamanagers.with_ro_transaction() as txn:
|
266
251
|
return await datamanagers.kb.exists_kb(txn, kbid=kbid)
|
267
252
|
|
268
253
|
|
@@ -290,14 +275,12 @@ async def processing_status(
|
|
290
275
|
return HTTPClientError(status_code=404, detail="Knowledge Box not found")
|
291
276
|
|
292
277
|
async with processing.ProcessingHTTPClient() as client:
|
293
|
-
results = await client.requests(
|
294
|
-
cursor=cursor, scheduled=scheduled, kbid=kbid, limit=limit
|
295
|
-
)
|
278
|
+
results = await client.requests(cursor=cursor, scheduled=scheduled, kbid=kbid, limit=limit)
|
296
279
|
|
297
280
|
storage = await get_storage(service_name=SERVICE_NAME)
|
298
281
|
driver = get_driver()
|
299
282
|
|
300
|
-
async with driver.transaction(
|
283
|
+
async with driver.transaction(read_only=True) as txn:
|
301
284
|
kb = KnowledgeBox(txn, storage, kbid)
|
302
285
|
|
303
286
|
max_simultaneous = asyncio.Semaphore(10)
|
@@ -319,9 +302,7 @@ async def processing_status(
|
|
319
302
|
|
320
303
|
result_items = [
|
321
304
|
item
|
322
|
-
for item in await asyncio.gather(
|
323
|
-
*[_composition(result) for result in results.results]
|
324
|
-
)
|
305
|
+
for item in await asyncio.gather(*[_composition(result) for result in results.results])
|
325
306
|
if item is not None
|
326
307
|
]
|
327
308
|
|
nucliadb/reader/app.py
CHANGED
@@ -18,63 +18,46 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
import
|
21
|
+
import importlib.metadata
|
22
|
+
|
22
23
|
from fastapi import FastAPI
|
23
24
|
from starlette.middleware import Middleware
|
24
25
|
from starlette.middleware.authentication import AuthenticationMiddleware
|
25
|
-
from starlette.middleware.cors import CORSMiddleware
|
26
26
|
from starlette.requests import ClientDisconnect, Request
|
27
27
|
from starlette.responses import HTMLResponse
|
28
28
|
|
29
|
-
from nucliadb.common.context.fastapi import set_app_context
|
30
29
|
from nucliadb.reader import API_PREFIX
|
31
30
|
from nucliadb.reader.api.v1.router import api as api_v1
|
32
|
-
from nucliadb.reader.lifecycle import
|
31
|
+
from nucliadb.reader.lifecycle import lifespan
|
33
32
|
from nucliadb_telemetry import errors
|
34
33
|
from nucliadb_telemetry.fastapi.utils import (
|
35
34
|
client_disconnect_handler,
|
36
35
|
global_exception_handler,
|
37
36
|
)
|
38
|
-
from nucliadb_utils import
|
37
|
+
from nucliadb_utils.audit.stream import AuditMiddleware
|
39
38
|
from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
|
40
39
|
from nucliadb_utils.fastapi.openapi import extend_openapi
|
41
40
|
from nucliadb_utils.fastapi.versioning import VersionedFastAPI
|
42
|
-
from nucliadb_utils.settings import
|
43
|
-
from nucliadb_utils.utilities import
|
41
|
+
from nucliadb_utils.settings import running_settings
|
42
|
+
from nucliadb_utils.utilities import get_audit
|
44
43
|
|
45
44
|
middleware = []
|
46
|
-
|
47
|
-
if has_feature(const.Features.CORS_MIDDLEWARE, default=False):
|
48
|
-
middleware.append(
|
49
|
-
Middleware(
|
50
|
-
CORSMiddleware,
|
51
|
-
allow_origins=http_settings.cors_origins,
|
52
|
-
allow_methods=["*"],
|
53
|
-
# Authorization will be exluded from * in the future, (CORS non-wildcard request-header).
|
54
|
-
# Browsers already showing deprecation notices, so it needs to be specified explicitly
|
55
|
-
allow_headers=["*", "Authorization"],
|
56
|
-
)
|
57
|
-
)
|
58
|
-
|
59
45
|
middleware.extend(
|
60
46
|
[
|
61
47
|
Middleware(
|
62
48
|
AuthenticationMiddleware,
|
63
49
|
backend=NucliaCloudAuthenticationBackend(),
|
64
|
-
)
|
50
|
+
),
|
51
|
+
Middleware(AuditMiddleware, audit_utility_getter=get_audit),
|
65
52
|
]
|
66
53
|
)
|
67
54
|
|
68
|
-
errors.setup_error_handling(
|
69
|
-
|
70
|
-
on_startup = [initialize]
|
71
|
-
on_shutdown = [finalize]
|
55
|
+
errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
|
72
56
|
|
73
57
|
fastapi_settings = dict(
|
74
58
|
debug=running_settings.debug,
|
75
59
|
middleware=middleware,
|
76
|
-
|
77
|
-
on_shutdown=on_shutdown,
|
60
|
+
lifespan=lifespan,
|
78
61
|
exception_handlers={
|
79
62
|
Exception: global_exception_handler,
|
80
63
|
ClientDisconnect: client_disconnect_handler,
|
@@ -104,7 +87,7 @@ def create_application() -> FastAPI:
|
|
104
87
|
# Use raw starlette routes to avoid unnecessary overhead
|
105
88
|
application.add_route("/", homepage)
|
106
89
|
|
107
|
-
# Inject application context into the fastapi app's state
|
108
|
-
set_app_context(application)
|
90
|
+
# # Inject application context into the fastapi app's state
|
91
|
+
# set_app_context(application)
|
109
92
|
|
110
93
|
return application
|
nucliadb/reader/lifecycle.py
CHANGED
@@ -17,20 +17,34 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
+
from contextlib import asynccontextmanager
|
21
|
+
|
22
|
+
from fastapi import FastAPI
|
23
|
+
|
24
|
+
from nucliadb.common.context.fastapi import inject_app_context
|
20
25
|
from nucliadb.ingest.utils import start_ingest, stop_ingest
|
21
26
|
from nucliadb.reader import SERVICE_NAME
|
22
27
|
from nucliadb_telemetry.utils import clean_telemetry, setup_telemetry
|
23
|
-
from nucliadb_utils.utilities import
|
28
|
+
from nucliadb_utils.utilities import (
|
29
|
+
get_storage,
|
30
|
+
start_audit_utility,
|
31
|
+
stop_audit_utility,
|
32
|
+
teardown_storage,
|
33
|
+
)
|
24
34
|
|
25
35
|
|
26
|
-
|
36
|
+
@asynccontextmanager
|
37
|
+
async def lifespan(app: FastAPI):
|
27
38
|
await setup_telemetry(SERVICE_NAME)
|
28
|
-
|
39
|
+
await get_storage(service_name=SERVICE_NAME)
|
29
40
|
await start_ingest(SERVICE_NAME)
|
30
41
|
await start_audit_utility(SERVICE_NAME)
|
31
42
|
|
43
|
+
# Inject application context into the fastapi app's state
|
44
|
+
async with inject_app_context(app):
|
45
|
+
yield
|
32
46
|
|
33
|
-
async def finalize() -> None:
|
34
47
|
await stop_ingest()
|
35
48
|
await stop_audit_utility()
|
49
|
+
await teardown_storage()
|
36
50
|
await clean_telemetry(SERVICE_NAME)
|
nucliadb/reader/py.typed
ADDED
File without changes
|
@@ -59,9 +59,7 @@ RESOURCE_OP_PB_TO_MODEL = {
|
|
59
59
|
}
|
60
60
|
|
61
61
|
|
62
|
-
async def kb_notifications_stream(
|
63
|
-
context: ApplicationContext, kbid: str
|
64
|
-
) -> AsyncGenerator[bytes, None]:
|
62
|
+
async def kb_notifications_stream(context: ApplicationContext, kbid: str) -> AsyncGenerator[bytes, None]:
|
65
63
|
"""
|
66
64
|
Returns an async generator that yields pubsub notifications for the given kbid.
|
67
65
|
The generator will return after NOTIFICATIONS_TIMEOUT_S seconds.
|
@@ -70,13 +68,9 @@ async def kb_notifications_stream(
|
|
70
68
|
resource_cache: dict[str, str] = {}
|
71
69
|
async with async_timeout.timeout(NOTIFICATIONS_TIMEOUT_S):
|
72
70
|
async for pb_notification in kb_notifications(kbid):
|
73
|
-
notification = await serialize_notification(
|
74
|
-
context, pb_notification, resource_cache
|
75
|
-
)
|
71
|
+
notification = await serialize_notification(context, pb_notification, resource_cache)
|
76
72
|
line = encode_streamed_notification(notification) + b"\n"
|
77
|
-
logger.debug(
|
78
|
-
f"Sending notification: {notification.type}", extra={"kbid": kbid}
|
79
|
-
)
|
73
|
+
logger.debug(f"Sending notification: {notification.type}", extra={"kbid": kbid})
|
80
74
|
yield line
|
81
75
|
except asyncio.TimeoutError:
|
82
76
|
return
|
@@ -106,9 +100,7 @@ async def kb_notifications(kbid: str) -> AsyncGenerator[writer_pb2.Notification,
|
|
106
100
|
except asyncio.QueueFull: # pragma: no cover
|
107
101
|
logger.warning("Queue is full, dropping notification", extra={"kbid": kbid})
|
108
102
|
|
109
|
-
async with managed_subscription(
|
110
|
-
pubsub, key=subscription_key, handler=subscription_handler
|
111
|
-
):
|
103
|
+
async with managed_subscription(pubsub, key=subscription_key, handler=subscription_handler):
|
112
104
|
try:
|
113
105
|
while True:
|
114
106
|
notification: writer_pb2.Notification = await queue.get()
|
@@ -121,9 +113,7 @@ async def kb_notifications(kbid: str) -> AsyncGenerator[writer_pb2.Notification,
|
|
121
113
|
)
|
122
114
|
except Exception as ex:
|
123
115
|
capture_exception(ex)
|
124
|
-
logger.error(
|
125
|
-
"Error while streaming activity", exc_info=True, extra={"kbid": kbid}
|
126
|
-
)
|
116
|
+
logger.error("Error while streaming activity", exc_info=True, extra={"kbid": kbid})
|
127
117
|
return
|
128
118
|
|
129
119
|
|
@@ -144,9 +134,7 @@ async def managed_subscription(pubsub: PubSubDriver, key: str, handler: Callback
|
|
144
134
|
try:
|
145
135
|
await pubsub.unsubscribe(key=key, subscription_id=subscription_id)
|
146
136
|
except Exception: # pragma: no cover
|
147
|
-
logger.warning(
|
148
|
-
"Error while unsubscribing from activity stream", exc_info=True
|
149
|
-
)
|
137
|
+
logger.warning("Error while unsubscribing from activity stream", exc_info=True)
|
150
138
|
|
151
139
|
|
152
140
|
async def serialize_notification(
|
@@ -156,9 +144,7 @@ async def serialize_notification(
|
|
156
144
|
resource_uuid = pb.uuid
|
157
145
|
seqid = pb.seqid
|
158
146
|
|
159
|
-
resource_title = await get_resource_title_cached(
|
160
|
-
context.kv_driver, kbid, resource_uuid, cache
|
161
|
-
)
|
147
|
+
resource_title = await get_resource_title_cached(context.kv_driver, kbid, resource_uuid, cache)
|
162
148
|
if pb.action == writer_pb2.Notification.Action.INDEXED:
|
163
149
|
return ResourceIndexedNotification(
|
164
150
|
data=ResourceIndexed(
|
@@ -213,18 +199,14 @@ async def get_resource_title_cached(
|
|
213
199
|
return resource_title
|
214
200
|
|
215
201
|
|
216
|
-
async def get_resource_title(
|
217
|
-
kv_driver: Driver, kbid: str, resource_uuid: str
|
218
|
-
) -> Optional[str]:
|
202
|
+
async def get_resource_title(kv_driver: Driver, kbid: str, resource_uuid: str) -> Optional[str]:
|
219
203
|
async with kv_driver.transaction(read_only=True) as txn:
|
220
|
-
basic = await datamanagers.resources.get_basic(
|
221
|
-
txn, kbid=kbid, rid=resource_uuid
|
222
|
-
)
|
204
|
+
basic = await datamanagers.resources.get_basic(txn, kbid=kbid, rid=resource_uuid)
|
223
205
|
if basic is None:
|
224
206
|
return None
|
225
207
|
return basic.title
|
226
208
|
|
227
209
|
|
228
210
|
def encode_streamed_notification(notification: Notification) -> bytes:
|
229
|
-
encoded_nofication = notification.
|
211
|
+
encoded_nofication = notification.model_dump_json().encode("utf-8")
|
230
212
|
return encoded_nofication
|
nucliadb/search/__init__.py
CHANGED
@@ -34,9 +34,7 @@ API_PREFIX = "api"
|
|
34
34
|
class EndpointFilter(logging.Filter):
|
35
35
|
def filter(self, record: logging.LogRecord) -> bool:
|
36
36
|
return (
|
37
|
-
record.args is not None
|
38
|
-
and len(record.args) >= 3
|
39
|
-
and record.args[2] not in ("/", "/metrics") # type: ignore
|
37
|
+
record.args is not None and len(record.args) >= 3 and record.args[2] not in ("/", "/metrics") # type: ignore
|
40
38
|
)
|
41
39
|
|
42
40
|
|
@@ -18,7 +18,7 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
from . import ask # noqa
|
21
|
-
from . import
|
21
|
+
from . import catalog # noqa
|
22
22
|
from . import feedback # noqa
|
23
23
|
from . import find # noqa
|
24
24
|
from . import knowledgebox # noqa
|
@@ -27,6 +27,5 @@ from . import search # noqa
|
|
27
27
|
from . import suggest # noqa
|
28
28
|
from . import summarize # noqa
|
29
29
|
from .resource import ask as ask_resource # noqa
|
30
|
-
from .resource import chat as chat_resource # noqa
|
31
30
|
from .resource import search as search_resource # noqa
|
32
31
|
from .router import api # noqa
|
nucliadb/search/api/v1/ask.py
CHANGED
@@ -25,7 +25,10 @@ from starlette.responses import StreamingResponse
|
|
25
25
|
|
26
26
|
from nucliadb.models.responses import HTTPClientError
|
27
27
|
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
28
|
+
from nucliadb.search.search import cache
|
28
29
|
from nucliadb.search.search.chat.ask import AskResult, ask, handled_ask_exceptions
|
30
|
+
from nucliadb.search.search.chat.exceptions import AnswerJsonSchemaTooLong
|
31
|
+
from nucliadb.search.search.utils import maybe_log_request_payload
|
29
32
|
from nucliadb_models.resource import NucliaDBRoles
|
30
33
|
from nucliadb_models.search import (
|
31
34
|
AskRequest,
|
@@ -43,8 +46,6 @@ from nucliadb_utils.authentication import requires
|
|
43
46
|
description="Ask questions on a Knowledge Box",
|
44
47
|
tags=["Search"],
|
45
48
|
response_model=SyncAskResponse,
|
46
|
-
# Add this to OpenAPI schema when endpoint is not in beta anymore
|
47
|
-
include_in_schema=False,
|
48
49
|
)
|
49
50
|
@requires(NucliaDBRoles.READER)
|
50
51
|
@version(1)
|
@@ -76,15 +77,21 @@ async def create_ask_response(
|
|
76
77
|
x_synchronous: bool,
|
77
78
|
resource: Optional[str] = None,
|
78
79
|
) -> Response:
|
80
|
+
maybe_log_request_payload(kbid, "/ask", ask_request)
|
79
81
|
ask_request.max_tokens = parse_max_tokens(ask_request.max_tokens)
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
82
|
+
with cache.request_caches():
|
83
|
+
try:
|
84
|
+
ask_result: AskResult = await ask(
|
85
|
+
kbid=kbid,
|
86
|
+
ask_request=ask_request,
|
87
|
+
user_id=user_id,
|
88
|
+
client_type=client_type,
|
89
|
+
origin=origin,
|
90
|
+
resource=resource,
|
91
|
+
)
|
92
|
+
except AnswerJsonSchemaTooLong as err:
|
93
|
+
return HTTPClientError(status_code=400, detail=str(err))
|
94
|
+
|
88
95
|
headers = {
|
89
96
|
"NUCLIA-LEARNING-ID": ask_result.nuclia_learning_id or "unknown",
|
90
97
|
"Access-Control-Expose-Headers": "NUCLIA-LEARNING-ID",
|