nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/reader/app.py
CHANGED
@@ -18,76 +18,46 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
import
|
21
|
+
import importlib.metadata
|
22
|
+
|
22
23
|
from fastapi import FastAPI
|
23
|
-
from fastapi.responses import JSONResponse
|
24
24
|
from starlette.middleware import Middleware
|
25
25
|
from starlette.middleware.authentication import AuthenticationMiddleware
|
26
|
-
from starlette.middleware.cors import CORSMiddleware
|
27
26
|
from starlette.requests import ClientDisconnect, Request
|
28
27
|
from starlette.responses import HTMLResponse
|
29
28
|
|
30
|
-
from nucliadb.common.context.fastapi import set_app_context
|
31
29
|
from nucliadb.reader import API_PREFIX
|
32
30
|
from nucliadb.reader.api.v1.router import api as api_v1
|
33
|
-
from nucliadb.reader.lifecycle import
|
31
|
+
from nucliadb.reader.lifecycle import lifespan
|
34
32
|
from nucliadb_telemetry import errors
|
35
|
-
from
|
33
|
+
from nucliadb_telemetry.fastapi.utils import (
|
34
|
+
client_disconnect_handler,
|
35
|
+
global_exception_handler,
|
36
|
+
)
|
37
|
+
from nucliadb_utils.audit.stream import AuditMiddleware
|
36
38
|
from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
|
37
39
|
from nucliadb_utils.fastapi.openapi import extend_openapi
|
38
40
|
from nucliadb_utils.fastapi.versioning import VersionedFastAPI
|
39
|
-
from nucliadb_utils.settings import
|
40
|
-
from nucliadb_utils.utilities import
|
41
|
+
from nucliadb_utils.settings import running_settings
|
42
|
+
from nucliadb_utils.utilities import get_audit
|
41
43
|
|
42
44
|
middleware = []
|
43
|
-
|
44
|
-
if has_feature(const.Features.CORS_MIDDLEWARE, default=False):
|
45
|
-
middleware.append(
|
46
|
-
Middleware(
|
47
|
-
CORSMiddleware,
|
48
|
-
allow_origins=http_settings.cors_origins,
|
49
|
-
allow_methods=["*"],
|
50
|
-
# Authorization will be exluded from * in the future, (CORS non-wildcard request-header).
|
51
|
-
# Browsers already showing deprecation notices, so it needs to be specified explicitly
|
52
|
-
allow_headers=["*", "Authorization"],
|
53
|
-
)
|
54
|
-
)
|
55
|
-
|
56
45
|
middleware.extend(
|
57
46
|
[
|
58
47
|
Middleware(
|
59
48
|
AuthenticationMiddleware,
|
60
49
|
backend=NucliaCloudAuthenticationBackend(),
|
61
|
-
)
|
50
|
+
),
|
51
|
+
Middleware(AuditMiddleware, audit_utility_getter=get_audit),
|
62
52
|
]
|
63
53
|
)
|
64
54
|
|
65
|
-
errors.setup_error_handling(
|
66
|
-
|
67
|
-
on_startup = [initialize]
|
68
|
-
on_shutdown = [finalize]
|
69
|
-
|
70
|
-
|
71
|
-
async def global_exception_handler(request: Request, exc: Exception):
|
72
|
-
errors.capture_exception(exc)
|
73
|
-
return JSONResponse(
|
74
|
-
status_code=500,
|
75
|
-
content={"detail": "Something went wrong, please contact your administrator"},
|
76
|
-
)
|
77
|
-
|
78
|
-
|
79
|
-
async def client_disconnect_handler(request: Request, exc: ClientDisconnect):
|
80
|
-
return JSONResponse(
|
81
|
-
status_code=200,
|
82
|
-
content={"detail": "Client disconnected while an operation was in course"},
|
83
|
-
)
|
84
|
-
|
55
|
+
errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
|
85
56
|
|
86
57
|
fastapi_settings = dict(
|
87
58
|
debug=running_settings.debug,
|
88
59
|
middleware=middleware,
|
89
|
-
|
90
|
-
on_shutdown=on_shutdown,
|
60
|
+
lifespan=lifespan,
|
91
61
|
exception_handlers={
|
92
62
|
Exception: global_exception_handler,
|
93
63
|
ClientDisconnect: client_disconnect_handler,
|
@@ -117,7 +87,7 @@ def create_application() -> FastAPI:
|
|
117
87
|
# Use raw starlette routes to avoid unnecessary overhead
|
118
88
|
application.add_route("/", homepage)
|
119
89
|
|
120
|
-
# Inject application context into the fastapi app's state
|
121
|
-
set_app_context(application)
|
90
|
+
# # Inject application context into the fastapi app's state
|
91
|
+
# set_app_context(application)
|
122
92
|
|
123
93
|
return application
|
nucliadb/reader/lifecycle.py
CHANGED
@@ -17,20 +17,34 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
+
from contextlib import asynccontextmanager
|
21
|
+
|
22
|
+
from fastapi import FastAPI
|
23
|
+
|
24
|
+
from nucliadb.common.context.fastapi import inject_app_context
|
20
25
|
from nucliadb.ingest.utils import start_ingest, stop_ingest
|
21
26
|
from nucliadb.reader import SERVICE_NAME
|
22
27
|
from nucliadb_telemetry.utils import clean_telemetry, setup_telemetry
|
23
|
-
from nucliadb_utils.utilities import
|
28
|
+
from nucliadb_utils.utilities import (
|
29
|
+
get_storage,
|
30
|
+
start_audit_utility,
|
31
|
+
stop_audit_utility,
|
32
|
+
teardown_storage,
|
33
|
+
)
|
24
34
|
|
25
35
|
|
26
|
-
|
36
|
+
@asynccontextmanager
|
37
|
+
async def lifespan(app: FastAPI):
|
27
38
|
await setup_telemetry(SERVICE_NAME)
|
28
|
-
|
39
|
+
await get_storage(service_name=SERVICE_NAME)
|
29
40
|
await start_ingest(SERVICE_NAME)
|
30
41
|
await start_audit_utility(SERVICE_NAME)
|
31
42
|
|
43
|
+
# Inject application context into the fastapi app's state
|
44
|
+
async with inject_app_context(app):
|
45
|
+
yield
|
32
46
|
|
33
|
-
async def finalize() -> None:
|
34
47
|
await stop_ingest()
|
35
48
|
await stop_audit_utility()
|
49
|
+
await teardown_storage()
|
36
50
|
await clean_telemetry(SERVICE_NAME)
|
nucliadb/reader/py.typed
ADDED
File without changes
|
@@ -59,9 +59,7 @@ RESOURCE_OP_PB_TO_MODEL = {
|
|
59
59
|
}
|
60
60
|
|
61
61
|
|
62
|
-
async def kb_notifications_stream(
|
63
|
-
context: ApplicationContext, kbid: str
|
64
|
-
) -> AsyncGenerator[bytes, None]:
|
62
|
+
async def kb_notifications_stream(context: ApplicationContext, kbid: str) -> AsyncGenerator[bytes, None]:
|
65
63
|
"""
|
66
64
|
Returns an async generator that yields pubsub notifications for the given kbid.
|
67
65
|
The generator will return after NOTIFICATIONS_TIMEOUT_S seconds.
|
@@ -70,13 +68,9 @@ async def kb_notifications_stream(
|
|
70
68
|
resource_cache: dict[str, str] = {}
|
71
69
|
async with async_timeout.timeout(NOTIFICATIONS_TIMEOUT_S):
|
72
70
|
async for pb_notification in kb_notifications(kbid):
|
73
|
-
notification = await serialize_notification(
|
74
|
-
context, pb_notification, resource_cache
|
75
|
-
)
|
71
|
+
notification = await serialize_notification(context, pb_notification, resource_cache)
|
76
72
|
line = encode_streamed_notification(notification) + b"\n"
|
77
|
-
logger.debug(
|
78
|
-
f"Sending notification: {notification.type}", extra={"kbid": kbid}
|
79
|
-
)
|
73
|
+
logger.debug(f"Sending notification: {notification.type}", extra={"kbid": kbid})
|
80
74
|
yield line
|
81
75
|
except asyncio.TimeoutError:
|
82
76
|
return
|
@@ -101,17 +95,12 @@ async def kb_notifications(kbid: str) -> AsyncGenerator[writer_pb2.Notification,
|
|
101
95
|
data = pubsub.parse(raw_data)
|
102
96
|
notification = writer_pb2.Notification()
|
103
97
|
notification.ParseFromString(data)
|
104
|
-
# We don't need the whole broker message, so we clear it to
|
105
|
-
# save space, as it can potentially be very big
|
106
|
-
notification.ClearField("message")
|
107
98
|
try:
|
108
99
|
queue.put_nowait(notification)
|
109
100
|
except asyncio.QueueFull: # pragma: no cover
|
110
101
|
logger.warning("Queue is full, dropping notification", extra={"kbid": kbid})
|
111
102
|
|
112
|
-
async with managed_subscription(
|
113
|
-
pubsub, key=subscription_key, handler=subscription_handler
|
114
|
-
):
|
103
|
+
async with managed_subscription(pubsub, key=subscription_key, handler=subscription_handler):
|
115
104
|
try:
|
116
105
|
while True:
|
117
106
|
notification: writer_pb2.Notification = await queue.get()
|
@@ -124,9 +113,7 @@ async def kb_notifications(kbid: str) -> AsyncGenerator[writer_pb2.Notification,
|
|
124
113
|
)
|
125
114
|
except Exception as ex:
|
126
115
|
capture_exception(ex)
|
127
|
-
logger.error(
|
128
|
-
"Error while streaming activity", exc_info=True, extra={"kbid": kbid}
|
129
|
-
)
|
116
|
+
logger.error("Error while streaming activity", exc_info=True, extra={"kbid": kbid})
|
130
117
|
return
|
131
118
|
|
132
119
|
|
@@ -147,9 +134,7 @@ async def managed_subscription(pubsub: PubSubDriver, key: str, handler: Callback
|
|
147
134
|
try:
|
148
135
|
await pubsub.unsubscribe(key=key, subscription_id=subscription_id)
|
149
136
|
except Exception: # pragma: no cover
|
150
|
-
logger.warning(
|
151
|
-
"Error while unsubscribing from activity stream", exc_info=True
|
152
|
-
)
|
137
|
+
logger.warning("Error while unsubscribing from activity stream", exc_info=True)
|
153
138
|
|
154
139
|
|
155
140
|
async def serialize_notification(
|
@@ -159,9 +144,7 @@ async def serialize_notification(
|
|
159
144
|
resource_uuid = pb.uuid
|
160
145
|
seqid = pb.seqid
|
161
146
|
|
162
|
-
resource_title = await get_resource_title_cached(
|
163
|
-
context.kv_driver, kbid, resource_uuid, cache
|
164
|
-
)
|
147
|
+
resource_title = await get_resource_title_cached(context.kv_driver, kbid, resource_uuid, cache)
|
165
148
|
if pb.action == writer_pb2.Notification.Action.INDEXED:
|
166
149
|
return ResourceIndexedNotification(
|
167
150
|
data=ResourceIndexed(
|
@@ -216,18 +199,14 @@ async def get_resource_title_cached(
|
|
216
199
|
return resource_title
|
217
200
|
|
218
201
|
|
219
|
-
async def get_resource_title(
|
220
|
-
kv_driver: Driver, kbid: str, resource_uuid: str
|
221
|
-
) -> Optional[str]:
|
202
|
+
async def get_resource_title(kv_driver: Driver, kbid: str, resource_uuid: str) -> Optional[str]:
|
222
203
|
async with kv_driver.transaction(read_only=True) as txn:
|
223
|
-
basic = await datamanagers.resources.
|
224
|
-
txn, kbid=kbid, rid=resource_uuid
|
225
|
-
)
|
204
|
+
basic = await datamanagers.resources.get_basic(txn, kbid=kbid, rid=resource_uuid)
|
226
205
|
if basic is None:
|
227
206
|
return None
|
228
207
|
return basic.title
|
229
208
|
|
230
209
|
|
231
210
|
def encode_streamed_notification(notification: Notification) -> bytes:
|
232
|
-
encoded_nofication = notification.
|
211
|
+
encoded_nofication = notification.model_dump_json().encode("utf-8")
|
233
212
|
return encoded_nofication
|
nucliadb/search/__init__.py
CHANGED
@@ -34,9 +34,7 @@ API_PREFIX = "api"
|
|
34
34
|
class EndpointFilter(logging.Filter):
    """Logging filter that drops records whose third argument is "/" or "/metrics".

    The path is read from ``record.args[2]``. Records with no arguments, or
    with fewer than three arguments, are dropped as well.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        """Return True when the record should be emitted, False to drop it."""
        args = record.args
        if args is None or len(args) < 3:
            return False
        try:
            path = args[2]  # type: ignore
        except KeyError:
            # ``logging`` stores a single mapping argument as a dict in
            # ``record.args``. Such a record has no positional path, so it is
            # dropped instead of letting the lookup raise out of the filter.
            return False
        return path not in ("/", "/metrics")
|
41
39
|
|
42
40
|
|
@@ -17,7 +17,8 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from . import
|
20
|
+
from . import ask # noqa
|
21
|
+
from . import catalog # noqa
|
21
22
|
from . import feedback # noqa
|
22
23
|
from . import find # noqa
|
23
24
|
from . import knowledgebox # noqa
|
@@ -26,6 +27,5 @@ from . import search # noqa
|
|
26
27
|
from . import suggest # noqa
|
27
28
|
from . import summarize # noqa
|
28
29
|
from .resource import ask as ask_resource # noqa
|
29
|
-
from .resource import chat as chat_resource # noqa
|
30
30
|
from .resource import search as search_resource # noqa
|
31
31
|
from .router import api # noqa
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
from typing import Optional, Union
|
21
|
+
|
22
|
+
from fastapi import Header, Request, Response
|
23
|
+
from fastapi_versioning import version
|
24
|
+
from starlette.responses import StreamingResponse
|
25
|
+
|
26
|
+
from nucliadb.models.responses import HTTPClientError
|
27
|
+
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
28
|
+
from nucliadb.search.search import cache
|
29
|
+
from nucliadb.search.search.chat.ask import AskResult, ask, handled_ask_exceptions
|
30
|
+
from nucliadb.search.search.chat.exceptions import AnswerJsonSchemaTooLong
|
31
|
+
from nucliadb.search.search.utils import maybe_log_request_payload
|
32
|
+
from nucliadb_models.resource import NucliaDBRoles
|
33
|
+
from nucliadb_models.search import (
|
34
|
+
AskRequest,
|
35
|
+
NucliaDBClientType,
|
36
|
+
SyncAskResponse,
|
37
|
+
parse_max_tokens,
|
38
|
+
)
|
39
|
+
from nucliadb_utils.authentication import requires
|
40
|
+
|
41
|
+
|
42
|
+
@api.post(
    f"/{KB_PREFIX}/{{kbid}}/ask",
    status_code=200,
    summary="Ask Knowledge Box",
    description="Ask questions on a Knowledge Box",
    tags=["Search"],
    response_model=SyncAskResponse,
)
@requires(NucliaDBRoles.READER)
@version(1)
async def ask_knowledgebox_endpoint(
    request: Request,
    kbid: str,
    item: AskRequest,
    x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
    x_nucliadb_user: str = Header(""),
    x_forwarded_for: str = Header(""),
    x_synchronous: bool = Header(
        default=False,
        description=(
            "When set to true, outputs response as JSON in a non-streaming way. "
            "This is slower and requires waiting for entire answer to be ready."
        ),
    ),
) -> Union[StreamingResponse, HTTPClientError, Response]:
    # HTTP entry point for asking a Knowledge Box: the request body and the
    # caller-identifying headers are handed over, positionally, to
    # ``create_ask_response``, which builds the actual response.
    ask_response = await create_ask_response(
        kbid,
        item,
        x_nucliadb_user,
        x_ndb_client,
        x_forwarded_for,
        x_synchronous,
    )
    return ask_response
|
68
|
+
|
69
|
+
|
70
|
+
@handled_ask_exceptions
async def create_ask_response(
    kbid: str,
    ask_request: AskRequest,
    user_id: str,
    client_type: NucliaDBClientType,
    origin: str,
    x_synchronous: bool,
    resource: Optional[str] = None,
) -> Response:
    """Run an ask request and wrap its result in an HTTP response.

    The request payload is passed to ``maybe_log_request_payload`` and its
    ``max_tokens`` is normalized with ``parse_max_tokens`` before ``ask`` is
    called.

    Returns:
        - an ``HTTPClientError`` with status 400 when ``ask`` raises
          ``AnswerJsonSchemaTooLong``;
        - a JSON ``Response`` holding the complete answer when
          ``x_synchronous`` is true;
        - otherwise a ``StreamingResponse`` of newline-delimited JSON.

        Successful responses carry the ``NUCLIA-LEARNING-ID`` header
        ("unknown" when the result has no learning id).
    """
    maybe_log_request_payload(kbid, "/ask", ask_request)
    ask_request.max_tokens = parse_max_tokens(ask_request.max_tokens)
    with cache.request_caches():
        try:
            ask_result: AskResult = await ask(
                kbid=kbid,
                ask_request=ask_request,
                user_id=user_id,
                client_type=client_type,
                origin=origin,
                resource=resource,
            )
        except AnswerJsonSchemaTooLong as err:
            return HTTPClientError(status_code=400, detail=str(err))

        response_headers = {
            "NUCLIA-LEARNING-ID": ask_result.nuclia_learning_id or "unknown",
            "Access-Control-Expose-Headers": "NUCLIA-LEARNING-ID",
        }
        if not x_synchronous:
            # Default mode: stream the answer as it is produced.
            return StreamingResponse(
                content=ask_result.ndjson_stream(),
                status_code=200,
                headers=response_headers,
                media_type="application/x-ndjson",
            )

        # Synchronous mode: wait for the whole answer and send it as one JSON body.
        json_body = await ask_result.json()
        return Response(
            content=json_body,
            status_code=200,
            headers=response_headers,
            media_type="application/json",
        )
|
@@ -0,0 +1,184 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
from time import time
|
21
|
+
from typing import Optional, Union
|
22
|
+
|
23
|
+
from fastapi import Request, Response
|
24
|
+
from fastapi_versioning import version
|
25
|
+
|
26
|
+
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
27
|
+
from nucliadb.common.maindb.pg import PGDriver
|
28
|
+
from nucliadb.common.maindb.utils import get_driver
|
29
|
+
from nucliadb.models.responses import HTTPClientError
|
30
|
+
from nucliadb.search import logger
|
31
|
+
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
32
|
+
from nucliadb.search.api.v1.utils import fastapi_query
|
33
|
+
from nucliadb.search.search import cache
|
34
|
+
from nucliadb.search.search.exceptions import InvalidQueryError
|
35
|
+
from nucliadb.search.search.merge import fetch_resources
|
36
|
+
from nucliadb.search.search.pgcatalog import pgcatalog_search
|
37
|
+
from nucliadb.search.search.query_parser.parser import parse_catalog
|
38
|
+
from nucliadb.search.search.utils import (
|
39
|
+
maybe_log_request_payload,
|
40
|
+
)
|
41
|
+
from nucliadb_models.common import FieldTypeName
|
42
|
+
from nucliadb_models.metadata import ResourceProcessingStatus
|
43
|
+
from nucliadb_models.resource import NucliaDBRoles
|
44
|
+
from nucliadb_models.search import (
|
45
|
+
CatalogRequest,
|
46
|
+
CatalogResponse,
|
47
|
+
KnowledgeboxSearchResults,
|
48
|
+
ResourceProperties,
|
49
|
+
SearchParamDefaults,
|
50
|
+
SortField,
|
51
|
+
SortOptions,
|
52
|
+
SortOrder,
|
53
|
+
)
|
54
|
+
from nucliadb_models.utils import DateTime
|
55
|
+
from nucliadb_utils.authentication import requires
|
56
|
+
from nucliadb_utils.exceptions import LimitsExceededError
|
57
|
+
|
58
|
+
|
59
|
+
@api.get(
    f"/{KB_PREFIX}/{{kbid}}/catalog",
    status_code=200,
    summary="List resources of a Knowledge Box",
    description="List resources of a Knowledge Box",
    response_model=KnowledgeboxSearchResults,
    response_model_exclude_unset=True,
    tags=["Search"],
)
@requires(NucliaDBRoles.READER)
@version(1)
async def catalog_get(
    request: Request,
    response: Response,
    kbid: str,
    query: str = fastapi_query(SearchParamDefaults.query),
    filters: list[str] = fastapi_query(SearchParamDefaults.filters),
    faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
    sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
    sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
    sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
    page_number: int = fastapi_query(SearchParamDefaults.catalog_page_number),
    page_size: int = fastapi_query(SearchParamDefaults.catalog_page_size),
    shards: list[str] = fastapi_query(SearchParamDefaults.shards, deprecated=True),
    with_status: Optional[ResourceProcessingStatus] = fastapi_query(
        SearchParamDefaults.with_status, deprecated="Use filters instead"
    ),
    debug: bool = fastapi_query(SearchParamDefaults.debug, include_in_schema=False),
    range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
    range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
    range_modification_start: Optional[DateTime] = fastapi_query(
        SearchParamDefaults.range_modification_start
    ),
    range_modification_end: Optional[DateTime] = fastapi_query(
        SearchParamDefaults.range_modification_end
    ),
    hidden: Optional[bool] = fastapi_query(SearchParamDefaults.hidden),
) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
    # GET variant of the catalog endpoint: the query-string parameters are
    # collected into a CatalogRequest and handled by the shared ``catalog``
    # implementation.
    catalog_request = CatalogRequest(
        # What to look for
        query=query,
        filters=filters,
        faceted=faceted,
        with_status=with_status,
        hidden=hidden,
        # Date ranges
        range_creation_start=range_creation_start,
        range_creation_end=range_creation_end,
        range_modification_start=range_modification_start,
        range_modification_end=range_modification_end,
        # Pagination and miscellaneous options
        page_number=page_number,
        page_size=page_size,
        shards=shards,
        debug=debug,
    )
    # Sort options are only attached when a sort field was given.
    if sort_field:
        catalog_request.sort = SortOptions(field=sort_field, limit=sort_limit, order=sort_order)
    return await catalog(kbid, catalog_request)
|
115
|
+
|
116
|
+
|
117
|
+
@api.post(
    f"/{KB_PREFIX}/{{kbid}}/catalog",
    status_code=200,
    summary="List resources of a Knowledge Box",
    description="List resources of a Knowledge Box",
    response_model=KnowledgeboxSearchResults,
    response_model_exclude_unset=True,
    tags=["Search"],
)
@requires(NucliaDBRoles.READER)
@version(1)
async def catalog_post(
    request: Request,
    kbid: str,
    item: CatalogRequest,
) -> Union[CatalogResponse, HTTPClientError]:
    # POST variant of the catalog endpoint: the request body already is a
    # CatalogRequest, so it goes straight to the shared ``catalog`` implementation.
    catalog_results = await catalog(kbid, item)
    return catalog_results
|
134
|
+
|
135
|
+
|
136
|
+
async def catalog(
    kbid: str,
    item: CatalogRequest,
):
    """
    Catalog endpoint is a simplified version of the search endpoint, it only
    returns bm25 results on titles and it does not support vector search.
    It is useful for listing resources in a knowledge box.

    The request is parsed with ``parse_catalog`` and executed through
    ``pgcatalog_search``; the matching resources are then loaded with
    ``fetch_resources``, asking only for their basic data and errors.

    Returns a ``CatalogResponse`` on success, or an ``HTTPClientError`` with:

    - 501 when ``pgcatalog_enabled()`` is false,
    - 412 when ``InvalidQueryError`` is raised,
    - 404 when ``KnowledgeBoxNotFound`` is raised,
    - the status code and detail carried by a ``LimitsExceededError``.

    Requests that take longer than 2 seconds are logged as a warning.
    """
    # Elapsed time is measured with a monotonic clock: wall-clock time can jump
    # (e.g. on clock adjustments) and would yield spurious or missed
    # slow-request warnings.
    from time import monotonic

    if not pgcatalog_enabled():  # pragma: no cover
        return HTTPClientError(status_code=501, detail="PG driver is needed for catalog search")

    maybe_log_request_payload(kbid, "/catalog", item)
    started_at = monotonic()
    try:
        with cache.request_caches():
            query_parser = parse_catalog(kbid, item)

            catalog_results = CatalogResponse()
            catalog_results.fulltext = await pgcatalog_search(query_parser)
            catalog_results.resources = await fetch_resources(
                resources=[r.rid for r in catalog_results.fulltext.results],
                kbid=kbid,
                show=[ResourceProperties.BASIC, ResourceProperties.ERRORS],
                field_type_filter=list(FieldTypeName),
                extracted=[],
            )
            return catalog_results
    except InvalidQueryError as exc:
        return HTTPClientError(status_code=412, detail=str(exc))
    except KnowledgeBoxNotFound:
        return HTTPClientError(status_code=404, detail="Knowledge Box not found")
    except LimitsExceededError as exc:
        return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
    finally:
        # Runs on every exit path, including the error responses above.
        duration = monotonic() - started_at
        if duration > 2:  # pragma: no cover
            logger.warning(
                "Slow catalog request",
                extra={
                    "kbid": kbid,
                    "duration": duration,
                    "query": item.model_dump_json(),
                },
            )
|
181
|
+
|
182
|
+
|
183
|
+
def pgcatalog_enabled():
    """Tell whether the driver returned by ``get_driver()`` is a ``PGDriver``."""
    driver = get_driver()
    return isinstance(driver, PGDriver)
|
@@ -18,24 +18,24 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
|
22
21
|
from fastapi import Header, Request, Response
|
23
22
|
from fastapi_versioning import version
|
24
23
|
|
24
|
+
from nucliadb.common.models_utils import to_proto
|
25
25
|
from nucliadb.models.responses import HTTPClientError
|
26
|
-
from nucliadb.search import logger
|
26
|
+
from nucliadb.search import logger
|
27
27
|
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
28
|
-
from nucliadb.search.utilities import get_predict
|
29
28
|
from nucliadb_models.resource import NucliaDBRoles
|
30
29
|
from nucliadb_models.search import FeedbackRequest, NucliaDBClientType
|
31
30
|
from nucliadb_telemetry import errors
|
32
31
|
from nucliadb_utils.authentication import requires
|
32
|
+
from nucliadb_utils.utilities import get_audit
|
33
33
|
|
34
34
|
|
35
35
|
@api.post(
|
36
36
|
f"/{KB_PREFIX}/{{kbid}}/feedback",
|
37
37
|
status_code=200,
|
38
|
-
|
38
|
+
summary="Send Feedback",
|
39
39
|
description="Send feedback for a search operation in a Knowledge Box",
|
40
40
|
tags=["Search"],
|
41
41
|
)
|
@@ -51,28 +51,20 @@ async def send_feedback_endpoint(
|
|
51
51
|
x_forwarded_for: str = Header(""),
|
52
52
|
):
|
53
53
|
try:
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
54
|
+
audit = get_audit()
|
55
|
+
if audit is not None:
|
56
|
+
audit.feedback(
|
57
|
+
kbid=kbid,
|
58
|
+
user=x_nucliadb_user,
|
59
|
+
client_type=to_proto.client_type(x_ndb_client),
|
60
|
+
origin=x_forwarded_for,
|
61
|
+
learning_id=item.ident,
|
62
|
+
good=item.good,
|
63
|
+
task=to_proto.feedback_task(item.task),
|
64
|
+
feedback=item.feedback,
|
65
|
+
text_block_id=item.text_block_id,
|
66
|
+
)
|
62
67
|
except Exception as ex:
|
63
68
|
errors.capture_exception(ex)
|
64
69
|
logger.exception("Unexpected error sending feedback", extra={"kbid": kbid})
|
65
70
|
return HTTPClientError(status_code=500, detail=f"Internal server error")
|
66
|
-
|
67
|
-
|
68
|
-
async def send_feedback(
|
69
|
-
kbid: str,
|
70
|
-
item: FeedbackRequest,
|
71
|
-
x_nucliadb_user: str,
|
72
|
-
x_ndb_client: NucliaDBClientType,
|
73
|
-
x_forwarded_for: str,
|
74
|
-
):
|
75
|
-
predict = get_predict()
|
76
|
-
await predict.send_feedback(
|
77
|
-
kbid, item, x_nucliadb_user, x_ndb_client, x_forwarded_for
|
78
|
-
)
|