nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -17,8 +17,7 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from
|
21
|
-
from typing import Optional, Union
|
20
|
+
from typing import Optional, Union, cast
|
22
21
|
|
23
22
|
from fastapi import Header, Request, Response
|
24
23
|
from fastapi_versioning import version
|
@@ -27,28 +26,27 @@ from nucliadb.models.responses import HTTPClientError
|
|
27
26
|
from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, api
|
28
27
|
from nucliadb.search.api.v1.utils import fastapi_query
|
29
28
|
from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
|
29
|
+
from nucliadb.search.search import cache
|
30
30
|
from nucliadb.search.search.exceptions import InvalidQueryError
|
31
31
|
from nucliadb.search.search.merge import merge_paragraphs_results
|
32
32
|
from nucliadb.search.search.query import paragraph_query_to_pb
|
33
|
-
from nucliadb_models.
|
34
|
-
from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
|
33
|
+
from nucliadb_models.resource import NucliaDBRoles
|
35
34
|
from nucliadb_models.search import (
|
36
35
|
NucliaDBClientType,
|
37
|
-
ResourceProperties,
|
38
36
|
ResourceSearchResults,
|
39
|
-
SearchOptions,
|
40
37
|
SearchParamDefaults,
|
41
38
|
SortField,
|
42
39
|
SortOrder,
|
43
40
|
)
|
41
|
+
from nucliadb_models.utils import DateTime
|
44
42
|
from nucliadb_utils.authentication import requires_one
|
45
43
|
|
46
44
|
|
47
45
|
@api.get(
|
48
46
|
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/search",
|
49
47
|
status_code=200,
|
50
|
-
|
51
|
-
description="Search on a
|
48
|
+
summary="Search on Resource",
|
49
|
+
description="Search on a single resource",
|
52
50
|
tags=["Search"],
|
53
51
|
response_model_exclude_unset=True,
|
54
52
|
response_model=ResourceSearchResults,
|
@@ -64,81 +62,62 @@ async def resource_search(
|
|
64
62
|
fields: list[str] = fastapi_query(SearchParamDefaults.fields),
|
65
63
|
filters: list[str] = fastapi_query(SearchParamDefaults.filters),
|
66
64
|
faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
|
67
|
-
sort: Optional[SortField] = fastapi_query(
|
68
|
-
SearchParamDefaults.sort_field, alias="sort_field"
|
69
|
-
),
|
65
|
+
sort: Optional[SortField] = fastapi_query(SearchParamDefaults.sort_field, alias="sort_field"),
|
70
66
|
sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
),
|
76
|
-
range_creation_end: Optional[datetime] = fastapi_query(
|
77
|
-
SearchParamDefaults.range_creation_end
|
78
|
-
),
|
79
|
-
range_modification_start: Optional[datetime] = fastapi_query(
|
67
|
+
top_k: Optional[int] = fastapi_query(SearchParamDefaults.top_k),
|
68
|
+
range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
|
69
|
+
range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
|
70
|
+
range_modification_start: Optional[DateTime] = fastapi_query(
|
80
71
|
SearchParamDefaults.range_modification_start
|
81
72
|
),
|
82
|
-
range_modification_end: Optional[
|
73
|
+
range_modification_end: Optional[DateTime] = fastapi_query(
|
83
74
|
SearchParamDefaults.range_modification_end
|
84
75
|
),
|
85
76
|
highlight: bool = fastapi_query(SearchParamDefaults.highlight),
|
86
|
-
show: list[ResourceProperties] = fastapi_query(
|
87
|
-
SearchParamDefaults.show, default=list(ResourceProperties)
|
88
|
-
),
|
89
|
-
field_type_filter: list[FieldTypeName] = fastapi_query(
|
90
|
-
SearchParamDefaults.field_type_filter, alias="field_type"
|
91
|
-
),
|
92
|
-
extracted: list[ExtractedDataTypeName] = fastapi_query(
|
93
|
-
SearchParamDefaults.extracted
|
94
|
-
),
|
95
77
|
x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
|
96
78
|
debug: bool = fastapi_query(SearchParamDefaults.debug),
|
97
79
|
shards: list[str] = fastapi_query(SearchParamDefaults.shards),
|
98
80
|
) -> Union[ResourceSearchResults, HTTPClientError]:
|
99
|
-
|
100
|
-
|
101
|
-
pb_query = await paragraph_query_to_pb(
|
102
|
-
kbid,
|
103
|
-
[SearchOptions.PARAGRAPH],
|
104
|
-
rid,
|
105
|
-
query,
|
106
|
-
fields,
|
107
|
-
filters,
|
108
|
-
faceted,
|
109
|
-
page_number,
|
110
|
-
page_size,
|
111
|
-
range_creation_start,
|
112
|
-
range_creation_end,
|
113
|
-
range_modification_start,
|
114
|
-
range_modification_end,
|
115
|
-
sort=sort.value if sort else None,
|
116
|
-
sort_ord=sort_order.value,
|
117
|
-
)
|
118
|
-
except InvalidQueryError as exc:
|
119
|
-
return HTTPClientError(status_code=412, detail=str(exc))
|
81
|
+
top_k = top_k or SearchParamDefaults.top_k # type: ignore
|
82
|
+
top_k = cast(int, top_k)
|
120
83
|
|
121
|
-
|
122
|
-
|
123
|
-
|
84
|
+
with cache.request_caches():
|
85
|
+
try:
|
86
|
+
pb_query = await paragraph_query_to_pb(
|
87
|
+
kbid,
|
88
|
+
rid,
|
89
|
+
query,
|
90
|
+
fields,
|
91
|
+
filters,
|
92
|
+
faceted,
|
93
|
+
top_k,
|
94
|
+
range_creation_start,
|
95
|
+
range_creation_end,
|
96
|
+
range_modification_start,
|
97
|
+
range_modification_end,
|
98
|
+
sort=sort.value if sort else None,
|
99
|
+
sort_ord=sort_order.value,
|
100
|
+
)
|
101
|
+
except InvalidQueryError as exc:
|
102
|
+
return HTTPClientError(status_code=412, detail=str(exc))
|
124
103
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
104
|
+
results, incomplete_results, queried_nodes = await node_query(
|
105
|
+
kbid, Method.SEARCH, pb_query, shards
|
106
|
+
)
|
107
|
+
|
108
|
+
# We need to merge
|
109
|
+
search_results = await merge_paragraphs_results(
|
110
|
+
results,
|
111
|
+
top_k=top_k,
|
112
|
+
kbid=kbid,
|
113
|
+
highlight_split=highlight,
|
114
|
+
min_score=0.0,
|
115
|
+
)
|
137
116
|
|
138
|
-
|
139
|
-
|
140
|
-
|
117
|
+
response.status_code = 206 if incomplete_results else 200
|
118
|
+
if debug:
|
119
|
+
search_results.nodes = debug_nodes_info(queried_nodes)
|
141
120
|
|
142
|
-
|
143
|
-
|
144
|
-
|
121
|
+
queried_shards = [shard_id for _, shard_id in queried_nodes]
|
122
|
+
search_results.shards = queried_shards
|
123
|
+
return search_results
|
nucliadb/search/api/v1/router.py
CHANGED
nucliadb/search/api/v1/search.py
CHANGED
@@ -18,25 +18,27 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import json
|
21
|
-
from datetime import datetime
|
22
21
|
from time import time
|
23
22
|
from typing import Optional, Union
|
24
23
|
|
25
24
|
from fastapi import Body, Header, Query, Request, Response
|
26
25
|
from fastapi.openapi.models import Example
|
27
26
|
from fastapi_versioning import version
|
28
|
-
from pydantic
|
27
|
+
from pydantic import ValidationError
|
29
28
|
|
30
29
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
30
|
+
from nucliadb.common.models_utils import to_proto
|
31
31
|
from nucliadb.models.responses import HTTPClientError
|
32
32
|
from nucliadb.search import predict
|
33
33
|
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
34
34
|
from nucliadb.search.api.v1.utils import fastapi_query
|
35
35
|
from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
|
36
|
+
from nucliadb.search.search import cache
|
36
37
|
from nucliadb.search.search.exceptions import InvalidQueryError
|
37
38
|
from nucliadb.search.search.merge import merge_results
|
38
39
|
from nucliadb.search.search.query import QueryParser
|
39
40
|
from nucliadb.search.search.utils import (
|
41
|
+
filter_hidden_resources,
|
40
42
|
min_score_from_payload,
|
41
43
|
min_score_from_query_params,
|
42
44
|
should_disable_vector_search,
|
@@ -45,9 +47,7 @@ from nucliadb_models.common import FieldTypeName
|
|
45
47
|
from nucliadb_models.metadata import ResourceProcessingStatus
|
46
48
|
from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
|
47
49
|
from nucliadb_models.search import (
|
48
|
-
CatalogRequest,
|
49
50
|
KnowledgeboxSearchResults,
|
50
|
-
MinScore,
|
51
51
|
NucliaDBClientType,
|
52
52
|
ResourceProperties,
|
53
53
|
SearchOptions,
|
@@ -58,6 +58,7 @@ from nucliadb_models.search import (
|
|
58
58
|
SortOrder,
|
59
59
|
)
|
60
60
|
from nucliadb_models.security import RequestSecurity
|
61
|
+
from nucliadb_models.utils import DateTime
|
61
62
|
from nucliadb_utils.authentication import requires
|
62
63
|
from nucliadb_utils.exceptions import LimitsExceededError
|
63
64
|
from nucliadb_utils.utilities import get_audit
|
@@ -69,7 +70,7 @@ SEARCH_EXAMPLES = {
|
|
69
70
|
value={
|
70
71
|
"query": "Noam Chomsky",
|
71
72
|
"filters": ["/icon/application/pdf"],
|
72
|
-
"features": [SearchOptions.
|
73
|
+
"features": [SearchOptions.FULLTEXT],
|
73
74
|
},
|
74
75
|
),
|
75
76
|
"get_language_counts": Example(
|
@@ -78,7 +79,7 @@ SEARCH_EXAMPLES = {
|
|
78
79
|
value={
|
79
80
|
"page_size": 0,
|
80
81
|
"faceted": ["/s/p"],
|
81
|
-
"features": [SearchOptions.
|
82
|
+
"features": [SearchOptions.FULLTEXT],
|
82
83
|
},
|
83
84
|
),
|
84
85
|
}
|
@@ -87,8 +88,8 @@ SEARCH_EXAMPLES = {
|
|
87
88
|
@api.get(
|
88
89
|
f"/{KB_PREFIX}/{{kbid}}/search",
|
89
90
|
status_code=200,
|
90
|
-
|
91
|
-
description="Search on a Knowledge Box",
|
91
|
+
summary="Search Knowledge Box",
|
92
|
+
description="Search on a Knowledge Box and retrieve separate results for documents, paragraphs, and sentences. Usually, it is better to use `find`", # noqa: E501
|
92
93
|
response_model=KnowledgeboxSearchResults,
|
93
94
|
response_model_exclude_unset=True,
|
94
95
|
tags=["Search"],
|
@@ -106,40 +107,36 @@ async def search_knowledgebox(
|
|
106
107
|
sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
|
107
108
|
sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
|
108
109
|
sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
|
109
|
-
|
110
|
-
page_size: int = fastapi_query(SearchParamDefaults.page_size),
|
110
|
+
top_k: int = fastapi_query(SearchParamDefaults.top_k),
|
111
111
|
min_score: Optional[float] = Query(
|
112
112
|
default=None,
|
113
|
-
description="Minimum similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/
|
113
|
+
description="Minimum similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
|
114
114
|
deprecated=True,
|
115
115
|
),
|
116
116
|
min_score_semantic: Optional[float] = Query(
|
117
117
|
default=None,
|
118
|
-
description="Minimum semantic similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/
|
118
|
+
description="Minimum semantic similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
|
119
119
|
),
|
120
120
|
min_score_bm25: float = Query(
|
121
121
|
default=0,
|
122
122
|
description="Minimum bm25 score to filter paragraph and document index results",
|
123
123
|
ge=0,
|
124
124
|
),
|
125
|
-
|
126
|
-
|
127
|
-
),
|
128
|
-
|
129
|
-
SearchParamDefaults.range_creation_end
|
130
|
-
),
|
131
|
-
range_modification_start: Optional[datetime] = fastapi_query(
|
125
|
+
vectorset: Optional[str] = fastapi_query(SearchParamDefaults.vectorset),
|
126
|
+
range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
|
127
|
+
range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
|
128
|
+
range_modification_start: Optional[DateTime] = fastapi_query(
|
132
129
|
SearchParamDefaults.range_modification_start
|
133
130
|
),
|
134
|
-
range_modification_end: Optional[
|
131
|
+
range_modification_end: Optional[DateTime] = fastapi_query(
|
135
132
|
SearchParamDefaults.range_modification_end
|
136
133
|
),
|
137
134
|
features: list[SearchOptions] = fastapi_query(
|
138
135
|
SearchParamDefaults.search_features,
|
139
136
|
default=[
|
140
|
-
SearchOptions.
|
141
|
-
SearchOptions.
|
142
|
-
SearchOptions.
|
137
|
+
SearchOptions.KEYWORD,
|
138
|
+
SearchOptions.FULLTEXT,
|
139
|
+
SearchOptions.SEMANTIC,
|
143
140
|
],
|
144
141
|
),
|
145
142
|
debug: bool = fastapi_query(SearchParamDefaults.debug),
|
@@ -148,14 +145,13 @@ async def search_knowledgebox(
|
|
148
145
|
field_type_filter: list[FieldTypeName] = fastapi_query(
|
149
146
|
SearchParamDefaults.field_type_filter, alias="field_type"
|
150
147
|
),
|
151
|
-
extracted: list[ExtractedDataTypeName] = fastapi_query(
|
152
|
-
SearchParamDefaults.extracted
|
153
|
-
),
|
148
|
+
extracted: list[ExtractedDataTypeName] = fastapi_query(SearchParamDefaults.extracted),
|
154
149
|
shards: list[str] = fastapi_query(SearchParamDefaults.shards),
|
155
150
|
with_duplicates: bool = fastapi_query(SearchParamDefaults.with_duplicates),
|
156
151
|
with_synonyms: bool = fastapi_query(SearchParamDefaults.with_synonyms),
|
157
152
|
autofilter: bool = fastapi_query(SearchParamDefaults.autofilter),
|
158
153
|
security_groups: list[str] = fastapi_query(SearchParamDefaults.security_groups),
|
154
|
+
show_hidden: bool = fastapi_query(SearchParamDefaults.show_hidden),
|
159
155
|
x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
|
160
156
|
x_nucliadb_user: str = Header(""),
|
161
157
|
x_forwarded_for: str = Header(""),
|
@@ -174,11 +170,9 @@ async def search_knowledgebox(
|
|
174
170
|
if sort_field is not None
|
175
171
|
else None
|
176
172
|
),
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
min_score_bm25, min_score_semantic, min_score
|
181
|
-
),
|
173
|
+
top_k=top_k,
|
174
|
+
min_score=min_score_from_query_params(min_score_bm25, min_score_semantic, min_score),
|
175
|
+
vectorset=vectorset,
|
182
176
|
range_creation_end=range_creation_end,
|
183
177
|
range_creation_start=range_creation_start,
|
184
178
|
range_modification_end=range_modification_end,
|
@@ -194,160 +188,19 @@ async def search_knowledgebox(
|
|
194
188
|
with_synonyms=with_synonyms,
|
195
189
|
autofilter=autofilter,
|
196
190
|
security=security,
|
191
|
+
show_hidden=show_hidden,
|
197
192
|
)
|
198
193
|
except ValidationError as exc:
|
199
194
|
detail = json.loads(exc.json())
|
200
195
|
return HTTPClientError(status_code=422, detail=detail)
|
201
|
-
return await _search_endpoint(
|
202
|
-
response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for
|
203
|
-
)
|
204
|
-
|
205
|
-
|
206
|
-
@api.get(
|
207
|
-
f"/{KB_PREFIX}/{{kbid}}/catalog",
|
208
|
-
status_code=200,
|
209
|
-
name="List resources of a Knowledge Box",
|
210
|
-
description="List resources of a Knowledge Box",
|
211
|
-
response_model=KnowledgeboxSearchResults,
|
212
|
-
response_model_exclude_unset=True,
|
213
|
-
tags=["Search"],
|
214
|
-
)
|
215
|
-
@requires(NucliaDBRoles.READER)
|
216
|
-
@version(1)
|
217
|
-
async def catalog_get(
|
218
|
-
request: Request,
|
219
|
-
response: Response,
|
220
|
-
kbid: str,
|
221
|
-
query: str = fastapi_query(SearchParamDefaults.query),
|
222
|
-
filters: list[str] = fastapi_query(SearchParamDefaults.filters),
|
223
|
-
faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
|
224
|
-
sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
|
225
|
-
sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
|
226
|
-
sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
|
227
|
-
page_number: int = fastapi_query(SearchParamDefaults.page_number),
|
228
|
-
page_size: int = fastapi_query(SearchParamDefaults.page_size),
|
229
|
-
shards: list[str] = fastapi_query(SearchParamDefaults.shards),
|
230
|
-
with_status: Optional[ResourceProcessingStatus] = fastapi_query(
|
231
|
-
SearchParamDefaults.with_status
|
232
|
-
),
|
233
|
-
debug: bool = fastapi_query(SearchParamDefaults.debug),
|
234
|
-
) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
|
235
|
-
item = CatalogRequest(
|
236
|
-
query=query,
|
237
|
-
filters=filters,
|
238
|
-
faceted=faceted,
|
239
|
-
page_number=page_number,
|
240
|
-
page_size=page_size,
|
241
|
-
shards=shards,
|
242
|
-
debug=debug,
|
243
|
-
with_status=with_status,
|
244
|
-
)
|
245
|
-
if sort_field:
|
246
|
-
item.sort = SortOptions(field=sort_field, limit=sort_limit, order=sort_order)
|
247
|
-
return await catalog(kbid, item)
|
248
|
-
|
249
|
-
|
250
|
-
@api.post(
|
251
|
-
f"/{KB_PREFIX}/{{kbid}}/catalog",
|
252
|
-
status_code=200,
|
253
|
-
name="List resources of a Knowledge Box",
|
254
|
-
description="List resources of a Knowledge Box",
|
255
|
-
response_model=KnowledgeboxSearchResults,
|
256
|
-
response_model_exclude_unset=True,
|
257
|
-
tags=["Search"],
|
258
|
-
)
|
259
|
-
@requires(NucliaDBRoles.READER)
|
260
|
-
@version(1)
|
261
|
-
async def catalog_post(
|
262
|
-
request: Request,
|
263
|
-
kbid: str,
|
264
|
-
item: CatalogRequest,
|
265
|
-
) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
|
266
|
-
return await catalog(kbid, item)
|
267
|
-
|
268
|
-
|
269
|
-
async def catalog(
|
270
|
-
kbid: str,
|
271
|
-
item: CatalogRequest,
|
272
|
-
):
|
273
|
-
"""
|
274
|
-
Catalog endpoint is a simplified version of the search endpoint, it only
|
275
|
-
returns bm25 results on titles and it does not support vector search.
|
276
|
-
It is useful for listing resources in a knowledge box.
|
277
|
-
"""
|
278
|
-
try:
|
279
|
-
sort = item.sort
|
280
|
-
if item.sort is None:
|
281
|
-
# By default we sort by creation date (most recent first)
|
282
|
-
sort = SortOptions(
|
283
|
-
field=SortField.CREATED,
|
284
|
-
order=SortOrder.DESC,
|
285
|
-
limit=None,
|
286
|
-
)
|
287
|
-
|
288
|
-
query_parser = QueryParser(
|
289
|
-
kbid=kbid,
|
290
|
-
features=[SearchOptions.DOCUMENT],
|
291
|
-
query=item.query,
|
292
|
-
filters=item.filters,
|
293
|
-
faceted=item.faceted,
|
294
|
-
sort=sort,
|
295
|
-
page_number=item.page_number,
|
296
|
-
page_size=item.page_size,
|
297
|
-
min_score=MinScore(bm25=0, semantic=0),
|
298
|
-
fields=["a/title"],
|
299
|
-
with_status=item.with_status,
|
300
|
-
)
|
301
|
-
pb_query, _, _ = await query_parser.parse()
|
302
|
-
|
303
|
-
(results, _, queried_nodes) = await node_query(
|
304
|
-
kbid,
|
305
|
-
Method.SEARCH,
|
306
|
-
pb_query,
|
307
|
-
target_shard_replicas=item.shards,
|
308
|
-
# Catalog should not go to read replicas because we want it to be
|
309
|
-
# consistent and most up to date results
|
310
|
-
use_read_replica_nodes=False,
|
311
|
-
)
|
312
|
-
|
313
|
-
# We need to merge
|
314
|
-
search_results = await merge_results(
|
315
|
-
results,
|
316
|
-
count=item.page_size,
|
317
|
-
page=item.page_number,
|
318
|
-
kbid=kbid,
|
319
|
-
show=[ResourceProperties.BASIC],
|
320
|
-
field_type_filter=[],
|
321
|
-
extracted=[],
|
322
|
-
sort=sort,
|
323
|
-
requested_relations=pb_query.relation_subgraph,
|
324
|
-
min_score=query_parser.min_score,
|
325
|
-
highlight=False,
|
326
|
-
)
|
327
|
-
# We don't need sentences, paragraphs or relations on the catalog
|
328
|
-
# response, so we set to None so that fastapi doesn't include them
|
329
|
-
# in the response payload
|
330
|
-
search_results.sentences = None
|
331
|
-
search_results.paragraphs = None
|
332
|
-
search_results.relations = None
|
333
|
-
if item.debug:
|
334
|
-
search_results.nodes = debug_nodes_info(queried_nodes)
|
335
|
-
queried_shards = [shard_id for _, shard_id in queried_nodes]
|
336
|
-
search_results.shards = queried_shards
|
337
|
-
return search_results
|
338
|
-
except InvalidQueryError as exc:
|
339
|
-
return HTTPClientError(status_code=412, detail=str(exc))
|
340
|
-
except KnowledgeBoxNotFound:
|
341
|
-
return HTTPClientError(status_code=404, detail="Knowledge Box not found")
|
342
|
-
except LimitsExceededError as exc:
|
343
|
-
return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
|
196
|
+
return await _search_endpoint(response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for)
|
344
197
|
|
345
198
|
|
346
199
|
@api.post(
|
347
200
|
f"/{KB_PREFIX}/{{kbid}}/search",
|
348
201
|
status_code=200,
|
349
|
-
|
350
|
-
description="Search on a Knowledge Box",
|
202
|
+
summary="Search Knowledge Box",
|
203
|
+
description="Search on a Knowledge Box and retrieve separate results for documents, paragraphs, and sentences. Usually, it is better to use `find`", # noqa: E501
|
351
204
|
response_model=KnowledgeboxSearchResults,
|
352
205
|
response_model_exclude_unset=True,
|
353
206
|
tags=["Search"],
|
@@ -363,9 +216,7 @@ async def search_post_knowledgebox(
|
|
363
216
|
x_nucliadb_user: str = Header(""),
|
364
217
|
x_forwarded_for: str = Header(""),
|
365
218
|
) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
|
366
|
-
return await _search_endpoint(
|
367
|
-
response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for
|
368
|
-
)
|
219
|
+
return await _search_endpoint(response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for)
|
369
220
|
|
370
221
|
|
371
222
|
async def _search_endpoint(
|
@@ -377,13 +228,13 @@ async def _search_endpoint(
|
|
377
228
|
x_forwarded_for: str,
|
378
229
|
**kwargs,
|
379
230
|
) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
|
380
|
-
# All endpoint logic should be here
|
381
231
|
try:
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
232
|
+
with cache.request_caches():
|
233
|
+
results, incomplete = await search(
|
234
|
+
kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for, **kwargs
|
235
|
+
)
|
236
|
+
response.status_code = 206 if incomplete else 200
|
237
|
+
return results
|
387
238
|
except KnowledgeBoxNotFound:
|
388
239
|
return HTTPClientError(status_code=404, detail="Knowledge Box not found")
|
389
240
|
except LimitsExceededError as exc:
|
@@ -392,8 +243,8 @@ async def _search_endpoint(
|
|
392
243
|
return HTTPClientError(status_code=412, detail=str(exc))
|
393
244
|
except predict.ProxiedPredictAPIError as err:
|
394
245
|
return HTTPClientError(
|
395
|
-
status_code=
|
396
|
-
detail=
|
246
|
+
status_code=err.status,
|
247
|
+
detail=err.detail,
|
397
248
|
)
|
398
249
|
|
399
250
|
|
@@ -411,20 +262,20 @@ async def search(
|
|
411
262
|
|
412
263
|
item.min_score = min_score_from_payload(item.min_score)
|
413
264
|
|
414
|
-
if SearchOptions.
|
265
|
+
if SearchOptions.SEMANTIC in item.features:
|
415
266
|
if should_disable_vector_search(item):
|
416
|
-
item.features.remove(SearchOptions.
|
267
|
+
item.features.remove(SearchOptions.SEMANTIC)
|
417
268
|
|
418
269
|
# We need to query all nodes
|
419
270
|
query_parser = QueryParser(
|
420
271
|
kbid=kbid,
|
421
272
|
features=item.features,
|
422
273
|
query=item.query,
|
423
|
-
|
274
|
+
label_filters=item.filters,
|
275
|
+
keyword_filters=[],
|
424
276
|
faceted=item.faceted,
|
425
277
|
sort=item.sort,
|
426
|
-
|
427
|
-
page_size=item.page_size,
|
278
|
+
top_k=item.top_k,
|
428
279
|
min_score=item.min_score,
|
429
280
|
range_creation_start=item.range_creation_start,
|
430
281
|
range_creation_end=item.range_creation_end,
|
@@ -439,6 +290,8 @@ async def search(
|
|
439
290
|
autofilter=item.autofilter,
|
440
291
|
security=item.security,
|
441
292
|
rephrase=item.rephrase,
|
293
|
+
hidden=await filter_hidden_resources(kbid, item.show_hidden),
|
294
|
+
rephrase_prompt=item.rephrase_prompt,
|
442
295
|
)
|
443
296
|
pb_query, incomplete_results, autofilters = await query_parser.parse()
|
444
297
|
|
@@ -451,28 +304,28 @@ async def search(
|
|
451
304
|
# We need to merge
|
452
305
|
search_results = await merge_results(
|
453
306
|
results,
|
454
|
-
|
455
|
-
page=item.page_number,
|
307
|
+
top_k=item.top_k,
|
456
308
|
kbid=kbid,
|
457
309
|
show=item.show,
|
458
310
|
field_type_filter=item.field_type_filter,
|
459
311
|
extracted=item.extracted,
|
460
|
-
sort=query_parser.sort,
|
312
|
+
sort=query_parser.sort, # type: ignore
|
461
313
|
requested_relations=pb_query.relation_subgraph,
|
462
314
|
min_score=query_parser.min_score,
|
463
315
|
highlight=item.highlight,
|
464
316
|
)
|
465
317
|
|
466
318
|
if audit is not None and do_audit:
|
467
|
-
|
319
|
+
audit.search(
|
468
320
|
kbid,
|
469
321
|
x_nucliadb_user,
|
470
|
-
|
322
|
+
to_proto.client_type(x_ndb_client),
|
471
323
|
x_forwarded_for,
|
472
324
|
pb_query,
|
473
325
|
time() - start_time,
|
474
326
|
len(search_results.resources),
|
475
327
|
)
|
328
|
+
|
476
329
|
if item.debug:
|
477
330
|
search_results.nodes = debug_nodes_info(queried_nodes)
|
478
331
|
|