nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -403
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +70 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +143 -117
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +37 -128
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -17,8 +17,7 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from
|
21
|
-
from typing import Optional, Union
|
20
|
+
from typing import Optional, Union, cast
|
22
21
|
|
23
22
|
from fastapi import Header, Request, Response
|
24
23
|
from fastapi_versioning import version
|
@@ -27,20 +26,19 @@ from nucliadb.models.responses import HTTPClientError
|
|
27
26
|
from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, api
|
28
27
|
from nucliadb.search.api.v1.utils import fastapi_query
|
29
28
|
from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
|
29
|
+
from nucliadb.search.search import cache
|
30
30
|
from nucliadb.search.search.exceptions import InvalidQueryError
|
31
31
|
from nucliadb.search.search.merge import merge_paragraphs_results
|
32
32
|
from nucliadb.search.search.query import paragraph_query_to_pb
|
33
|
-
from nucliadb_models.
|
34
|
-
from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
|
33
|
+
from nucliadb_models.resource import NucliaDBRoles
|
35
34
|
from nucliadb_models.search import (
|
36
35
|
NucliaDBClientType,
|
37
|
-
ResourceProperties,
|
38
36
|
ResourceSearchResults,
|
39
|
-
SearchOptions,
|
40
37
|
SearchParamDefaults,
|
41
38
|
SortField,
|
42
39
|
SortOrder,
|
43
40
|
)
|
41
|
+
from nucliadb_models.utils import DateTime
|
44
42
|
from nucliadb_utils.authentication import requires_one
|
45
43
|
|
46
44
|
|
@@ -64,81 +62,62 @@ async def resource_search(
|
|
64
62
|
fields: list[str] = fastapi_query(SearchParamDefaults.fields),
|
65
63
|
filters: list[str] = fastapi_query(SearchParamDefaults.filters),
|
66
64
|
faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
|
67
|
-
sort: Optional[SortField] = fastapi_query(
|
68
|
-
SearchParamDefaults.sort_field, alias="sort_field"
|
69
|
-
),
|
65
|
+
sort: Optional[SortField] = fastapi_query(SearchParamDefaults.sort_field, alias="sort_field"),
|
70
66
|
sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
),
|
76
|
-
range_creation_end: Optional[datetime] = fastapi_query(
|
77
|
-
SearchParamDefaults.range_creation_end
|
78
|
-
),
|
79
|
-
range_modification_start: Optional[datetime] = fastapi_query(
|
67
|
+
top_k: Optional[int] = fastapi_query(SearchParamDefaults.top_k),
|
68
|
+
range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
|
69
|
+
range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
|
70
|
+
range_modification_start: Optional[DateTime] = fastapi_query(
|
80
71
|
SearchParamDefaults.range_modification_start
|
81
72
|
),
|
82
|
-
range_modification_end: Optional[
|
73
|
+
range_modification_end: Optional[DateTime] = fastapi_query(
|
83
74
|
SearchParamDefaults.range_modification_end
|
84
75
|
),
|
85
76
|
highlight: bool = fastapi_query(SearchParamDefaults.highlight),
|
86
|
-
show: list[ResourceProperties] = fastapi_query(
|
87
|
-
SearchParamDefaults.show, default=list(ResourceProperties)
|
88
|
-
),
|
89
|
-
field_type_filter: list[FieldTypeName] = fastapi_query(
|
90
|
-
SearchParamDefaults.field_type_filter, alias="field_type"
|
91
|
-
),
|
92
|
-
extracted: list[ExtractedDataTypeName] = fastapi_query(
|
93
|
-
SearchParamDefaults.extracted
|
94
|
-
),
|
95
77
|
x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
|
96
78
|
debug: bool = fastapi_query(SearchParamDefaults.debug),
|
97
79
|
shards: list[str] = fastapi_query(SearchParamDefaults.shards),
|
98
80
|
) -> Union[ResourceSearchResults, HTTPClientError]:
|
99
|
-
|
100
|
-
|
101
|
-
pb_query = await paragraph_query_to_pb(
|
102
|
-
kbid,
|
103
|
-
[SearchOptions.PARAGRAPH],
|
104
|
-
rid,
|
105
|
-
query,
|
106
|
-
fields,
|
107
|
-
filters,
|
108
|
-
faceted,
|
109
|
-
page_number,
|
110
|
-
page_size,
|
111
|
-
range_creation_start,
|
112
|
-
range_creation_end,
|
113
|
-
range_modification_start,
|
114
|
-
range_modification_end,
|
115
|
-
sort=sort.value if sort else None,
|
116
|
-
sort_ord=sort_order.value,
|
117
|
-
)
|
118
|
-
except InvalidQueryError as exc:
|
119
|
-
return HTTPClientError(status_code=412, detail=str(exc))
|
81
|
+
top_k = top_k or SearchParamDefaults.top_k # type: ignore
|
82
|
+
top_k = cast(int, top_k)
|
120
83
|
|
121
|
-
|
122
|
-
|
123
|
-
|
84
|
+
with cache.request_caches():
|
85
|
+
try:
|
86
|
+
pb_query = await paragraph_query_to_pb(
|
87
|
+
kbid,
|
88
|
+
rid,
|
89
|
+
query,
|
90
|
+
fields,
|
91
|
+
filters,
|
92
|
+
faceted,
|
93
|
+
top_k,
|
94
|
+
range_creation_start,
|
95
|
+
range_creation_end,
|
96
|
+
range_modification_start,
|
97
|
+
range_modification_end,
|
98
|
+
sort=sort.value if sort else None,
|
99
|
+
sort_ord=sort_order.value,
|
100
|
+
)
|
101
|
+
except InvalidQueryError as exc:
|
102
|
+
return HTTPClientError(status_code=412, detail=str(exc))
|
124
103
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
104
|
+
results, incomplete_results, queried_nodes = await node_query(
|
105
|
+
kbid, Method.SEARCH, pb_query, shards
|
106
|
+
)
|
107
|
+
|
108
|
+
# We need to merge
|
109
|
+
search_results = await merge_paragraphs_results(
|
110
|
+
results,
|
111
|
+
top_k=top_k,
|
112
|
+
kbid=kbid,
|
113
|
+
highlight_split=highlight,
|
114
|
+
min_score=0.0,
|
115
|
+
)
|
137
116
|
|
138
|
-
|
139
|
-
|
140
|
-
|
117
|
+
response.status_code = 206 if incomplete_results else 200
|
118
|
+
if debug:
|
119
|
+
search_results.nodes = debug_nodes_info(queried_nodes)
|
141
120
|
|
142
|
-
|
143
|
-
|
144
|
-
|
121
|
+
queried_shards = [shard_id for _, shard_id in queried_nodes]
|
122
|
+
search_results.shards = queried_shards
|
123
|
+
return search_results
|
nucliadb/search/api/v1/search.py
CHANGED
@@ -18,7 +18,6 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import json
|
21
|
-
from datetime import datetime
|
22
21
|
from time import time
|
23
22
|
from typing import Optional, Union
|
24
23
|
|
@@ -28,15 +27,18 @@ from fastapi_versioning import version
|
|
28
27
|
from pydantic import ValidationError
|
29
28
|
|
30
29
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
30
|
+
from nucliadb.common.models_utils import to_proto
|
31
31
|
from nucliadb.models.responses import HTTPClientError
|
32
32
|
from nucliadb.search import predict
|
33
33
|
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
34
34
|
from nucliadb.search.api.v1.utils import fastapi_query
|
35
35
|
from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
|
36
|
+
from nucliadb.search.search import cache
|
36
37
|
from nucliadb.search.search.exceptions import InvalidQueryError
|
37
38
|
from nucliadb.search.search.merge import merge_results
|
38
39
|
from nucliadb.search.search.query import QueryParser
|
39
40
|
from nucliadb.search.search.utils import (
|
41
|
+
filter_hidden_resources,
|
40
42
|
min_score_from_payload,
|
41
43
|
min_score_from_query_params,
|
42
44
|
should_disable_vector_search,
|
@@ -45,9 +47,7 @@ from nucliadb_models.common import FieldTypeName
|
|
45
47
|
from nucliadb_models.metadata import ResourceProcessingStatus
|
46
48
|
from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
|
47
49
|
from nucliadb_models.search import (
|
48
|
-
CatalogRequest,
|
49
50
|
KnowledgeboxSearchResults,
|
50
|
-
MinScore,
|
51
51
|
NucliaDBClientType,
|
52
52
|
ResourceProperties,
|
53
53
|
SearchOptions,
|
@@ -58,6 +58,7 @@ from nucliadb_models.search import (
|
|
58
58
|
SortOrder,
|
59
59
|
)
|
60
60
|
from nucliadb_models.security import RequestSecurity
|
61
|
+
from nucliadb_models.utils import DateTime
|
61
62
|
from nucliadb_utils.authentication import requires
|
62
63
|
from nucliadb_utils.exceptions import LimitsExceededError
|
63
64
|
from nucliadb_utils.utilities import get_audit
|
@@ -69,7 +70,7 @@ SEARCH_EXAMPLES = {
|
|
69
70
|
value={
|
70
71
|
"query": "Noam Chomsky",
|
71
72
|
"filters": ["/icon/application/pdf"],
|
72
|
-
"features": [SearchOptions.
|
73
|
+
"features": [SearchOptions.FULLTEXT],
|
73
74
|
},
|
74
75
|
),
|
75
76
|
"get_language_counts": Example(
|
@@ -78,7 +79,7 @@ SEARCH_EXAMPLES = {
|
|
78
79
|
value={
|
79
80
|
"page_size": 0,
|
80
81
|
"faceted": ["/s/p"],
|
81
|
-
"features": [SearchOptions.
|
82
|
+
"features": [SearchOptions.FULLTEXT],
|
82
83
|
},
|
83
84
|
),
|
84
85
|
}
|
@@ -106,40 +107,36 @@ async def search_knowledgebox(
|
|
106
107
|
sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
|
107
108
|
sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
|
108
109
|
sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
|
109
|
-
|
110
|
-
page_size: int = fastapi_query(SearchParamDefaults.page_size),
|
110
|
+
top_k: int = fastapi_query(SearchParamDefaults.top_k),
|
111
111
|
min_score: Optional[float] = Query(
|
112
112
|
default=None,
|
113
|
-
description="Minimum similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/
|
113
|
+
description="Minimum similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
|
114
114
|
deprecated=True,
|
115
115
|
),
|
116
116
|
min_score_semantic: Optional[float] = Query(
|
117
117
|
default=None,
|
118
|
-
description="Minimum semantic similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/
|
118
|
+
description="Minimum semantic similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
|
119
119
|
),
|
120
120
|
min_score_bm25: float = Query(
|
121
121
|
default=0,
|
122
122
|
description="Minimum bm25 score to filter paragraph and document index results",
|
123
123
|
ge=0,
|
124
124
|
),
|
125
|
-
|
126
|
-
|
127
|
-
),
|
128
|
-
|
129
|
-
SearchParamDefaults.range_creation_end
|
130
|
-
),
|
131
|
-
range_modification_start: Optional[datetime] = fastapi_query(
|
125
|
+
vectorset: Optional[str] = fastapi_query(SearchParamDefaults.vectorset),
|
126
|
+
range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
|
127
|
+
range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
|
128
|
+
range_modification_start: Optional[DateTime] = fastapi_query(
|
132
129
|
SearchParamDefaults.range_modification_start
|
133
130
|
),
|
134
|
-
range_modification_end: Optional[
|
131
|
+
range_modification_end: Optional[DateTime] = fastapi_query(
|
135
132
|
SearchParamDefaults.range_modification_end
|
136
133
|
),
|
137
134
|
features: list[SearchOptions] = fastapi_query(
|
138
135
|
SearchParamDefaults.search_features,
|
139
136
|
default=[
|
140
|
-
SearchOptions.
|
141
|
-
SearchOptions.
|
142
|
-
SearchOptions.
|
137
|
+
SearchOptions.KEYWORD,
|
138
|
+
SearchOptions.FULLTEXT,
|
139
|
+
SearchOptions.SEMANTIC,
|
143
140
|
],
|
144
141
|
),
|
145
142
|
debug: bool = fastapi_query(SearchParamDefaults.debug),
|
@@ -148,14 +145,13 @@ async def search_knowledgebox(
|
|
148
145
|
field_type_filter: list[FieldTypeName] = fastapi_query(
|
149
146
|
SearchParamDefaults.field_type_filter, alias="field_type"
|
150
147
|
),
|
151
|
-
extracted: list[ExtractedDataTypeName] = fastapi_query(
|
152
|
-
SearchParamDefaults.extracted
|
153
|
-
),
|
148
|
+
extracted: list[ExtractedDataTypeName] = fastapi_query(SearchParamDefaults.extracted),
|
154
149
|
shards: list[str] = fastapi_query(SearchParamDefaults.shards),
|
155
150
|
with_duplicates: bool = fastapi_query(SearchParamDefaults.with_duplicates),
|
156
151
|
with_synonyms: bool = fastapi_query(SearchParamDefaults.with_synonyms),
|
157
152
|
autofilter: bool = fastapi_query(SearchParamDefaults.autofilter),
|
158
153
|
security_groups: list[str] = fastapi_query(SearchParamDefaults.security_groups),
|
154
|
+
show_hidden: bool = fastapi_query(SearchParamDefaults.show_hidden),
|
159
155
|
x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
|
160
156
|
x_nucliadb_user: str = Header(""),
|
161
157
|
x_forwarded_for: str = Header(""),
|
@@ -174,11 +170,9 @@ async def search_knowledgebox(
|
|
174
170
|
if sort_field is not None
|
175
171
|
else None
|
176
172
|
),
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
min_score_bm25, min_score_semantic, min_score
|
181
|
-
),
|
173
|
+
top_k=top_k,
|
174
|
+
min_score=min_score_from_query_params(min_score_bm25, min_score_semantic, min_score),
|
175
|
+
vectorset=vectorset,
|
182
176
|
range_creation_end=range_creation_end,
|
183
177
|
range_creation_start=range_creation_start,
|
184
178
|
range_modification_end=range_modification_end,
|
@@ -194,173 +188,12 @@ async def search_knowledgebox(
|
|
194
188
|
with_synonyms=with_synonyms,
|
195
189
|
autofilter=autofilter,
|
196
190
|
security=security,
|
191
|
+
show_hidden=show_hidden,
|
197
192
|
)
|
198
193
|
except ValidationError as exc:
|
199
194
|
detail = json.loads(exc.json())
|
200
195
|
return HTTPClientError(status_code=422, detail=detail)
|
201
|
-
return await _search_endpoint(
|
202
|
-
response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for
|
203
|
-
)
|
204
|
-
|
205
|
-
|
206
|
-
@api.get(
|
207
|
-
f"/{KB_PREFIX}/{{kbid}}/catalog",
|
208
|
-
status_code=200,
|
209
|
-
summary="List resources of a Knowledge Box",
|
210
|
-
description="List resources of a Knowledge Box",
|
211
|
-
response_model=KnowledgeboxSearchResults,
|
212
|
-
response_model_exclude_unset=True,
|
213
|
-
tags=["Search"],
|
214
|
-
)
|
215
|
-
@requires(NucliaDBRoles.READER)
|
216
|
-
@version(1)
|
217
|
-
async def catalog_get(
|
218
|
-
request: Request,
|
219
|
-
response: Response,
|
220
|
-
kbid: str,
|
221
|
-
query: str = fastapi_query(SearchParamDefaults.query),
|
222
|
-
filters: list[str] = fastapi_query(SearchParamDefaults.filters),
|
223
|
-
faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
|
224
|
-
sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
|
225
|
-
sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
|
226
|
-
sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
|
227
|
-
page_number: int = fastapi_query(SearchParamDefaults.page_number),
|
228
|
-
page_size: int = fastapi_query(SearchParamDefaults.page_size),
|
229
|
-
shards: list[str] = fastapi_query(SearchParamDefaults.shards),
|
230
|
-
with_status: Optional[ResourceProcessingStatus] = fastapi_query(
|
231
|
-
SearchParamDefaults.with_status
|
232
|
-
),
|
233
|
-
debug: bool = fastapi_query(SearchParamDefaults.debug),
|
234
|
-
range_creation_start: Optional[datetime] = fastapi_query(
|
235
|
-
SearchParamDefaults.range_creation_start
|
236
|
-
),
|
237
|
-
range_creation_end: Optional[datetime] = fastapi_query(
|
238
|
-
SearchParamDefaults.range_creation_end
|
239
|
-
),
|
240
|
-
range_modification_start: Optional[datetime] = fastapi_query(
|
241
|
-
SearchParamDefaults.range_modification_start
|
242
|
-
),
|
243
|
-
range_modification_end: Optional[datetime] = fastapi_query(
|
244
|
-
SearchParamDefaults.range_modification_end
|
245
|
-
),
|
246
|
-
) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
|
247
|
-
item = CatalogRequest(
|
248
|
-
query=query,
|
249
|
-
filters=filters,
|
250
|
-
faceted=faceted,
|
251
|
-
page_number=page_number,
|
252
|
-
page_size=page_size,
|
253
|
-
shards=shards,
|
254
|
-
debug=debug,
|
255
|
-
with_status=with_status,
|
256
|
-
range_creation_start=range_creation_start,
|
257
|
-
range_creation_end=range_creation_end,
|
258
|
-
range_modification_start=range_modification_start,
|
259
|
-
range_modification_end=range_modification_end,
|
260
|
-
)
|
261
|
-
if sort_field:
|
262
|
-
item.sort = SortOptions(field=sort_field, limit=sort_limit, order=sort_order)
|
263
|
-
return await catalog(kbid, item)
|
264
|
-
|
265
|
-
|
266
|
-
@api.post(
|
267
|
-
f"/{KB_PREFIX}/{{kbid}}/catalog",
|
268
|
-
status_code=200,
|
269
|
-
summary="List resources of a Knowledge Box",
|
270
|
-
description="List resources of a Knowledge Box",
|
271
|
-
response_model=KnowledgeboxSearchResults,
|
272
|
-
response_model_exclude_unset=True,
|
273
|
-
tags=["Search"],
|
274
|
-
)
|
275
|
-
@requires(NucliaDBRoles.READER)
|
276
|
-
@version(1)
|
277
|
-
async def catalog_post(
|
278
|
-
request: Request,
|
279
|
-
kbid: str,
|
280
|
-
item: CatalogRequest,
|
281
|
-
) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
|
282
|
-
return await catalog(kbid, item)
|
283
|
-
|
284
|
-
|
285
|
-
async def catalog(
|
286
|
-
kbid: str,
|
287
|
-
item: CatalogRequest,
|
288
|
-
):
|
289
|
-
"""
|
290
|
-
Catalog endpoint is a simplified version of the search endpoint, it only
|
291
|
-
returns bm25 results on titles and it does not support vector search.
|
292
|
-
It is useful for listing resources in a knowledge box.
|
293
|
-
"""
|
294
|
-
try:
|
295
|
-
sort = item.sort
|
296
|
-
if item.sort is None:
|
297
|
-
# By default we sort by creation date (most recent first)
|
298
|
-
sort = SortOptions(
|
299
|
-
field=SortField.CREATED,
|
300
|
-
order=SortOrder.DESC,
|
301
|
-
limit=None,
|
302
|
-
)
|
303
|
-
|
304
|
-
query_parser = QueryParser(
|
305
|
-
kbid=kbid,
|
306
|
-
features=[SearchOptions.DOCUMENT],
|
307
|
-
query=item.query,
|
308
|
-
filters=item.filters,
|
309
|
-
faceted=item.faceted,
|
310
|
-
sort=sort,
|
311
|
-
page_number=item.page_number,
|
312
|
-
page_size=item.page_size,
|
313
|
-
min_score=MinScore(bm25=0, semantic=0),
|
314
|
-
fields=["a/title"],
|
315
|
-
with_status=item.with_status,
|
316
|
-
range_creation_start=item.range_creation_start,
|
317
|
-
range_creation_end=item.range_creation_end,
|
318
|
-
range_modification_start=item.range_modification_start,
|
319
|
-
range_modification_end=item.range_modification_end,
|
320
|
-
)
|
321
|
-
pb_query, _, _ = await query_parser.parse()
|
322
|
-
|
323
|
-
(results, _, queried_nodes) = await node_query(
|
324
|
-
kbid,
|
325
|
-
Method.SEARCH,
|
326
|
-
pb_query,
|
327
|
-
target_shard_replicas=item.shards,
|
328
|
-
# Catalog should not go to read replicas because we want it to be
|
329
|
-
# consistent and most up to date results
|
330
|
-
use_read_replica_nodes=False,
|
331
|
-
)
|
332
|
-
|
333
|
-
# We need to merge
|
334
|
-
search_results = await merge_results(
|
335
|
-
results,
|
336
|
-
count=item.page_size,
|
337
|
-
page=item.page_number,
|
338
|
-
kbid=kbid,
|
339
|
-
show=[ResourceProperties.BASIC],
|
340
|
-
field_type_filter=[],
|
341
|
-
extracted=[],
|
342
|
-
sort=sort,
|
343
|
-
requested_relations=pb_query.relation_subgraph,
|
344
|
-
min_score=query_parser.min_score,
|
345
|
-
highlight=False,
|
346
|
-
)
|
347
|
-
# We don't need sentences, paragraphs or relations on the catalog
|
348
|
-
# response, so we set to None so that fastapi doesn't include them
|
349
|
-
# in the response payload
|
350
|
-
search_results.sentences = None
|
351
|
-
search_results.paragraphs = None
|
352
|
-
search_results.relations = None
|
353
|
-
if item.debug:
|
354
|
-
search_results.nodes = debug_nodes_info(queried_nodes)
|
355
|
-
queried_shards = [shard_id for _, shard_id in queried_nodes]
|
356
|
-
search_results.shards = queried_shards
|
357
|
-
return search_results
|
358
|
-
except InvalidQueryError as exc:
|
359
|
-
return HTTPClientError(status_code=412, detail=str(exc))
|
360
|
-
except KnowledgeBoxNotFound:
|
361
|
-
return HTTPClientError(status_code=404, detail="Knowledge Box not found")
|
362
|
-
except LimitsExceededError as exc:
|
363
|
-
return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
|
196
|
+
return await _search_endpoint(response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for)
|
364
197
|
|
365
198
|
|
366
199
|
@api.post(
|
@@ -383,9 +216,7 @@ async def search_post_knowledgebox(
|
|
383
216
|
x_nucliadb_user: str = Header(""),
|
384
217
|
x_forwarded_for: str = Header(""),
|
385
218
|
) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
|
386
|
-
return await _search_endpoint(
|
387
|
-
response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for
|
388
|
-
)
|
219
|
+
return await _search_endpoint(response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for)
|
389
220
|
|
390
221
|
|
391
222
|
async def _search_endpoint(
|
@@ -397,13 +228,13 @@ async def _search_endpoint(
|
|
397
228
|
x_forwarded_for: str,
|
398
229
|
**kwargs,
|
399
230
|
) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
|
400
|
-
# All endpoint logic should be here
|
401
231
|
try:
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
232
|
+
with cache.request_caches():
|
233
|
+
results, incomplete = await search(
|
234
|
+
kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for, **kwargs
|
235
|
+
)
|
236
|
+
response.status_code = 206 if incomplete else 200
|
237
|
+
return results
|
407
238
|
except KnowledgeBoxNotFound:
|
408
239
|
return HTTPClientError(status_code=404, detail="Knowledge Box not found")
|
409
240
|
except LimitsExceededError as exc:
|
@@ -431,20 +262,20 @@ async def search(
|
|
431
262
|
|
432
263
|
item.min_score = min_score_from_payload(item.min_score)
|
433
264
|
|
434
|
-
if SearchOptions.
|
265
|
+
if SearchOptions.SEMANTIC in item.features:
|
435
266
|
if should_disable_vector_search(item):
|
436
|
-
item.features.remove(SearchOptions.
|
267
|
+
item.features.remove(SearchOptions.SEMANTIC)
|
437
268
|
|
438
269
|
# We need to query all nodes
|
439
270
|
query_parser = QueryParser(
|
440
271
|
kbid=kbid,
|
441
272
|
features=item.features,
|
442
273
|
query=item.query,
|
443
|
-
|
274
|
+
label_filters=item.filters,
|
275
|
+
keyword_filters=[],
|
444
276
|
faceted=item.faceted,
|
445
277
|
sort=item.sort,
|
446
|
-
|
447
|
-
page_size=item.page_size,
|
278
|
+
top_k=item.top_k,
|
448
279
|
min_score=item.min_score,
|
449
280
|
range_creation_start=item.range_creation_start,
|
450
281
|
range_creation_end=item.range_creation_end,
|
@@ -452,12 +283,15 @@ async def search(
|
|
452
283
|
range_modification_end=item.range_modification_end,
|
453
284
|
fields=item.fields,
|
454
285
|
user_vector=item.vector,
|
286
|
+
vectorset=item.vectorset,
|
455
287
|
with_duplicates=item.with_duplicates,
|
456
288
|
with_status=with_status,
|
457
289
|
with_synonyms=item.with_synonyms,
|
458
290
|
autofilter=item.autofilter,
|
459
291
|
security=item.security,
|
460
292
|
rephrase=item.rephrase,
|
293
|
+
hidden=await filter_hidden_resources(kbid, item.show_hidden),
|
294
|
+
rephrase_prompt=item.rephrase_prompt,
|
461
295
|
)
|
462
296
|
pb_query, incomplete_results, autofilters = await query_parser.parse()
|
463
297
|
|
@@ -470,28 +304,28 @@ async def search(
|
|
470
304
|
# We need to merge
|
471
305
|
search_results = await merge_results(
|
472
306
|
results,
|
473
|
-
|
474
|
-
page=item.page_number,
|
307
|
+
top_k=item.top_k,
|
475
308
|
kbid=kbid,
|
476
309
|
show=item.show,
|
477
310
|
field_type_filter=item.field_type_filter,
|
478
311
|
extracted=item.extracted,
|
479
|
-
sort=query_parser.sort,
|
312
|
+
sort=query_parser.sort, # type: ignore
|
480
313
|
requested_relations=pb_query.relation_subgraph,
|
481
314
|
min_score=query_parser.min_score,
|
482
315
|
highlight=item.highlight,
|
483
316
|
)
|
484
317
|
|
485
318
|
if audit is not None and do_audit:
|
486
|
-
|
319
|
+
audit.search(
|
487
320
|
kbid,
|
488
321
|
x_nucliadb_user,
|
489
|
-
|
322
|
+
to_proto.client_type(x_ndb_client),
|
490
323
|
x_forwarded_for,
|
491
324
|
pb_query,
|
492
325
|
time() - start_time,
|
493
326
|
len(search_results.resources),
|
494
327
|
)
|
328
|
+
|
495
329
|
if item.debug:
|
496
330
|
search_results.nodes = debug_nodes_info(queried_nodes)
|
497
331
|
|