nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/search/search/query.py
CHANGED
@@ -19,24 +19,19 @@
|
|
19
19
|
#
|
20
20
|
import asyncio
|
21
21
|
import json
|
22
|
+
import string
|
22
23
|
from datetime import datetime
|
23
24
|
from typing import Any, Awaitable, Optional, Union
|
24
25
|
|
25
26
|
from async_lru import alru_cache
|
26
|
-
from nucliadb_protos.noderesources_pb2 import Resource
|
27
27
|
|
28
28
|
from nucliadb.common import datamanagers
|
29
|
-
from nucliadb.
|
30
|
-
from nucliadb.middleware.transaction import get_read_only_transaction
|
29
|
+
from nucliadb.common.maindb.utils import get_driver
|
31
30
|
from nucliadb.search import logger
|
32
|
-
from nucliadb.search.predict import
|
33
|
-
PredictVectorMissing,
|
34
|
-
SendToPredictError,
|
35
|
-
convert_relations,
|
36
|
-
)
|
31
|
+
from nucliadb.search.predict import SendToPredictError, convert_relations
|
37
32
|
from nucliadb.search.search.filters import (
|
38
33
|
convert_to_node_filters,
|
39
|
-
|
34
|
+
flatten_filter_literals,
|
40
35
|
has_classification_label_filters,
|
41
36
|
split_labels_by_type,
|
42
37
|
translate_label,
|
@@ -46,27 +41,30 @@ from nucliadb.search.search.metrics import (
|
|
46
41
|
node_features,
|
47
42
|
query_parse_dependency_observer,
|
48
43
|
)
|
44
|
+
from nucliadb.search.search.rank_fusion import (
|
45
|
+
RankFusionAlgorithm,
|
46
|
+
)
|
47
|
+
from nucliadb.search.search.rerankers import (
|
48
|
+
Reranker,
|
49
|
+
)
|
49
50
|
from nucliadb.search.utilities import get_predict
|
50
|
-
from nucliadb_models.
|
51
|
+
from nucliadb_models.internal.predict import QueryInfo
|
52
|
+
from nucliadb_models.labels import LABEL_HIDDEN, translate_system_to_alias_label
|
51
53
|
from nucliadb_models.metadata import ResourceProcessingStatus
|
52
54
|
from nucliadb_models.search import (
|
53
55
|
Filter,
|
56
|
+
MaxTokens,
|
54
57
|
MinScore,
|
55
|
-
QueryInfo,
|
56
58
|
SearchOptions,
|
57
|
-
SentenceSearch,
|
58
59
|
SortField,
|
59
|
-
SortFieldMap,
|
60
60
|
SortOptions,
|
61
61
|
SortOrder,
|
62
62
|
SortOrderMap,
|
63
63
|
SuggestOptions,
|
64
|
-
TokenSearch,
|
65
64
|
)
|
66
65
|
from nucliadb_models.security import RequestSecurity
|
67
66
|
from nucliadb_protos import knowledgebox_pb2, nodereader_pb2, utils_pb2
|
68
|
-
from
|
69
|
-
from nucliadb_utils.utilities import has_feature
|
67
|
+
from nucliadb_protos.noderesources_pb2 import Resource
|
70
68
|
|
71
69
|
from .exceptions import InvalidQueryError
|
72
70
|
|
@@ -75,6 +73,8 @@ INDEX_SORTABLE_FIELDS = [
|
|
75
73
|
SortField.MODIFIED,
|
76
74
|
]
|
77
75
|
|
76
|
+
DEFAULT_GENERIC_SEMANTIC_THRESHOLD = 0.7
|
77
|
+
|
78
78
|
|
79
79
|
class QueryParser:
|
80
80
|
"""
|
@@ -86,14 +86,14 @@ class QueryParser:
|
|
86
86
|
query parsing.
|
87
87
|
"""
|
88
88
|
|
89
|
-
_min_score_task: Optional[asyncio.Task] = None
|
90
89
|
_query_information_task: Optional[asyncio.Task] = None
|
91
|
-
|
90
|
+
_get_vectorset_task: Optional[asyncio.Task] = None
|
92
91
|
_detected_entities_task: Optional[asyncio.Task] = None
|
93
92
|
_entities_meta_cache_task: Optional[asyncio.Task] = None
|
94
93
|
_deleted_entities_groups_task: Optional[asyncio.Task] = None
|
95
94
|
_synonyms_task: Optional[asyncio.Task] = None
|
96
95
|
_get_classification_labels_task: Optional[asyncio.Task] = None
|
96
|
+
_get_matryoshka_dimension_task: Optional[asyncio.Task] = None
|
97
97
|
|
98
98
|
def __init__(
|
99
99
|
self,
|
@@ -101,9 +101,9 @@ class QueryParser:
|
|
101
101
|
kbid: str,
|
102
102
|
features: list[SearchOptions],
|
103
103
|
query: str,
|
104
|
-
|
105
|
-
|
106
|
-
|
104
|
+
label_filters: Union[list[str], list[Filter]],
|
105
|
+
keyword_filters: Union[list[str], list[Filter]],
|
106
|
+
top_k: int,
|
107
107
|
min_score: MinScore,
|
108
108
|
faceted: Optional[list[str]] = None,
|
109
109
|
sort: Optional[SortOptions] = None,
|
@@ -121,16 +121,28 @@ class QueryParser:
|
|
121
121
|
key_filters: Optional[list[str]] = None,
|
122
122
|
security: Optional[RequestSecurity] = None,
|
123
123
|
generative_model: Optional[str] = None,
|
124
|
-
rephrase:
|
124
|
+
rephrase: bool = False,
|
125
|
+
rephrase_prompt: Optional[str] = None,
|
126
|
+
max_tokens: Optional[MaxTokens] = None,
|
127
|
+
hidden: Optional[bool] = None,
|
128
|
+
rank_fusion: Optional[RankFusionAlgorithm] = None,
|
129
|
+
reranker: Optional[Reranker] = None,
|
125
130
|
):
|
126
131
|
self.kbid = kbid
|
127
132
|
self.features = features
|
128
133
|
self.query = query
|
129
|
-
self.
|
130
|
-
self.
|
134
|
+
self.hidden = hidden
|
135
|
+
if self.hidden is not None:
|
136
|
+
if self.hidden:
|
137
|
+
label_filters.append(Filter(all=[LABEL_HIDDEN])) # type: ignore
|
138
|
+
else:
|
139
|
+
label_filters.append(Filter(none=[LABEL_HIDDEN])) # type: ignore
|
140
|
+
|
141
|
+
self.label_filters: dict[str, Any] = convert_to_node_filters(label_filters)
|
142
|
+
self.flat_label_filters: list[str] = []
|
143
|
+
self.keyword_filters: dict[str, Any] = convert_to_node_filters(keyword_filters)
|
131
144
|
self.faceted = faceted or []
|
132
|
-
self.
|
133
|
-
self.page_size = page_size
|
145
|
+
self.top_k = top_k
|
134
146
|
self.min_score = min_score
|
135
147
|
self.sort = sort
|
136
148
|
self.range_creation_start = range_creation_start
|
@@ -148,65 +160,81 @@ class QueryParser:
|
|
148
160
|
self.security = security
|
149
161
|
self.generative_model = generative_model
|
150
162
|
self.rephrase = rephrase
|
151
|
-
self.
|
152
|
-
|
153
|
-
|
154
|
-
|
163
|
+
self.rephrase_prompt = rephrase_prompt
|
164
|
+
self.query_endpoint_used = False
|
165
|
+
if len(self.label_filters) > 0:
|
166
|
+
self.label_filters = translate_label_filters(self.label_filters)
|
167
|
+
self.flat_label_filters = flatten_filter_literals(self.label_filters)
|
168
|
+
self.max_tokens = max_tokens
|
169
|
+
self.rank_fusion = rank_fusion
|
170
|
+
self.reranker = reranker
|
171
|
+
|
172
|
+
@property
|
173
|
+
def has_vector_search(self) -> bool:
|
174
|
+
return SearchOptions.SEMANTIC in self.features
|
175
|
+
|
176
|
+
@property
|
177
|
+
def has_relations_search(self) -> bool:
|
178
|
+
return SearchOptions.RELATIONS in self.features
|
179
|
+
|
180
|
+
def _get_query_information(self) -> Awaitable[QueryInfo]:
|
181
|
+
if self._query_information_task is None: # pragma: no cover
|
182
|
+
self._query_information_task = asyncio.create_task(self._query_information())
|
183
|
+
return self._query_information_task
|
184
|
+
|
185
|
+
async def _query_information(self) -> QueryInfo:
|
186
|
+
vectorset = await self.select_query_vectorset()
|
187
|
+
return await query_information(
|
188
|
+
self.kbid, self.query, vectorset, self.generative_model, self.rephrase, self.rephrase_prompt
|
155
189
|
)
|
156
190
|
|
157
|
-
|
158
|
-
|
159
|
-
self.
|
191
|
+
def _get_vectorset(self) -> Awaitable[Optional[str]]:
|
192
|
+
if self._get_vectorset_task is None:
|
193
|
+
self._get_vectorset_task = asyncio.create_task(self._select_vectorset())
|
194
|
+
return self._get_vectorset_task
|
160
195
|
|
161
|
-
def
|
162
|
-
if self.
|
163
|
-
self.
|
164
|
-
get_default_semantic_min_score(self.kbid)
|
165
|
-
)
|
166
|
-
return self._min_score_task
|
196
|
+
async def _select_vectorset(self) -> Optional[str]:
|
197
|
+
if self.vectorset:
|
198
|
+
return self.vectorset
|
167
199
|
|
168
|
-
|
169
|
-
if self._convert_vectors_task is None: # pragma: no cover
|
170
|
-
self._convert_vectors_task = asyncio.create_task(
|
171
|
-
convert_vectors(self.kbid, self.query)
|
172
|
-
)
|
173
|
-
return self._convert_vectors_task
|
200
|
+
# When vectorset is not provided we get the default from Predict API
|
174
201
|
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
return QueryInfo(
|
180
|
-
visual_llm=False,
|
181
|
-
max_context=300_000,
|
182
|
-
entities=TokenSearch(tokens=[], time=0.0),
|
183
|
-
sentence=SentenceSearch(data=[], time=0.0),
|
184
|
-
query=self.query,
|
185
|
-
)
|
202
|
+
try:
|
203
|
+
query_information = await self._get_query_information()
|
204
|
+
except SendToPredictError:
|
205
|
+
return None
|
186
206
|
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
query_information(
|
191
|
-
self.kbid, self.query, self.generative_model, self.rephrase
|
192
|
-
)
|
207
|
+
if query_information.sentence is None:
|
208
|
+
logger.error(
|
209
|
+
"Asking for a vectorset but /query didn't return one", extra={"kbid": self.kbid}
|
193
210
|
)
|
194
|
-
|
211
|
+
return None
|
212
|
+
|
213
|
+
for vectorset in query_information.sentence.vectors.keys():
|
214
|
+
self.vectorset = vectorset
|
215
|
+
break
|
216
|
+
|
217
|
+
return self.vectorset
|
218
|
+
|
219
|
+
def _get_matryoshka_dimension(self) -> Awaitable[Optional[int]]:
|
220
|
+
if self._get_matryoshka_dimension_task is None:
|
221
|
+
self._get_matryoshka_dimension_task = asyncio.create_task(self._matryoshka_dimension())
|
222
|
+
return self._get_matryoshka_dimension_task
|
223
|
+
|
224
|
+
async def _matryoshka_dimension(self) -> Optional[int]:
|
225
|
+
vectorset = await self._select_vectorset()
|
226
|
+
return await get_matryoshka_dimension_cached(self.kbid, vectorset)
|
195
227
|
|
196
228
|
def _get_detected_entities(self) -> Awaitable[list[utils_pb2.RelationNode]]:
|
197
229
|
if self._detected_entities_task is None: # pragma: no cover
|
198
|
-
self._detected_entities_task = asyncio.create_task(
|
199
|
-
detect_entities(self.kbid, self.query)
|
200
|
-
)
|
230
|
+
self._detected_entities_task = asyncio.create_task(detect_entities(self.kbid, self.query))
|
201
231
|
return self._detected_entities_task
|
202
232
|
|
203
233
|
def _get_entities_meta_cache(
|
204
234
|
self,
|
205
235
|
) -> Awaitable[datamanagers.entities.EntitiesMetaCache]:
|
206
236
|
if self._entities_meta_cache_task is None:
|
207
|
-
self._entities_meta_cache_task = asyncio.create_task(
|
208
|
-
get_entities_meta_cache(self.kbid)
|
209
|
-
)
|
237
|
+
self._entities_meta_cache_task = asyncio.create_task(get_entities_meta_cache(self.kbid))
|
210
238
|
return self._entities_meta_cache_task
|
211
239
|
|
212
240
|
def _get_deleted_entity_groups(self) -> Awaitable[list[str]]:
|
@@ -233,28 +261,17 @@ class QueryParser:
|
|
233
261
|
This will schedule concurrent tasks for different data that needs to be pulled
|
234
262
|
for the sake of the query being performed
|
235
263
|
"""
|
236
|
-
if len(self.
|
237
|
-
self.flat_filter_labels
|
238
|
-
):
|
264
|
+
if len(self.label_filters) > 0 and has_classification_label_filters(self.flat_label_filters):
|
239
265
|
asyncio.ensure_future(self._get_classification_labels())
|
240
|
-
if self.min_score.semantic is None:
|
241
|
-
asyncio.ensure_future(self._get_default_semantic_min_score())
|
242
266
|
|
243
|
-
if
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
) > 0:
|
252
|
-
if (
|
253
|
-
not self.query_endpoint_enabled
|
254
|
-
or SearchOptions.VECTOR not in self.features
|
255
|
-
or self.user_vector is not None
|
256
|
-
):
|
257
|
-
self.query_endpoint_enabled = False
|
267
|
+
if self.has_vector_search and self.user_vector is None:
|
268
|
+
self.query_endpoint_used = True
|
269
|
+
asyncio.ensure_future(self._get_query_information())
|
270
|
+
asyncio.ensure_future(self._get_matryoshka_dimension())
|
271
|
+
|
272
|
+
if (self.has_relations_search or self.autofilter) and len(self.query) > 0:
|
273
|
+
if not self.query_endpoint_used:
|
274
|
+
# If we only need to detect entities, we don't need the query endpoint
|
258
275
|
asyncio.ensure_future(self._get_detected_entities())
|
259
276
|
asyncio.ensure_future(self._get_entities_meta_cache())
|
260
277
|
asyncio.ensure_future(self._get_deleted_entity_groups())
|
@@ -273,6 +290,8 @@ class QueryParser:
|
|
273
290
|
request.body = self.query
|
274
291
|
request.with_duplicates = self.with_duplicates
|
275
292
|
|
293
|
+
self.parse_sorting(request)
|
294
|
+
|
276
295
|
await self._schedule_dependency_tasks()
|
277
296
|
|
278
297
|
await self.parse_filters(request)
|
@@ -281,30 +300,29 @@ class QueryParser:
|
|
281
300
|
incomplete = await self.parse_vector_search(request)
|
282
301
|
autofilters = await self.parse_relation_search(request)
|
283
302
|
await self.parse_synonyms(request)
|
284
|
-
|
285
|
-
self.
|
286
|
-
await self.parse_min_score(request)
|
287
|
-
|
303
|
+
await self.parse_min_score(request, incomplete)
|
304
|
+
await self.adjust_page_size(request, self.rank_fusion, self.reranker)
|
288
305
|
return request, incomplete, autofilters
|
289
306
|
|
290
307
|
async def parse_filters(self, request: nodereader_pb2.SearchRequest) -> None:
|
291
|
-
if len(self.
|
292
|
-
field_labels = self.
|
308
|
+
if len(self.label_filters) > 0:
|
309
|
+
field_labels = self.flat_label_filters
|
293
310
|
paragraph_labels: list[str] = []
|
294
|
-
if has_classification_label_filters(self.
|
311
|
+
if has_classification_label_filters(self.flat_label_filters):
|
295
312
|
classification_labels = await self._get_classification_labels()
|
296
313
|
field_labels, paragraph_labels = split_labels_by_type(
|
297
|
-
self.
|
314
|
+
self.flat_label_filters, classification_labels
|
298
315
|
)
|
299
|
-
check_supported_filters(self.
|
316
|
+
check_supported_filters(self.label_filters, paragraph_labels)
|
300
317
|
|
301
318
|
request.filter.field_labels.extend(field_labels)
|
302
319
|
request.filter.paragraph_labels.extend(paragraph_labels)
|
303
|
-
request.filter.
|
320
|
+
request.filter.labels_expression = json.dumps(self.label_filters)
|
304
321
|
|
305
|
-
|
306
|
-
|
307
|
-
|
322
|
+
if len(self.keyword_filters) > 0:
|
323
|
+
request.filter.keywords_expression = json.dumps(self.keyword_filters)
|
324
|
+
|
325
|
+
request.faceted.labels.extend([translate_label(facet) for facet in self.faceted])
|
308
326
|
request.fields.extend(self.fields)
|
309
327
|
|
310
328
|
if self.security is not None and len(self.security.groups) > 0:
|
@@ -354,9 +372,7 @@ class QueryParser:
|
|
354
372
|
order=SortOrder.DESC,
|
355
373
|
limit=None,
|
356
374
|
)
|
357
|
-
elif
|
358
|
-
self.sort.field not in INDEX_SORTABLE_FIELDS and self.sort.limit is None
|
359
|
-
):
|
375
|
+
elif self.sort.field not in INDEX_SORTABLE_FIELDS and self.sort.limit is None:
|
360
376
|
raise InvalidQueryError(
|
361
377
|
"sort_field",
|
362
378
|
f"Sort by '{self.sort.field}' requires setting a sort limit",
|
@@ -369,95 +385,133 @@ class QueryParser:
|
|
369
385
|
# have consistent results, we must limit them
|
370
386
|
request.result_per_page = self.sort.limit
|
371
387
|
else:
|
372
|
-
request.result_per_page = self.
|
388
|
+
request.result_per_page = self.top_k
|
373
389
|
|
374
|
-
sort_field =
|
390
|
+
sort_field = get_sort_field_proto(self.sort.field) if self.sort else None
|
375
391
|
if sort_field is not None:
|
376
392
|
request.order.sort_by = sort_field
|
377
393
|
request.order.type = SortOrderMap[self.sort.order] # type: ignore
|
378
394
|
|
379
|
-
async def parse_min_score(self, request: nodereader_pb2.SearchRequest) -> None:
|
380
|
-
|
381
|
-
|
395
|
+
async def parse_min_score(self, request: nodereader_pb2.SearchRequest, incomplete: bool) -> None:
|
396
|
+
semantic_min_score = DEFAULT_GENERIC_SEMANTIC_THRESHOLD
|
397
|
+
if self.min_score.semantic is not None:
|
398
|
+
semantic_min_score = self.min_score.semantic
|
399
|
+
elif self.has_vector_search and not incomplete:
|
400
|
+
query_information = await self._get_query_information()
|
401
|
+
vectorset = await self._select_vectorset()
|
402
|
+
if vectorset is not None:
|
403
|
+
semantic_threshold = query_information.semantic_thresholds.get(vectorset, None)
|
404
|
+
if semantic_threshold is not None:
|
405
|
+
semantic_min_score = semantic_threshold
|
406
|
+
else:
|
407
|
+
logger.warning(
|
408
|
+
"Semantic threshold not found in query information, using default",
|
409
|
+
extra={"kbid": self.kbid},
|
410
|
+
)
|
411
|
+
else:
|
412
|
+
logger.warning(
|
413
|
+
"Vectorset unset by user or predict, using default semantic threshold",
|
414
|
+
extra={"kbid": self.kbid},
|
415
|
+
)
|
416
|
+
self.min_score.semantic = semantic_min_score
|
382
417
|
request.min_score_semantic = self.min_score.semantic
|
383
418
|
request.min_score_bm25 = self.min_score.bm25
|
384
419
|
|
385
420
|
def parse_document_search(self, request: nodereader_pb2.SearchRequest) -> None:
|
386
|
-
if SearchOptions.
|
421
|
+
if SearchOptions.FULLTEXT in self.features:
|
387
422
|
request.document = True
|
388
423
|
node_features.inc({"type": "documents"})
|
389
424
|
|
390
425
|
def parse_paragraph_search(self, request: nodereader_pb2.SearchRequest) -> None:
|
391
|
-
if SearchOptions.
|
426
|
+
if SearchOptions.KEYWORD in self.features:
|
392
427
|
request.paragraph = True
|
393
428
|
node_features.inc({"type": "paragraphs"})
|
394
429
|
|
430
|
+
async def select_query_vectorset(self) -> Optional[str]:
|
431
|
+
"""Set and return the requested vectorset parameter (if used) validated
|
432
|
+
for the current KB.
|
433
|
+
|
434
|
+
"""
|
435
|
+
if not self.vectorset:
|
436
|
+
return None
|
437
|
+
|
438
|
+
# validate vectorset
|
439
|
+
async with datamanagers.with_ro_transaction() as txn:
|
440
|
+
if not await datamanagers.vectorsets.exists(
|
441
|
+
txn, kbid=self.kbid, vectorset_id=self.vectorset
|
442
|
+
):
|
443
|
+
raise InvalidQueryError(
|
444
|
+
"vectorset",
|
445
|
+
f"Vectorset {self.vectorset} doesn't exist in you Knowledge Box",
|
446
|
+
)
|
447
|
+
return self.vectorset
|
448
|
+
|
395
449
|
async def parse_vector_search(self, request: nodereader_pb2.SearchRequest) -> bool:
|
396
|
-
if
|
450
|
+
if not self.has_vector_search:
|
397
451
|
return False
|
398
452
|
|
399
453
|
node_features.inc({"type": "vectors"})
|
400
454
|
|
401
455
|
incomplete = False
|
402
|
-
if self.vectorset is not None:
|
403
|
-
request.vectorset = self.vectorset
|
404
|
-
node_features.inc({"type": "vectorset"})
|
405
456
|
|
457
|
+
vectorset = await self._select_vectorset()
|
458
|
+
if vectorset is not None:
|
459
|
+
request.vectorset = vectorset
|
460
|
+
|
461
|
+
query_vector = None
|
406
462
|
if self.user_vector is None:
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
else:
|
413
|
-
incomplete = True
|
414
|
-
except SendToPredictError as err:
|
415
|
-
logger.warning(
|
416
|
-
f"Errors on predict api trying to embedd query: {err}"
|
417
|
-
)
|
418
|
-
incomplete = True
|
419
|
-
except PredictVectorMissing:
|
420
|
-
logger.warning("Predict api returned an empty vector")
|
421
|
-
incomplete = True
|
463
|
+
try:
|
464
|
+
query_info = await self._get_query_information()
|
465
|
+
except SendToPredictError as err:
|
466
|
+
logger.warning(f"Errors on predict api trying to embedd query: {err}")
|
467
|
+
incomplete = True
|
422
468
|
else:
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
469
|
+
if query_info and query_info.sentence:
|
470
|
+
if vectorset:
|
471
|
+
if vectorset in query_info.sentence.vectors:
|
472
|
+
query_vector = query_info.sentence.vectors[vectorset]
|
473
|
+
else:
|
474
|
+
incomplete = True
|
475
|
+
else:
|
476
|
+
for vectorset_id, vector in query_info.sentence.vectors.items():
|
477
|
+
if vector:
|
478
|
+
query_vector = vector
|
479
|
+
break
|
480
|
+
else:
|
481
|
+
incomplete = True
|
482
|
+
|
483
|
+
else:
|
432
484
|
incomplete = True
|
433
485
|
else:
|
434
|
-
|
486
|
+
query_vector = self.user_vector
|
487
|
+
|
488
|
+
if query_vector is not None:
|
489
|
+
matryoshka_dimension = await self._get_matryoshka_dimension()
|
490
|
+
if matryoshka_dimension is not None:
|
491
|
+
# KB using a matryoshka embeddings model, cut the query vector
|
492
|
+
# accordingly
|
493
|
+
query_vector = query_vector[:matryoshka_dimension]
|
494
|
+
request.vector.extend(query_vector)
|
495
|
+
|
435
496
|
return incomplete
|
436
497
|
|
437
|
-
async def parse_relation_search(
|
438
|
-
self, request: nodereader_pb2.SearchRequest
|
439
|
-
) -> list[str]:
|
498
|
+
async def parse_relation_search(self, request: nodereader_pb2.SearchRequest) -> list[str]:
|
440
499
|
autofilters = []
|
441
|
-
|
442
|
-
|
443
|
-
if not self.query_endpoint_enabled:
|
500
|
+
if self.has_relations_search or self.autofilter:
|
501
|
+
if not self.query_endpoint_used:
|
444
502
|
detected_entities = await self._get_detected_entities()
|
445
503
|
else:
|
446
504
|
query_info_result = await self._get_query_information()
|
447
505
|
if query_info_result.entities:
|
448
|
-
detected_entities = convert_relations(
|
449
|
-
query_info_result.entities.dict()
|
450
|
-
)
|
506
|
+
detected_entities = convert_relations(query_info_result.entities.model_dump())
|
451
507
|
else:
|
452
508
|
detected_entities = []
|
453
509
|
meta_cache = await self._get_entities_meta_cache()
|
454
510
|
detected_entities = expand_entities(meta_cache, detected_entities)
|
455
|
-
if
|
511
|
+
if self.has_relations_search:
|
456
512
|
request.relation_subgraph.entry_points.extend(detected_entities)
|
457
513
|
request.relation_subgraph.depth = 1
|
458
|
-
request.relation_subgraph.deleted_groups.extend(
|
459
|
-
await self._get_deleted_entity_groups()
|
460
|
-
)
|
514
|
+
request.relation_subgraph.deleted_groups.extend(await self._get_deleted_entity_groups())
|
461
515
|
for group_id, deleted_entities in meta_cache.deleted_entities.items():
|
462
516
|
request.relation_subgraph.deleted_entities.append(
|
463
517
|
nodereader_pb2.EntitiesSubgraphRequest.DeletedEntities(
|
@@ -467,67 +521,111 @@ class QueryParser:
|
|
467
521
|
node_features.inc({"type": "relations"})
|
468
522
|
if self.autofilter:
|
469
523
|
entity_filters = parse_entities_to_filters(request, detected_entities)
|
470
|
-
autofilters.extend(
|
471
|
-
[translate_system_to_alias_label(e) for e in entity_filters]
|
472
|
-
)
|
524
|
+
autofilters.extend([translate_system_to_alias_label(e) for e in entity_filters])
|
473
525
|
return autofilters
|
474
526
|
|
475
527
|
async def parse_synonyms(self, request: nodereader_pb2.SearchRequest) -> None:
|
476
|
-
|
528
|
+
"""
|
529
|
+
Replace the terms in the query with an expression that will make it match with the configured synonyms.
|
530
|
+
We're using the Tantivy's query language here: https://docs.rs/tantivy/latest/tantivy/query/struct.QueryParser.html
|
531
|
+
|
532
|
+
Example:
|
533
|
+
- Synonyms: Foo -> Bar, Baz
|
534
|
+
- Query: "What is Foo?"
|
535
|
+
- Advanced Query: "What is (Foo OR Bar OR Baz)?"
|
536
|
+
"""
|
537
|
+
if not self.with_synonyms or not self.query:
|
538
|
+
# Nothing to do
|
477
539
|
return
|
478
540
|
|
479
|
-
if
|
480
|
-
SearchOptions.VECTOR in self.features
|
481
|
-
or SearchOptions.RELATIONS in self.features
|
482
|
-
):
|
541
|
+
if self.has_vector_search or self.has_relations_search:
|
483
542
|
raise InvalidQueryError(
|
484
543
|
"synonyms",
|
485
544
|
"Search with custom synonyms is only supported on paragraph and document search",
|
486
545
|
)
|
487
546
|
|
488
|
-
if not self.query:
|
489
|
-
# Nothing to do
|
490
|
-
return
|
491
|
-
|
492
547
|
synonyms = await self._get_synomyns()
|
493
548
|
if synonyms is None:
|
494
549
|
# No synonyms found
|
495
550
|
return
|
496
551
|
|
497
|
-
|
498
|
-
|
499
|
-
for term in
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
552
|
+
# Calculate term variants: 'term' -> '(term OR synonym1 OR synonym2)'
|
553
|
+
variants: dict[str, str] = {}
|
554
|
+
for term, term_synonyms in synonyms.terms.items():
|
555
|
+
if len(term_synonyms.synonyms) > 0:
|
556
|
+
variants[term] = "({})".format(" OR ".join([term] + list(term_synonyms.synonyms)))
|
557
|
+
|
558
|
+
# Split the query into terms
|
559
|
+
query_terms = self.query.split()
|
560
|
+
|
561
|
+
# Remove punctuation from the query terms
|
562
|
+
clean_query_terms = [term.strip(string.punctuation) for term in query_terms]
|
563
|
+
|
564
|
+
# Replace the original terms with the variants if the cleaned term is in the variants
|
565
|
+
term_with_synonyms_found = False
|
566
|
+
for index, clean_term in enumerate(clean_query_terms):
|
567
|
+
if clean_term in variants:
|
568
|
+
term_with_synonyms_found = True
|
569
|
+
query_terms[index] = query_terms[index].replace(clean_term, variants[clean_term])
|
570
|
+
|
571
|
+
if term_with_synonyms_found:
|
572
|
+
request.advanced_query = " ".join(query_terms)
|
509
573
|
request.ClearField("body")
|
510
574
|
|
511
575
|
async def get_visual_llm_enabled(self) -> bool:
|
512
576
|
return (await self._get_query_information()).visual_llm
|
513
577
|
|
514
|
-
async def
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
578
|
+
async def get_max_tokens_context(self) -> int:
|
579
|
+
model_max = (await self._get_query_information()).max_context
|
580
|
+
if self.max_tokens is not None and self.max_tokens.context is not None:
|
581
|
+
if self.max_tokens.context > model_max:
|
582
|
+
raise InvalidQueryError(
|
583
|
+
"max_tokens.context",
|
584
|
+
f"Max context tokens is higher than the model's limit of {model_max}",
|
585
|
+
)
|
586
|
+
return self.max_tokens.context
|
587
|
+
return model_max
|
588
|
+
|
589
|
+
def get_max_tokens_answer(self) -> Optional[int]:
|
590
|
+
if self.max_tokens is not None and self.max_tokens.answer is not None:
|
591
|
+
return self.max_tokens.answer
|
592
|
+
return None
|
593
|
+
|
594
|
+
async def adjust_page_size(
|
595
|
+
self,
|
596
|
+
request: nodereader_pb2.SearchRequest,
|
597
|
+
rank_fusion: Optional[RankFusionAlgorithm],
|
598
|
+
reranker: Optional[Reranker],
|
599
|
+
):
|
600
|
+
"""Adjust requested page size depending on rank fusion and reranking algorithms.
|
601
|
+
|
602
|
+
Some rerankers want more results than the requested by the user so
|
603
|
+
reranking can have more choices.
|
604
|
+
|
605
|
+
"""
|
606
|
+
rank_fusion_window = 0
|
607
|
+
if rank_fusion is not None:
|
608
|
+
rank_fusion_window = rank_fusion.window
|
609
|
+
|
610
|
+
reranker_window = 0
|
611
|
+
if reranker is not None:
|
612
|
+
reranker_window = reranker.window or 0
|
613
|
+
|
614
|
+
request.result_per_page = max(
|
615
|
+
request.result_per_page,
|
616
|
+
rank_fusion_window,
|
617
|
+
reranker_window,
|
618
|
+
)
|
519
619
|
|
520
620
|
|
521
621
|
async def paragraph_query_to_pb(
|
522
622
|
kbid: str,
|
523
|
-
features: list[SearchOptions],
|
524
623
|
rid: str,
|
525
624
|
query: str,
|
526
625
|
fields: list[str],
|
527
626
|
filters: list[str],
|
528
627
|
faceted: list[str],
|
529
|
-
|
530
|
-
page_size: int,
|
628
|
+
top_k: int,
|
531
629
|
range_creation_start: Optional[datetime] = None,
|
532
630
|
range_creation_end: Optional[datetime] = None,
|
533
631
|
range_modification_start: Optional[datetime] = None,
|
@@ -535,13 +633,37 @@ async def paragraph_query_to_pb(
|
|
535
633
|
sort: Optional[str] = None,
|
536
634
|
sort_ord: str = SortOrder.DESC.value,
|
537
635
|
with_duplicates: bool = False,
|
538
|
-
) -> nodereader_pb2.
|
539
|
-
request = nodereader_pb2.
|
540
|
-
request.
|
636
|
+
) -> nodereader_pb2.SearchRequest:
|
637
|
+
request = nodereader_pb2.SearchRequest()
|
638
|
+
request.paragraph = True
|
541
639
|
|
542
640
|
# We need to ask for all and cut later
|
543
641
|
request.page_number = 0
|
544
|
-
request.result_per_page =
|
642
|
+
request.result_per_page = top_k
|
643
|
+
|
644
|
+
request.body = query
|
645
|
+
|
646
|
+
# we don't have a specific filter only for resource_ids but key_filters
|
647
|
+
# parse "rid" and "rid/field" like ids, so it does the job
|
648
|
+
request.key_filters.append(rid)
|
649
|
+
|
650
|
+
if len(filters) > 0:
|
651
|
+
field_labels = filters
|
652
|
+
paragraph_labels: list[str] = []
|
653
|
+
if has_classification_label_filters(filters):
|
654
|
+
classification_labels = await get_classification_labels(kbid)
|
655
|
+
field_labels, paragraph_labels = split_labels_by_type(filters, classification_labels)
|
656
|
+
request.filter.field_labels.extend(field_labels)
|
657
|
+
request.filter.paragraph_labels.extend(paragraph_labels)
|
658
|
+
|
659
|
+
request.faceted.labels.extend([translate_label(facet) for facet in faceted])
|
660
|
+
request.fields.extend(fields)
|
661
|
+
|
662
|
+
if sort:
|
663
|
+
request.order.field = sort
|
664
|
+
request.order.type = sort_ord # type: ignore
|
665
|
+
|
666
|
+
request.with_duplicates = with_duplicates
|
545
667
|
|
546
668
|
if range_creation_start is not None:
|
547
669
|
request.timestamps.from_created.FromDatetime(range_creation_start)
|
@@ -555,44 +677,20 @@ async def paragraph_query_to_pb(
|
|
555
677
|
if range_modification_end is not None:
|
556
678
|
request.timestamps.to_modified.FromDatetime(range_modification_end)
|
557
679
|
|
558
|
-
if SearchOptions.PARAGRAPH in features:
|
559
|
-
request.uuid = rid
|
560
|
-
request.body = query
|
561
|
-
if len(filters) > 0:
|
562
|
-
field_labels = filters
|
563
|
-
paragraph_labels: list[str] = []
|
564
|
-
if has_classification_label_filters(filters):
|
565
|
-
classification_labels = await get_classification_labels(kbid)
|
566
|
-
field_labels, paragraph_labels = split_labels_by_type(
|
567
|
-
filters, classification_labels
|
568
|
-
)
|
569
|
-
request.filter.field_labels.extend(field_labels)
|
570
|
-
request.filter.paragraph_labels.extend(paragraph_labels)
|
571
|
-
|
572
|
-
request.faceted.labels.extend([translate_label(facet) for facet in faceted])
|
573
|
-
if sort:
|
574
|
-
request.order.field = sort
|
575
|
-
request.order.type = sort_ord # type: ignore
|
576
|
-
request.fields.extend(fields)
|
577
|
-
|
578
680
|
return request
|
579
681
|
|
580
682
|
|
581
|
-
@query_parse_dependency_observer.wrap({"type": "convert_vectors"})
|
582
|
-
async def convert_vectors(kbid: str, query: str) -> list[float]:
|
583
|
-
predict = get_predict()
|
584
|
-
return await predict.convert_sentence_to_vector(kbid, query)
|
585
|
-
|
586
|
-
|
587
683
|
@query_parse_dependency_observer.wrap({"type": "query_information"})
|
588
684
|
async def query_information(
|
589
685
|
kbid: str,
|
590
686
|
query: str,
|
687
|
+
semantic_model: Optional[str],
|
591
688
|
generative_model: Optional[str] = None,
|
592
689
|
rephrase: bool = False,
|
690
|
+
rephrase_prompt: Optional[str] = None,
|
593
691
|
) -> QueryInfo:
|
594
692
|
predict = get_predict()
|
595
|
-
return await predict.query(kbid, query, generative_model, rephrase)
|
693
|
+
return await predict.query(kbid, query, semantic_model, generative_model, rephrase, rephrase_prompt)
|
596
694
|
|
597
695
|
|
598
696
|
@query_parse_dependency_observer.wrap({"type": "detect_entities"})
|
@@ -632,9 +730,7 @@ def expand_entities(
|
|
632
730
|
)
|
633
731
|
|
634
732
|
if entity.value in duplicated_entities_by_value[entity.subtype]:
|
635
|
-
source_duplicate = duplicated_entities_by_value[entity.subtype][
|
636
|
-
entity.value
|
637
|
-
]
|
733
|
+
source_duplicate = duplicated_entities_by_value[entity.subtype][entity.value]
|
638
734
|
result_entities[source_duplicate] = utils_pb2.RelationNode(
|
639
735
|
ntype=utils_pb2.RelationNode.NodeType.ENTITY,
|
640
736
|
subtype=entity.subtype,
|
@@ -667,13 +763,16 @@ def parse_entities_to_filters(
|
|
667
763
|
if entity_filter not in request.filter.field_labels:
|
668
764
|
request.filter.field_labels.append(entity_filter)
|
669
765
|
added_filters.append(entity_filter)
|
766
|
+
|
670
767
|
# We need to expand the filter expression with the automatically detected entities.
|
671
768
|
if len(added_filters) > 0:
|
769
|
+
# So far, autofilters feature will only yield 'and' expressions with the detected entities.
|
770
|
+
# More complex autofilters can be added here if we leverage the query endpoint.
|
672
771
|
expanded_expression = {"and": [{"literal": entity} for entity in added_filters]}
|
673
|
-
if request.filter.
|
674
|
-
expression = json.loads(request.filter.
|
675
|
-
expanded_expression["and"].
|
676
|
-
request.filter.
|
772
|
+
if request.filter.labels_expression:
|
773
|
+
expression = json.loads(request.filter.labels_expression)
|
774
|
+
expanded_expression["and"].append(expression)
|
775
|
+
request.filter.labels_expression = json.dumps(expanded_expression)
|
677
776
|
return added_filters
|
678
777
|
|
679
778
|
|
@@ -687,6 +786,7 @@ def suggest_query_to_pb(
|
|
687
786
|
range_creation_end: Optional[datetime] = None,
|
688
787
|
range_modification_start: Optional[datetime] = None,
|
689
788
|
range_modification_end: Optional[datetime] = None,
|
789
|
+
hidden: Optional[bool] = None,
|
690
790
|
) -> nodereader_pb2.SuggestRequest:
|
691
791
|
request = nodereader_pb2.SuggestRequest()
|
692
792
|
|
@@ -696,10 +796,21 @@ def suggest_query_to_pb(
|
|
696
796
|
|
697
797
|
if SuggestOptions.PARAGRAPH in features:
|
698
798
|
request.features.append(nodereader_pb2.SuggestFeatures.PARAGRAPHS)
|
699
|
-
filters = [translate_label(fltr) for fltr in filters]
|
700
|
-
request.filter.field_labels.extend(filters)
|
701
799
|
request.fields.extend(fields)
|
702
800
|
|
801
|
+
if hidden is not None:
|
802
|
+
if hidden:
|
803
|
+
filters.append(Filter(all=[LABEL_HIDDEN])) # type: ignore
|
804
|
+
else:
|
805
|
+
filters.append(Filter(none=[LABEL_HIDDEN])) # type: ignore
|
806
|
+
|
807
|
+
expression = convert_to_node_filters(filters)
|
808
|
+
if expression:
|
809
|
+
expression = translate_label_filters(expression)
|
810
|
+
|
811
|
+
request.filter.field_labels.extend(flatten_filter_literals(expression))
|
812
|
+
request.filter.labels_expression = json.dumps(expression)
|
813
|
+
|
703
814
|
if range_creation_start is not None:
|
704
815
|
request.timestamps.from_created.FromDatetime(range_creation_start)
|
705
816
|
if range_creation_end is not None:
|
@@ -722,49 +833,28 @@ PROCESSING_STATUS_TO_PB_MAP = {
|
|
722
833
|
}
|
723
834
|
|
724
835
|
|
725
|
-
@query_parse_dependency_observer.wrap({"type": "min_score"})
|
726
|
-
async def get_kb_model_default_min_score(kbid: str) -> Optional[float]:
|
727
|
-
txn = await get_read_only_transaction()
|
728
|
-
model = await datamanagers.kb.get_model_metadata(txn, kbid=kbid)
|
729
|
-
if model.HasField("default_min_score"):
|
730
|
-
return model.default_min_score
|
731
|
-
else:
|
732
|
-
return None
|
733
|
-
|
734
|
-
|
735
|
-
@alru_cache(maxsize=None)
|
736
|
-
async def get_default_semantic_min_score(kbid: str) -> float:
|
737
|
-
fallback = 0.7
|
738
|
-
model_min_score = await get_kb_model_default_min_score(kbid)
|
739
|
-
if model_min_score is not None:
|
740
|
-
return model_min_score
|
741
|
-
return fallback
|
742
|
-
|
743
|
-
|
744
836
|
@query_parse_dependency_observer.wrap({"type": "synonyms"})
|
745
837
|
async def get_kb_synonyms(kbid: str) -> Optional[knowledgebox_pb2.Synonyms]:
|
746
|
-
|
747
|
-
|
838
|
+
async with get_driver().transaction(read_only=True) as txn:
|
839
|
+
return await datamanagers.synonyms.get(txn, kbid=kbid)
|
748
840
|
|
749
841
|
|
750
842
|
@query_parse_dependency_observer.wrap({"type": "entities_meta_cache"})
|
751
843
|
async def get_entities_meta_cache(kbid: str) -> datamanagers.entities.EntitiesMetaCache:
|
752
|
-
|
753
|
-
|
844
|
+
async with get_driver().transaction(read_only=True) as txn:
|
845
|
+
return await datamanagers.entities.get_entities_meta_cache(txn, kbid=kbid)
|
754
846
|
|
755
847
|
|
756
848
|
@query_parse_dependency_observer.wrap({"type": "deleted_entities_groups"})
|
757
849
|
async def get_deleted_entity_groups(kbid: str) -> list[str]:
|
758
|
-
|
759
|
-
|
760
|
-
(await datamanagers.entities.get_deleted_groups(txn, kbid=kbid)).entities_groups
|
761
|
-
)
|
850
|
+
async with get_driver().transaction(read_only=True) as txn:
|
851
|
+
return list((await datamanagers.entities.get_deleted_groups(txn, kbid=kbid)).entities_groups)
|
762
852
|
|
763
853
|
|
764
854
|
@query_parse_dependency_observer.wrap({"type": "classification_labels"})
|
765
855
|
async def get_classification_labels(kbid: str) -> knowledgebox_pb2.Labels:
|
766
|
-
|
767
|
-
|
856
|
+
async with get_driver().transaction(read_only=True) as txn:
|
857
|
+
return await datamanagers.labels.get_labels(txn, kbid=kbid)
|
768
858
|
|
769
859
|
|
770
860
|
def check_supported_filters(filters: dict[str, Any], paragraph_labels: list[str]):
|
@@ -783,9 +873,48 @@ def check_supported_filters(filters: dict[str, Any], paragraph_labels: list[str]
|
|
783
873
|
"Paragraph labels can only be used with 'all' filter",
|
784
874
|
)
|
785
875
|
for term in filters["and"]:
|
786
|
-
# Nested expressions are not allowed with paragraph labels
|
787
|
-
if "
|
876
|
+
# Nested expressions are not allowed with paragraph labels (only "literal" and "not(literal)")
|
877
|
+
if "not" in term:
|
878
|
+
subterm = term["not"]
|
879
|
+
if "literal" not in subterm:
|
880
|
+
# AND (NOT( X )) where X is anything other than a literal
|
881
|
+
raise InvalidQueryError(
|
882
|
+
"filters",
|
883
|
+
"Paragraph labels can only be used with 'all' filter",
|
884
|
+
)
|
885
|
+
elif "literal" not in term:
|
788
886
|
raise InvalidQueryError(
|
789
887
|
"filters",
|
790
888
|
"Paragraph labels can only be used with 'all' filter",
|
791
889
|
)
|
890
|
+
|
891
|
+
|
892
|
+
@alru_cache(maxsize=None)
|
893
|
+
async def get_matryoshka_dimension_cached(kbid: str, vectorset: Optional[str]) -> Optional[int]:
|
894
|
+
# This can be safely cached as the matryoshka dimension is not expected to change
|
895
|
+
return await get_matryoshka_dimension(kbid, vectorset)
|
896
|
+
|
897
|
+
|
898
|
+
@query_parse_dependency_observer.wrap({"type": "matryoshka_dimension"})
|
899
|
+
async def get_matryoshka_dimension(kbid: str, vectorset: Optional[str]) -> Optional[int]:
|
900
|
+
async with get_driver().transaction(read_only=True) as txn:
|
901
|
+
matryoshka_dimension = None
|
902
|
+
if not vectorset:
|
903
|
+
# XXX this should be migrated once we remove the "default" vectorset
|
904
|
+
# concept
|
905
|
+
matryoshka_dimension = await datamanagers.kb.get_matryoshka_vector_dimension(txn, kbid=kbid)
|
906
|
+
else:
|
907
|
+
vectorset_config = await datamanagers.vectorsets.get(txn, kbid=kbid, vectorset_id=vectorset)
|
908
|
+
if vectorset_config is not None and vectorset_config.vectorset_index_config.vector_dimension:
|
909
|
+
matryoshka_dimension = vectorset_config.vectorset_index_config.vector_dimension
|
910
|
+
|
911
|
+
return matryoshka_dimension
|
912
|
+
|
913
|
+
|
914
|
+
def get_sort_field_proto(obj: SortField) -> Optional[nodereader_pb2.OrderBy.OrderField.ValueType]:
|
915
|
+
return {
|
916
|
+
SortField.SCORE: None,
|
917
|
+
SortField.CREATED: nodereader_pb2.OrderBy.OrderField.CREATED,
|
918
|
+
SortField.MODIFIED: nodereader_pb2.OrderBy.OrderField.MODIFIED,
|
919
|
+
SortField.TITLE: None,
|
920
|
+
}[obj]
|