nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/search/search/merge.py
CHANGED
@@ -22,19 +22,10 @@ import datetime
|
|
22
22
|
import math
|
23
23
|
from typing import Any, Optional, Set, Union
|
24
24
|
|
25
|
-
from
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
EntitiesSubgraphRequest,
|
30
|
-
ParagraphResult,
|
31
|
-
ParagraphSearchResponse,
|
32
|
-
RelationSearchResponse,
|
33
|
-
SearchResponse,
|
34
|
-
SuggestResponse,
|
35
|
-
VectorSearchResponse,
|
36
|
-
)
|
37
|
-
|
25
|
+
from nucliadb.common.ids import FieldId, ParagraphId
|
26
|
+
from nucliadb.common.models_utils.from_proto import RelationTypePbMap
|
27
|
+
from nucliadb.search.search import cache
|
28
|
+
from nucliadb.search.search.cut import cut_page
|
38
29
|
from nucliadb.search.search.fetch import (
|
39
30
|
fetch_resources,
|
40
31
|
get_labels_paragraph,
|
@@ -43,11 +34,11 @@ from nucliadb.search.search.fetch import (
|
|
43
34
|
)
|
44
35
|
from nucliadb_models.common import FieldTypeName
|
45
36
|
from nucliadb_models.labels import translate_system_to_alias_label
|
46
|
-
from nucliadb_models.metadata import RelationTypePbMap
|
47
37
|
from nucliadb_models.resource import ExtractedDataTypeName
|
48
38
|
from nucliadb_models.search import (
|
49
39
|
DirectionalRelation,
|
50
40
|
EntitySubgraph,
|
41
|
+
EntityType,
|
51
42
|
KnowledgeboxSearchResults,
|
52
43
|
KnowledgeboxSuggestResults,
|
53
44
|
MinScore,
|
@@ -56,7 +47,6 @@ from nucliadb_models.search import (
|
|
56
47
|
RelatedEntities,
|
57
48
|
RelatedEntity,
|
58
49
|
RelationDirection,
|
59
|
-
RelationNodeTypeMap,
|
60
50
|
Relations,
|
61
51
|
ResourceProperties,
|
62
52
|
ResourceResult,
|
@@ -69,38 +59,59 @@ from nucliadb_models.search import (
|
|
69
59
|
SortOrder,
|
70
60
|
TextPosition,
|
71
61
|
)
|
62
|
+
from nucliadb_protos.nodereader_pb2 import (
|
63
|
+
DocumentResult,
|
64
|
+
DocumentScored,
|
65
|
+
DocumentSearchResponse,
|
66
|
+
EntitiesSubgraphRequest,
|
67
|
+
ParagraphResult,
|
68
|
+
ParagraphSearchResponse,
|
69
|
+
RelationSearchResponse,
|
70
|
+
SearchResponse,
|
71
|
+
SuggestResponse,
|
72
|
+
VectorSearchResponse,
|
73
|
+
)
|
74
|
+
from nucliadb_protos.utils_pb2 import RelationNode
|
72
75
|
|
73
|
-
from .cache import get_resource_cache, get_resource_from_cache
|
74
76
|
from .metrics import merge_observer
|
75
|
-
from .paragraphs import
|
77
|
+
from .paragraphs import get_paragraph_text, get_text_sentence
|
76
78
|
|
77
79
|
Bm25Score = tuple[float, float]
|
78
80
|
TimestampScore = datetime.datetime
|
79
81
|
TitleScore = str
|
80
|
-
|
82
|
+
SortValue = Union[Bm25Score, TimestampScore, TitleScore]
|
83
|
+
|
84
|
+
|
85
|
+
def relation_node_type_to_entity_type(node_type: RelationNode.NodeType.ValueType) -> EntityType:
|
86
|
+
return {
|
87
|
+
RelationNode.NodeType.ENTITY: EntityType.ENTITY,
|
88
|
+
RelationNode.NodeType.LABEL: EntityType.LABEL,
|
89
|
+
RelationNode.NodeType.RESOURCE: EntityType.RESOURCE,
|
90
|
+
RelationNode.NodeType.USER: EntityType.USER,
|
91
|
+
}[node_type]
|
81
92
|
|
82
93
|
|
83
94
|
def sort_results_by_score(results: Union[list[ParagraphResult], list[DocumentResult]]):
|
84
95
|
results.sort(key=lambda x: (x.score.bm25, x.score.booster), reverse=True)
|
85
96
|
|
86
97
|
|
87
|
-
async def
|
98
|
+
async def get_sort_value(
|
88
99
|
item: Union[DocumentResult, ParagraphResult],
|
89
100
|
sort_field: SortField,
|
90
101
|
kbid: str,
|
91
|
-
) -> Optional[
|
102
|
+
) -> Optional[SortValue]:
|
92
103
|
"""Returns the score for given `item` and `sort_field`. If the resource is being
|
93
104
|
deleted, it might appear on search results but not in maindb. In this
|
94
105
|
specific case, return None.
|
95
|
-
|
96
106
|
"""
|
97
107
|
if sort_field == SortField.SCORE:
|
98
108
|
return (item.score.bm25, item.score.booster)
|
99
109
|
|
100
110
|
score: Any = None
|
101
|
-
resource = await
|
111
|
+
resource = await cache.get_resource(kbid, item.uuid)
|
102
112
|
if resource is None:
|
103
113
|
return score
|
114
|
+
|
104
115
|
basic = await resource.get_basic()
|
105
116
|
if basic is None:
|
106
117
|
return score
|
@@ -118,13 +129,12 @@ async def text_score(
|
|
118
129
|
async def merge_documents_results(
|
119
130
|
document_responses: list[DocumentSearchResponse],
|
120
131
|
resources: list[str],
|
121
|
-
|
122
|
-
page: int,
|
132
|
+
top_k: int,
|
123
133
|
kbid: str,
|
124
134
|
sort: SortOptions,
|
125
135
|
min_score: float,
|
126
136
|
) -> Resources:
|
127
|
-
raw_resource_list: list[tuple[DocumentResult,
|
137
|
+
raw_resource_list: list[tuple[DocumentResult, SortValue]] = []
|
128
138
|
facets: dict[str, Any] = {}
|
129
139
|
query = None
|
130
140
|
total = 0
|
@@ -143,24 +153,18 @@ async def merge_documents_results(
|
|
143
153
|
if document_response.next_page:
|
144
154
|
next_page = True
|
145
155
|
for result in document_response.results:
|
146
|
-
|
147
|
-
if
|
148
|
-
raw_resource_list.append((result,
|
156
|
+
sort_value = await get_sort_value(result, sort.field, kbid)
|
157
|
+
if sort_value is not None:
|
158
|
+
raw_resource_list.append((result, sort_value))
|
149
159
|
total += document_response.total
|
150
160
|
|
161
|
+
# We need to cut first and then sort, otherwise the page will be wrong if the order is DESC
|
162
|
+
raw_resource_list, has_more = cut_page(raw_resource_list, top_k)
|
163
|
+
next_page = next_page or has_more
|
151
164
|
raw_resource_list.sort(key=lambda x: x[1], reverse=(sort.order == SortOrder.DESC))
|
152
165
|
|
153
|
-
skip = page * count
|
154
|
-
end = skip + count
|
155
|
-
length = len(raw_resource_list)
|
156
|
-
|
157
|
-
if length > end:
|
158
|
-
next_page = True
|
159
|
-
|
160
166
|
result_resource_list: list[ResourceResult] = []
|
161
|
-
for result, _ in raw_resource_list
|
162
|
-
# /f/file
|
163
|
-
|
167
|
+
for result, _ in raw_resource_list:
|
164
168
|
labels = await get_labels_resource(result, kbid)
|
165
169
|
_, field_type, field = result.field.split("/")
|
166
170
|
|
@@ -181,8 +185,8 @@ async def merge_documents_results(
|
|
181
185
|
results=result_resource_list,
|
182
186
|
query=query,
|
183
187
|
total=total,
|
184
|
-
page_number=
|
185
|
-
page_size=
|
188
|
+
page_number=0, # Bw/c with pagination
|
189
|
+
page_size=top_k,
|
186
190
|
next_page=next_page,
|
187
191
|
min_score=min_score,
|
188
192
|
)
|
@@ -207,65 +211,58 @@ async def merge_suggest_paragraph_results(
|
|
207
211
|
if len(suggest_responses) > 1:
|
208
212
|
sort_results_by_score(raw_paragraph_list)
|
209
213
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
end=result.end,
|
222
|
-
split=result.split,
|
223
|
-
highlight=highlight,
|
224
|
-
ematches=ematches, # type: ignore
|
225
|
-
matches=result.matches, # type: ignore
|
226
|
-
extracted_text_cache=etcache,
|
227
|
-
)
|
228
|
-
labels = await get_labels_paragraph(result, kbid)
|
229
|
-
new_paragraph = Paragraph(
|
230
|
-
score=result.score.bm25,
|
231
|
-
rid=result.uuid,
|
232
|
-
field_type=field_type,
|
233
|
-
field=field,
|
234
|
-
text=text,
|
235
|
-
labels=labels,
|
236
|
-
position=TextPosition(
|
237
|
-
index=result.metadata.position.index,
|
238
|
-
start=result.metadata.position.start,
|
239
|
-
end=result.metadata.position.end,
|
240
|
-
page_number=result.metadata.position.page_number,
|
214
|
+
result_paragraph_list: list[Paragraph] = []
|
215
|
+
for result in raw_paragraph_list[:10]:
|
216
|
+
_, field_type, field = result.field.split("/")
|
217
|
+
text = await get_paragraph_text(
|
218
|
+
kbid=kbid,
|
219
|
+
paragraph_id=ParagraphId(
|
220
|
+
field_id=FieldId(
|
221
|
+
rid=result.uuid,
|
222
|
+
type=field_type,
|
223
|
+
key=field,
|
224
|
+
subfield_id=result.split,
|
241
225
|
),
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
226
|
+
paragraph_start=result.start,
|
227
|
+
paragraph_end=result.end,
|
228
|
+
),
|
229
|
+
highlight=highlight,
|
230
|
+
ematches=ematches, # type: ignore
|
231
|
+
matches=result.matches, # type: ignore
|
232
|
+
)
|
233
|
+
labels = await get_labels_paragraph(result, kbid)
|
234
|
+
new_paragraph = Paragraph(
|
235
|
+
score=result.score.bm25,
|
236
|
+
rid=result.uuid,
|
237
|
+
field_type=field_type,
|
238
|
+
field=field,
|
239
|
+
text=text,
|
240
|
+
labels=labels,
|
241
|
+
position=TextPosition(
|
242
|
+
index=result.metadata.position.index,
|
243
|
+
start=result.metadata.position.start,
|
244
|
+
end=result.metadata.position.end,
|
245
|
+
page_number=result.metadata.position.page_number,
|
246
|
+
),
|
247
|
+
)
|
248
|
+
if len(result.metadata.position.start_seconds) or len(result.metadata.position.end_seconds):
|
249
|
+
new_paragraph.start_seconds = list(result.metadata.position.start_seconds)
|
250
|
+
new_paragraph.end_seconds = list(result.metadata.position.end_seconds)
|
251
|
+
else:
|
252
|
+
# TODO: Remove once we are sure all data has been migrated!
|
253
|
+
seconds_positions = await get_seconds_paragraph(result, kbid)
|
254
|
+
if seconds_positions is not None:
|
255
|
+
new_paragraph.start_seconds = seconds_positions[0]
|
256
|
+
new_paragraph.end_seconds = seconds_positions[1]
|
257
|
+
result_paragraph_list.append(new_paragraph)
|
258
|
+
return Paragraphs(results=result_paragraph_list, query=query, min_score=0)
|
261
259
|
|
262
260
|
|
263
261
|
async def merge_vectors_results(
|
264
262
|
vector_responses: list[VectorSearchResponse],
|
265
263
|
resources: list[str],
|
266
264
|
kbid: str,
|
267
|
-
|
268
|
-
page: int,
|
265
|
+
top_k: int,
|
269
266
|
min_score: Optional[float] = None,
|
270
267
|
):
|
271
268
|
facets: dict[str, Any] = {}
|
@@ -282,12 +279,10 @@ async def merge_vectors_results(
|
|
282
279
|
if len(vector_responses) > 1:
|
283
280
|
raw_vectors_list.sort(key=lambda x: x.score, reverse=True)
|
284
281
|
|
285
|
-
|
286
|
-
end_element = skip + count
|
287
|
-
length = len(raw_vectors_list)
|
282
|
+
raw_vectors_list, _ = cut_page(raw_vectors_list, top_k)
|
288
283
|
|
289
284
|
result_sentence_list: list[Sentence] = []
|
290
|
-
for result in raw_vectors_list
|
285
|
+
for result in raw_vectors_list:
|
291
286
|
id_count = result.doc_id.id.count("/")
|
292
287
|
if id_count == 4:
|
293
288
|
rid, field_type, field, index, position = result.doc_id.id.split("/")
|
@@ -335,8 +330,8 @@ async def merge_vectors_results(
|
|
335
330
|
return Sentences(
|
336
331
|
results=result_sentence_list,
|
337
332
|
facets=facets,
|
338
|
-
page_number=
|
339
|
-
page_size=
|
333
|
+
page_number=0, # Bw/c with pagination
|
334
|
+
page_size=top_k,
|
340
335
|
min_score=round(min_score or 0, ndigits=3),
|
341
336
|
)
|
342
337
|
|
@@ -345,13 +340,12 @@ async def merge_paragraph_results(
|
|
345
340
|
paragraph_responses: list[ParagraphSearchResponse],
|
346
341
|
resources: list[str],
|
347
342
|
kbid: str,
|
348
|
-
|
349
|
-
page: int,
|
343
|
+
top_k: int,
|
350
344
|
highlight: bool,
|
351
345
|
sort: SortOptions,
|
352
346
|
min_score: float,
|
353
|
-
):
|
354
|
-
raw_paragraph_list: list[tuple[ParagraphResult,
|
347
|
+
) -> Paragraphs:
|
348
|
+
raw_paragraph_list: list[tuple[ParagraphResult, SortValue]] = []
|
355
349
|
facets: dict[str, Any] = {}
|
356
350
|
query = None
|
357
351
|
next_page = False
|
@@ -373,83 +367,75 @@ async def merge_paragraph_results(
|
|
373
367
|
if paragraph_response.next_page:
|
374
368
|
next_page = True
|
375
369
|
for result in paragraph_response.results:
|
376
|
-
score = await
|
370
|
+
score = await get_sort_value(result, sort.field, kbid)
|
377
371
|
if score is not None:
|
378
372
|
raw_paragraph_list.append((result, score))
|
379
373
|
total += paragraph_response.total
|
380
374
|
|
381
375
|
raw_paragraph_list.sort(key=lambda x: x[1], reverse=(sort.order == SortOrder.DESC))
|
382
376
|
|
383
|
-
|
384
|
-
|
385
|
-
length = len(raw_paragraph_list)
|
386
|
-
|
387
|
-
if length > end:
|
388
|
-
next_page = True
|
377
|
+
raw_paragraph_list, has_more = cut_page(raw_paragraph_list, top_k)
|
378
|
+
next_page = next_page or has_more
|
389
379
|
|
390
380
|
result_paragraph_list: list[Paragraph] = []
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
split=result.split,
|
402
|
-
highlight=highlight,
|
403
|
-
ematches=ematches,
|
404
|
-
matches=result.matches, # type: ignore
|
405
|
-
extracted_text_cache=etcache,
|
406
|
-
)
|
407
|
-
labels = await get_labels_paragraph(result, kbid)
|
408
|
-
fuzzy_result = len(result.matches) > 0
|
409
|
-
new_paragraph = Paragraph(
|
410
|
-
score=result.score.bm25,
|
411
|
-
rid=result.uuid,
|
412
|
-
field_type=field_type,
|
413
|
-
field=field,
|
414
|
-
text=text,
|
415
|
-
labels=labels,
|
416
|
-
position=TextPosition(
|
417
|
-
index=result.metadata.position.index,
|
418
|
-
start=result.metadata.position.start,
|
419
|
-
end=result.metadata.position.end,
|
420
|
-
page_number=result.metadata.position.page_number,
|
381
|
+
for result, _ in raw_paragraph_list:
|
382
|
+
_, field_type, field = result.field.split("/")
|
383
|
+
text = await get_paragraph_text(
|
384
|
+
kbid=kbid,
|
385
|
+
paragraph_id=ParagraphId(
|
386
|
+
field_id=FieldId(
|
387
|
+
rid=result.uuid,
|
388
|
+
type=field_type,
|
389
|
+
key=field,
|
390
|
+
subfield_id=result.split,
|
421
391
|
),
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
result.metadata.position.start_seconds
|
429
|
-
)
|
430
|
-
new_paragraph.end_seconds = list(result.metadata.position.end_seconds)
|
431
|
-
else:
|
432
|
-
# TODO: Remove once we are sure all data has been migrated!
|
433
|
-
seconds_positions = await get_seconds_paragraph(result, kbid)
|
434
|
-
if seconds_positions is not None:
|
435
|
-
new_paragraph.start_seconds = seconds_positions[0]
|
436
|
-
new_paragraph.end_seconds = seconds_positions[1]
|
437
|
-
|
438
|
-
result_paragraph_list.append(new_paragraph)
|
439
|
-
if new_paragraph.rid not in resources:
|
440
|
-
resources.append(new_paragraph.rid)
|
441
|
-
return Paragraphs(
|
442
|
-
results=result_paragraph_list,
|
443
|
-
facets=facets,
|
444
|
-
query=query,
|
445
|
-
total=total,
|
446
|
-
page_number=page,
|
447
|
-
page_size=count,
|
448
|
-
next_page=next_page,
|
449
|
-
min_score=min_score,
|
392
|
+
paragraph_start=result.start,
|
393
|
+
paragraph_end=result.end,
|
394
|
+
),
|
395
|
+
highlight=highlight,
|
396
|
+
ematches=ematches,
|
397
|
+
matches=result.matches, # type: ignore
|
450
398
|
)
|
451
|
-
|
452
|
-
|
399
|
+
labels = await get_labels_paragraph(result, kbid)
|
400
|
+
fuzzy_result = len(result.matches) > 0
|
401
|
+
new_paragraph = Paragraph(
|
402
|
+
score=result.score.bm25,
|
403
|
+
rid=result.uuid,
|
404
|
+
field_type=field_type,
|
405
|
+
field=field,
|
406
|
+
text=text,
|
407
|
+
labels=labels,
|
408
|
+
position=TextPosition(
|
409
|
+
index=result.metadata.position.index,
|
410
|
+
start=result.metadata.position.start,
|
411
|
+
end=result.metadata.position.end,
|
412
|
+
page_number=result.metadata.position.page_number,
|
413
|
+
),
|
414
|
+
fuzzy_result=fuzzy_result,
|
415
|
+
)
|
416
|
+
if len(result.metadata.position.start_seconds) or len(result.metadata.position.end_seconds):
|
417
|
+
new_paragraph.start_seconds = list(result.metadata.position.start_seconds)
|
418
|
+
new_paragraph.end_seconds = list(result.metadata.position.end_seconds)
|
419
|
+
else:
|
420
|
+
# TODO: Remove once we are sure all data has been migrated!
|
421
|
+
seconds_positions = await get_seconds_paragraph(result, kbid)
|
422
|
+
if seconds_positions is not None:
|
423
|
+
new_paragraph.start_seconds = seconds_positions[0]
|
424
|
+
new_paragraph.end_seconds = seconds_positions[1]
|
425
|
+
|
426
|
+
result_paragraph_list.append(new_paragraph)
|
427
|
+
if new_paragraph.rid not in resources:
|
428
|
+
resources.append(new_paragraph.rid)
|
429
|
+
return Paragraphs(
|
430
|
+
results=result_paragraph_list,
|
431
|
+
facets=facets,
|
432
|
+
query=query,
|
433
|
+
total=total,
|
434
|
+
page_number=0, # Bw/c with pagination
|
435
|
+
page_size=top_k,
|
436
|
+
next_page=next_page,
|
437
|
+
min_score=min_score,
|
438
|
+
)
|
453
439
|
|
454
440
|
|
455
441
|
@merge_observer.wrap({"type": "merge_relations"})
|
@@ -458,9 +444,7 @@ async def merge_relations_results(
|
|
458
444
|
query: EntitiesSubgraphRequest,
|
459
445
|
) -> Relations:
|
460
446
|
loop = asyncio.get_event_loop()
|
461
|
-
return await loop.run_in_executor(
|
462
|
-
None, _merge_relations_results, relations_responses, query
|
463
|
-
)
|
447
|
+
return await loop.run_in_executor(None, _merge_relations_results, relations_responses, query)
|
464
448
|
|
465
449
|
|
466
450
|
def _merge_relations_results(
|
@@ -483,7 +467,7 @@ def _merge_relations_results(
|
|
483
467
|
relations.entities[origin.value].related_to.append(
|
484
468
|
DirectionalRelation(
|
485
469
|
entity=destination.value,
|
486
|
-
entity_type=
|
470
|
+
entity_type=relation_node_type_to_entity_type(destination.ntype),
|
487
471
|
relation=relation_type,
|
488
472
|
relation_label=relation_label,
|
489
473
|
direction=RelationDirection.OUT,
|
@@ -493,7 +477,7 @@ def _merge_relations_results(
|
|
493
477
|
relations.entities[destination.value].related_to.append(
|
494
478
|
DirectionalRelation(
|
495
479
|
entity=origin.value,
|
496
|
-
entity_type=
|
480
|
+
entity_type=relation_node_type_to_entity_type(origin.ntype),
|
497
481
|
relation=relation_type,
|
498
482
|
relation_label=relation_label,
|
499
483
|
direction=RelationDirection.IN,
|
@@ -506,8 +490,7 @@ def _merge_relations_results(
|
|
506
490
|
@merge_observer.wrap({"type": "merge"})
|
507
491
|
async def merge_results(
|
508
492
|
search_responses: list[SearchResponse],
|
509
|
-
|
510
|
-
page: int,
|
493
|
+
top_k: int,
|
511
494
|
kbid: str,
|
512
495
|
show: list[ResourceProperties],
|
513
496
|
field_type_filter: list[FieldTypeName],
|
@@ -530,77 +513,59 @@ async def merge_results(
|
|
530
513
|
|
531
514
|
api_results = KnowledgeboxSearchResults()
|
532
515
|
|
533
|
-
|
534
|
-
|
535
|
-
resources
|
536
|
-
|
537
|
-
documents, resources, count, page, kbid, sort, min_score=min_score.bm25
|
538
|
-
)
|
516
|
+
resources: list[str] = list()
|
517
|
+
api_results.fulltext = await merge_documents_results(
|
518
|
+
documents, resources, top_k, kbid, sort, min_score=min_score.bm25
|
519
|
+
)
|
539
520
|
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
)
|
521
|
+
api_results.paragraphs = await merge_paragraph_results(
|
522
|
+
paragraphs,
|
523
|
+
resources,
|
524
|
+
kbid,
|
525
|
+
top_k,
|
526
|
+
highlight,
|
527
|
+
sort,
|
528
|
+
min_score=min_score.bm25,
|
529
|
+
)
|
550
530
|
|
551
|
-
|
552
|
-
|
553
|
-
|
531
|
+
api_results.sentences = await merge_vectors_results(
|
532
|
+
vectors, resources, kbid, top_k, min_score=min_score.semantic
|
533
|
+
)
|
554
534
|
|
555
|
-
|
556
|
-
relations, requested_relations
|
557
|
-
)
|
535
|
+
api_results.relations = await merge_relations_results(relations, requested_relations)
|
558
536
|
|
559
|
-
|
560
|
-
|
561
|
-
)
|
562
|
-
return api_results
|
563
|
-
finally:
|
564
|
-
rcache.clear()
|
537
|
+
api_results.resources = await fetch_resources(resources, kbid, show, field_type_filter, extracted)
|
538
|
+
return api_results
|
565
539
|
|
566
540
|
|
567
541
|
async def merge_paragraphs_results(
|
568
|
-
|
569
|
-
|
570
|
-
page: int,
|
542
|
+
responses: list[SearchResponse],
|
543
|
+
top_k: int,
|
571
544
|
kbid: str,
|
572
|
-
show: list[ResourceProperties],
|
573
|
-
field_type_filter: list[FieldTypeName],
|
574
|
-
extracted: list[ExtractedDataTypeName],
|
575
545
|
highlight_split: bool,
|
576
546
|
min_score: float,
|
577
547
|
) -> ResourceSearchResults:
|
578
548
|
paragraphs = []
|
579
|
-
for result in
|
580
|
-
paragraphs.append(result)
|
549
|
+
for result in responses:
|
550
|
+
paragraphs.append(result.paragraph)
|
581
551
|
|
582
552
|
api_results = ResourceSearchResults()
|
583
553
|
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
min_score=min_score,
|
600
|
-
)
|
601
|
-
return api_results
|
602
|
-
finally:
|
603
|
-
rcache.clear()
|
554
|
+
resources: list[str] = list()
|
555
|
+
api_results.paragraphs = await merge_paragraph_results(
|
556
|
+
paragraphs,
|
557
|
+
resources,
|
558
|
+
kbid,
|
559
|
+
top_k,
|
560
|
+
highlight=highlight_split,
|
561
|
+
sort=SortOptions(
|
562
|
+
field=SortField.SCORE,
|
563
|
+
order=SortOrder.DESC,
|
564
|
+
limit=None,
|
565
|
+
),
|
566
|
+
min_score=min_score,
|
567
|
+
)
|
568
|
+
return api_results
|
604
569
|
|
605
570
|
|
606
571
|
async def merge_suggest_entities_results(
|
@@ -609,8 +574,7 @@ async def merge_suggest_entities_results(
|
|
609
574
|
unique_entities: Set[RelatedEntity] = set()
|
610
575
|
for response in suggest_responses:
|
611
576
|
response_entities = (
|
612
|
-
RelatedEntity(family=e.subtype, value=e.value)
|
613
|
-
for e in response.entity_results.nodes
|
577
|
+
RelatedEntity(family=e.subtype, value=e.value) for e in response.entity_results.nodes
|
614
578
|
)
|
615
579
|
unique_entities.update(response_entities)
|
616
580
|
|