nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -17,44 +17,44 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
import json
|
21
20
|
import uuid
|
22
|
-
from
|
23
|
-
from typing import AsyncIterator, Optional
|
21
|
+
from typing import AsyncIterator
|
24
22
|
|
23
|
+
from nucliadb.common import datamanagers
|
24
|
+
from nucliadb.common.cluster.exceptions import AlreadyExists, EntitiesGroupNotFound
|
25
|
+
from nucliadb.common.cluster.manager import get_index_nodes
|
26
|
+
from nucliadb.common.cluster.utils import get_shard_manager
|
27
|
+
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
28
|
+
from nucliadb.common.external_index_providers.exceptions import ExternalIndexCreationError
|
29
|
+
from nucliadb.common.external_index_providers.manager import get_external_index_manager
|
30
|
+
from nucliadb.common.maindb.utils import setup_driver
|
31
|
+
from nucliadb.ingest import SERVICE_NAME, logger
|
32
|
+
from nucliadb.ingest.orm.broker_message import generate_broker_message
|
33
|
+
from nucliadb.ingest.orm.entities import EntitiesManager
|
34
|
+
from nucliadb.ingest.orm.exceptions import KnowledgeBoxConflict, VectorSetConflict
|
35
|
+
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
|
36
|
+
from nucliadb.ingest.orm.processor import Processor, sequence_manager
|
37
|
+
from nucliadb.ingest.orm.resource import Resource as ResourceORM
|
38
|
+
from nucliadb.ingest.settings import settings
|
39
|
+
from nucliadb_protos import nodewriter_pb2, writer_pb2, writer_pb2_grpc
|
25
40
|
from nucliadb_protos.knowledgebox_pb2 import (
|
26
41
|
DeleteKnowledgeBoxResponse,
|
27
|
-
GCKnowledgeBoxResponse,
|
28
42
|
KnowledgeBoxID,
|
29
|
-
KnowledgeBoxNew,
|
30
43
|
KnowledgeBoxResponseStatus,
|
31
44
|
KnowledgeBoxUpdate,
|
32
|
-
Labels,
|
33
|
-
NewKnowledgeBoxResponse,
|
34
45
|
SemanticModelMetadata,
|
35
46
|
UpdateKnowledgeBoxResponse,
|
47
|
+
VectorSetConfig,
|
36
48
|
)
|
37
|
-
from nucliadb_protos.resources_pb2 import CloudFile
|
38
49
|
from nucliadb_protos.writer_pb2 import (
|
39
|
-
BinaryData,
|
40
50
|
BrokerMessage,
|
41
51
|
DelEntitiesRequest,
|
42
|
-
DelLabelsRequest,
|
43
52
|
DelVectorSetRequest,
|
44
|
-
|
45
|
-
FileRequest,
|
46
|
-
FileUploaded,
|
53
|
+
DelVectorSetResponse,
|
47
54
|
GetEntitiesGroupRequest,
|
48
55
|
GetEntitiesGroupResponse,
|
49
56
|
GetEntitiesRequest,
|
50
57
|
GetEntitiesResponse,
|
51
|
-
GetLabelSetRequest,
|
52
|
-
GetLabelSetResponse,
|
53
|
-
GetLabelsRequest,
|
54
|
-
GetLabelsResponse,
|
55
|
-
GetSynonymsResponse,
|
56
|
-
GetVectorSetsRequest,
|
57
|
-
GetVectorSetsResponse,
|
58
58
|
IndexResource,
|
59
59
|
IndexStatus,
|
60
60
|
ListEntitiesGroupsRequest,
|
@@ -63,50 +63,22 @@ from nucliadb_protos.writer_pb2 import (
|
|
63
63
|
ListMembersResponse,
|
64
64
|
NewEntitiesGroupRequest,
|
65
65
|
NewEntitiesGroupResponse,
|
66
|
+
NewVectorSetRequest,
|
67
|
+
NewVectorSetResponse,
|
66
68
|
OpStatusWriter,
|
67
|
-
ResourceFieldExistsResponse,
|
68
|
-
ResourceFieldId,
|
69
|
-
ResourceIdRequest,
|
70
|
-
ResourceIdResponse,
|
71
69
|
SetEntitiesRequest,
|
72
|
-
SetLabelsRequest,
|
73
|
-
SetSynonymsRequest,
|
74
|
-
SetVectorSetRequest,
|
75
|
-
SetVectorsRequest,
|
76
|
-
SetVectorsResponse,
|
77
70
|
UpdateEntitiesGroupRequest,
|
78
71
|
UpdateEntitiesGroupResponse,
|
79
|
-
UploadBinaryData,
|
80
72
|
WriterStatusRequest,
|
81
73
|
WriterStatusResponse,
|
82
74
|
)
|
83
|
-
|
84
|
-
from nucliadb import learning_proxy
|
85
|
-
from nucliadb.common import datamanagers
|
86
|
-
from nucliadb.common.cluster.exceptions import AlreadyExists, EntitiesGroupNotFound
|
87
|
-
from nucliadb.common.cluster.manager import get_index_nodes
|
88
|
-
from nucliadb.common.cluster.utils import get_shard_manager
|
89
|
-
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
90
|
-
from nucliadb.common.maindb.driver import Transaction
|
91
|
-
from nucliadb.common.maindb.utils import setup_driver
|
92
|
-
from nucliadb.ingest import SERVICE_NAME, logger
|
93
|
-
from nucliadb.ingest.orm.entities import EntitiesManager
|
94
|
-
from nucliadb.ingest.orm.exceptions import KnowledgeBoxConflict
|
95
|
-
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
|
96
|
-
from nucliadb.ingest.orm.processor import Processor, sequence_manager
|
97
|
-
from nucliadb.ingest.orm.resource import Resource as ResourceORM
|
98
|
-
from nucliadb.ingest.settings import settings
|
99
|
-
from nucliadb_protos import utils_pb2, writer_pb2, writer_pb2_grpc
|
100
75
|
from nucliadb_telemetry import errors
|
101
|
-
from nucliadb_utils import
|
102
|
-
from nucliadb_utils.settings import is_onprem_nucliadb, running_settings
|
103
|
-
from nucliadb_utils.storages.storage import Storage, StorageField
|
76
|
+
from nucliadb_utils.settings import is_onprem_nucliadb
|
104
77
|
from nucliadb_utils.utilities import (
|
105
78
|
get_partitioning,
|
106
79
|
get_pubsub,
|
107
80
|
get_storage,
|
108
81
|
get_transaction_utility,
|
109
|
-
has_feature,
|
110
82
|
)
|
111
83
|
|
112
84
|
|
@@ -117,54 +89,64 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
117
89
|
async def initialize(self):
|
118
90
|
self.storage = await get_storage(service_name=SERVICE_NAME)
|
119
91
|
self.driver = await setup_driver()
|
120
|
-
self.proc = Processor(
|
121
|
-
driver=self.driver, storage=self.storage, pubsub=await get_pubsub()
|
122
|
-
)
|
92
|
+
self.proc = Processor(driver=self.driver, storage=self.storage, pubsub=await get_pubsub())
|
123
93
|
self.shards_manager = get_shard_manager()
|
124
94
|
|
125
|
-
async def finalize(self):
|
126
|
-
...
|
127
|
-
|
128
|
-
async def SetVectors( # type: ignore
|
129
|
-
self, request: SetVectorsRequest, context=None
|
130
|
-
) -> SetVectorsResponse:
|
131
|
-
response = SetVectorsResponse()
|
132
|
-
response.found = True
|
133
|
-
|
134
|
-
async with self.driver.transaction() as txn:
|
135
|
-
kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
|
136
|
-
resobj = ResourceORM(txn, self.storage, kbobj, request.rid)
|
95
|
+
async def finalize(self): ...
|
137
96
|
|
138
|
-
|
139
|
-
|
97
|
+
async def NewKnowledgeBoxV2(
|
98
|
+
self, request: writer_pb2.NewKnowledgeBoxV2Request, context=None
|
99
|
+
) -> writer_pb2.NewKnowledgeBoxV2Response:
|
100
|
+
"""v2 of KB creation endpoint. Payload has been refactored and cleaned
|
101
|
+
up to include only necessary fields. It has also been extended to
|
102
|
+
support KB creation with multiple vectorsets
|
103
|
+
"""
|
104
|
+
if is_onprem_nucliadb():
|
105
|
+
logger.error(
|
106
|
+
"Sorry, this endpoint is only available for hosted. Onprem must use the REST API"
|
140
107
|
)
|
141
|
-
|
142
|
-
|
143
|
-
|
108
|
+
return writer_pb2.NewKnowledgeBoxV2Response(
|
109
|
+
status=KnowledgeBoxResponseStatus.ERROR,
|
110
|
+
error_message="This endpoint is only available for hosted. Onprem must use the REST API",
|
111
|
+
)
|
112
|
+
# Hosted KBs are created through backend endpoints. We assume learning
|
113
|
+
# configuration has been already created for it and we are given the
|
114
|
+
# model metadata in the request
|
144
115
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
116
|
+
try:
|
117
|
+
kbid, _ = await KnowledgeBoxORM.create(
|
118
|
+
self.driver,
|
119
|
+
kbid=request.kbid,
|
120
|
+
slug=request.slug,
|
121
|
+
title=request.title,
|
122
|
+
description=request.description,
|
123
|
+
semantic_models={
|
124
|
+
vs.vectorset_id: SemanticModelMetadata(
|
125
|
+
similarity_function=vs.similarity,
|
126
|
+
vector_dimension=vs.vector_dimension,
|
127
|
+
matryoshka_dimensions=vs.matryoshka_dimensions,
|
128
|
+
)
|
129
|
+
for vs in request.vectorsets
|
130
|
+
},
|
131
|
+
external_index_provider=request.external_index_provider,
|
132
|
+
hidden_resources_enabled=request.hidden_resources_enabled,
|
133
|
+
hidden_resources_hide_on_creation=request.hidden_resources_hide_on_creation,
|
134
|
+
)
|
149
135
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
except Exception as e:
|
154
|
-
errors.capture_exception(e)
|
155
|
-
logger.error("Error in ingest gRPC servicer", exc_info=True)
|
136
|
+
except KnowledgeBoxConflict:
|
137
|
+
logger.info("KB already exists", extra={"slug": request.slug})
|
138
|
+
return writer_pb2.NewKnowledgeBoxV2Response(status=KnowledgeBoxResponseStatus.CONFLICT)
|
156
139
|
|
157
|
-
|
140
|
+
except ExternalIndexCreationError as exc:
|
141
|
+
logger.exception(
|
142
|
+
"Error creating external index",
|
143
|
+
extra={"slug": request.slug, "error": str(exc)},
|
144
|
+
)
|
145
|
+
return writer_pb2.NewKnowledgeBoxV2Response(
|
146
|
+
status=KnowledgeBoxResponseStatus.EXTERNAL_INDEX_PROVIDER_ERROR,
|
147
|
+
error_message=exc.message,
|
148
|
+
)
|
158
149
|
|
159
|
-
async def NewKnowledgeBox( # type: ignore
|
160
|
-
self, request: KnowledgeBoxNew, context=None
|
161
|
-
) -> NewKnowledgeBoxResponse:
|
162
|
-
try:
|
163
|
-
kbid = await self.create_kb(request)
|
164
|
-
logger.info("KB created successfully", extra={"kbid": kbid})
|
165
|
-
except KnowledgeBoxConflict:
|
166
|
-
logger.warning("KB already exists", extra={"slug": request.slug})
|
167
|
-
return NewKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.CONFLICT)
|
168
150
|
except Exception as exc:
|
169
151
|
errors.capture_exception(exc)
|
170
152
|
logger.exception(
|
@@ -172,101 +154,50 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
172
154
|
exc_info=True,
|
173
155
|
extra={"slug": request.slug},
|
174
156
|
)
|
175
|
-
return
|
176
|
-
return NewKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.OK, uuid=kbid)
|
177
|
-
|
178
|
-
async def create_kb(self, request: KnowledgeBoxNew) -> str:
|
179
|
-
if is_onprem_nucliadb():
|
180
|
-
return await self._create_kb_onprem(request)
|
181
|
-
else:
|
182
|
-
return await self._create_kb_hosted(request)
|
157
|
+
return writer_pb2.NewKnowledgeBoxV2Response(status=KnowledgeBoxResponseStatus.ERROR)
|
183
158
|
|
184
|
-
async def _create_kb_onprem(self, request: KnowledgeBoxNew) -> str:
|
185
|
-
"""
|
186
|
-
First, try to get the learning configuration for the new knowledge box.
|
187
|
-
From there we need to extract the semantic model metadata and pass it to the create_kb method.
|
188
|
-
If the kb creation fails, rollback the learning configuration for the kbid that was just created.
|
189
|
-
"""
|
190
|
-
kbid = request.forceuuid or str(uuid.uuid4())
|
191
|
-
release_channel = get_release_channel(request)
|
192
|
-
request.config.release_channel = release_channel
|
193
|
-
lconfig = await learning_proxy.get_configuration(kbid)
|
194
|
-
lconfig_created = False
|
195
|
-
if lconfig is None:
|
196
|
-
if request.learning_config:
|
197
|
-
# We parse the desired configuration from the request and set it
|
198
|
-
config = json.loads(request.learning_config)
|
199
|
-
else:
|
200
|
-
# We set an empty configuration so that learning chooses the default values.
|
201
|
-
config = {}
|
202
|
-
logger.warning(
|
203
|
-
"No learning configuration provided. Default will be used.",
|
204
|
-
extra={"kbid": kbid},
|
205
|
-
)
|
206
|
-
lconfig = await learning_proxy.set_configuration(kbid, config=config)
|
207
|
-
lconfig_created = True
|
208
159
|
else:
|
209
|
-
logger.info("
|
210
|
-
|
211
|
-
await self.proc.create_kb(
|
212
|
-
request.slug,
|
213
|
-
request.config,
|
214
|
-
parse_model_metadata_from_learning_config(lconfig),
|
215
|
-
forceuuid=kbid,
|
216
|
-
release_channel=release_channel,
|
217
|
-
)
|
218
|
-
return kbid
|
219
|
-
except Exception:
|
220
|
-
# Rollback learning config for the kbid that was just created
|
221
|
-
try:
|
222
|
-
if lconfig_created:
|
223
|
-
await learning_proxy.delete_configuration(kbid)
|
224
|
-
except Exception:
|
225
|
-
logger.warning(
|
226
|
-
"Could not rollback learning configuration",
|
227
|
-
exc_info=True,
|
228
|
-
extra={"kbid": kbid},
|
229
|
-
)
|
230
|
-
raise
|
231
|
-
|
232
|
-
async def _create_kb_hosted(self, request: KnowledgeBoxNew) -> str:
|
233
|
-
"""
|
234
|
-
For the hosted case, we assume that the learning configuration
|
235
|
-
is already set and we are given the model metadata in the request.
|
236
|
-
"""
|
237
|
-
kbid = request.forceuuid or str(uuid.uuid4())
|
238
|
-
release_channel = get_release_channel(request)
|
239
|
-
request.config.release_channel = release_channel
|
240
|
-
await self.proc.create_kb(
|
241
|
-
request.slug,
|
242
|
-
request.config,
|
243
|
-
parse_model_metadata_from_request(request),
|
244
|
-
forceuuid=kbid,
|
245
|
-
release_channel=release_channel,
|
246
|
-
)
|
247
|
-
return kbid
|
160
|
+
logger.info("KB created successfully", extra={"kbid": kbid})
|
161
|
+
return writer_pb2.NewKnowledgeBoxV2Response(status=KnowledgeBoxResponseStatus.OK)
|
248
162
|
|
249
163
|
async def UpdateKnowledgeBox( # type: ignore
|
250
164
|
self, request: KnowledgeBoxUpdate, context=None
|
251
165
|
) -> UpdateKnowledgeBoxResponse:
|
252
|
-
|
253
|
-
|
254
|
-
|
166
|
+
if is_onprem_nucliadb():
|
167
|
+
logger.error(
|
168
|
+
"Sorry, this endpoint is only available for hosted. Onprem must use the REST API"
|
169
|
+
)
|
255
170
|
return UpdateKnowledgeBoxResponse(
|
256
|
-
status=KnowledgeBoxResponseStatus.
|
171
|
+
status=KnowledgeBoxResponseStatus.ERROR,
|
257
172
|
)
|
173
|
+
|
174
|
+
try:
|
175
|
+
async with self.driver.transaction() as txn:
|
176
|
+
kbid = await KnowledgeBoxORM.update(
|
177
|
+
txn, uuid=request.uuid, slug=request.slug, config=request.config
|
178
|
+
)
|
179
|
+
await txn.commit()
|
180
|
+
except KnowledgeBoxNotFound:
|
181
|
+
return UpdateKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.NOTFOUND)
|
258
182
|
except Exception:
|
259
|
-
logger.exception("Could not
|
183
|
+
logger.exception("Could not update KB", exc_info=True)
|
260
184
|
return UpdateKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.ERROR)
|
261
|
-
return UpdateKnowledgeBoxResponse(
|
262
|
-
status=KnowledgeBoxResponseStatus.OK, uuid=kbid
|
263
|
-
)
|
185
|
+
return UpdateKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.OK, uuid=kbid)
|
264
186
|
|
265
187
|
async def DeleteKnowledgeBox( # type: ignore
|
266
188
|
self, request: KnowledgeBoxID, context=None
|
267
189
|
) -> DeleteKnowledgeBoxResponse:
|
190
|
+
if is_onprem_nucliadb():
|
191
|
+
logger.error(
|
192
|
+
"Sorry, this endpoint is only available for hosted. Onprem must use the REST API"
|
193
|
+
)
|
194
|
+
return DeleteKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.ERROR)
|
195
|
+
|
268
196
|
try:
|
269
|
-
|
197
|
+
kbid = request.uuid
|
198
|
+
# learning configuration is automatically removed in nuclia backend for
|
199
|
+
# hosted users, we don't need to do it
|
200
|
+
await KnowledgeBoxORM.delete(self.driver, kbid=kbid)
|
270
201
|
except KnowledgeBoxNotFound:
|
271
202
|
logger.warning(f"KB not found: kbid={request.uuid}, slug={request.slug}")
|
272
203
|
except Exception:
|
@@ -274,28 +205,6 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
274
205
|
return DeleteKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.ERROR)
|
275
206
|
return DeleteKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.OK)
|
276
207
|
|
277
|
-
async def delete_kb(self, request: KnowledgeBoxID) -> None:
|
278
|
-
kbid = request.uuid
|
279
|
-
await self.proc.delete_kb(kbid, request.slug)
|
280
|
-
# learning configuration is automatically removed in nuclia backend for
|
281
|
-
# hosted users, we only need to remove it for onprem
|
282
|
-
if is_onprem_nucliadb():
|
283
|
-
try:
|
284
|
-
await learning_proxy.delete_configuration(kbid)
|
285
|
-
logger.info("Learning configuration deleted", extra={"kbid": kbid})
|
286
|
-
except Exception:
|
287
|
-
logger.exception(
|
288
|
-
"Unexpected error deleting learning configuration",
|
289
|
-
exc_info=True,
|
290
|
-
extra={"kbid": kbid},
|
291
|
-
)
|
292
|
-
|
293
|
-
async def GCKnowledgeBox( # type: ignore
|
294
|
-
self, request: KnowledgeBoxID, context=None
|
295
|
-
) -> GCKnowledgeBoxResponse:
|
296
|
-
response = GCKnowledgeBoxResponse()
|
297
|
-
return response
|
298
|
-
|
299
208
|
async def ProcessMessage( # type: ignore
|
300
209
|
self, request_stream: AsyncIterator[BrokerMessage], context=None
|
301
210
|
):
|
@@ -313,128 +222,21 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
313
222
|
logger.info(f"Processed {message.uuid}")
|
314
223
|
return response
|
315
224
|
|
316
|
-
async def SetLabels(self, request: SetLabelsRequest, context=None) -> OpStatusWriter: # type: ignore
|
317
|
-
async with self.driver.transaction() as txn:
|
318
|
-
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
319
|
-
response = OpStatusWriter()
|
320
|
-
if kbobj is not None:
|
321
|
-
try:
|
322
|
-
await kbobj.set_labelset(request.id, request.labelset)
|
323
|
-
await txn.commit()
|
324
|
-
response.status = OpStatusWriter.Status.OK
|
325
|
-
except Exception as e:
|
326
|
-
errors.capture_exception(e)
|
327
|
-
logger.error("Error in ingest gRPC servicer", exc_info=True)
|
328
|
-
response.status = OpStatusWriter.Status.ERROR
|
329
|
-
else:
|
330
|
-
response.status = OpStatusWriter.Status.NOTFOUND
|
331
|
-
return response
|
332
|
-
|
333
|
-
async def DelLabels(self, request: DelLabelsRequest, context=None) -> OpStatusWriter: # type: ignore
|
334
|
-
async with self.driver.transaction() as txn:
|
335
|
-
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
336
|
-
response = OpStatusWriter()
|
337
|
-
if kbobj is not None:
|
338
|
-
try:
|
339
|
-
await kbobj.del_labelset(request.id)
|
340
|
-
await txn.commit()
|
341
|
-
response.status = OpStatusWriter.Status.OK
|
342
|
-
except Exception as e:
|
343
|
-
errors.capture_exception(e)
|
344
|
-
logger.error("Error in ingest gRPC servicer", exc_info=True)
|
345
|
-
response.status = OpStatusWriter.Status.ERROR
|
346
|
-
else:
|
347
|
-
response.status = OpStatusWriter.Status.NOTFOUND
|
348
|
-
|
349
|
-
return response
|
350
|
-
|
351
|
-
async def GetLabels(self, request: GetLabelsRequest, context=None) -> GetLabelsResponse: # type: ignore
|
352
|
-
async with self.driver.transaction() as txn:
|
353
|
-
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
354
|
-
labels: Optional[Labels] = None
|
355
|
-
if kbobj is not None:
|
356
|
-
labels = await kbobj.get_labels()
|
357
|
-
response = GetLabelsResponse()
|
358
|
-
if kbobj is None:
|
359
|
-
response.status = GetLabelsResponse.Status.NOTFOUND
|
360
|
-
else:
|
361
|
-
response.kb.uuid = kbobj.kbid
|
362
|
-
if labels is not None:
|
363
|
-
response.labels.CopyFrom(labels)
|
364
|
-
|
365
|
-
return response
|
366
|
-
|
367
|
-
async def GetLabelSet( # type: ignore
|
368
|
-
self, request: GetLabelSetRequest, context=None
|
369
|
-
) -> GetLabelSetResponse:
|
370
|
-
async with self.driver.transaction() as txn:
|
371
|
-
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
372
|
-
response = GetLabelSetResponse()
|
373
|
-
if kbobj is not None:
|
374
|
-
await kbobj.get_labelset(request.labelset, response)
|
375
|
-
response.kb.uuid = kbobj.kbid
|
376
|
-
response.status = GetLabelSetResponse.Status.OK
|
377
|
-
else:
|
378
|
-
response.status = GetLabelSetResponse.Status.NOTFOUND
|
379
|
-
return response
|
380
|
-
|
381
|
-
async def GetVectorSets( # type: ignore
|
382
|
-
self, request: GetVectorSetsRequest, context=None
|
383
|
-
) -> GetVectorSetsResponse:
|
384
|
-
async with self.driver.transaction() as txn:
|
385
|
-
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
386
|
-
response = GetVectorSetsResponse()
|
387
|
-
if kbobj is not None:
|
388
|
-
await kbobj.get_vectorsets(response)
|
389
|
-
response.kb.uuid = kbobj.kbid
|
390
|
-
response.status = GetVectorSetsResponse.Status.OK
|
391
|
-
else:
|
392
|
-
response.status = GetVectorSetsResponse.Status.NOTFOUND
|
393
|
-
return response
|
394
|
-
|
395
|
-
async def DelVectorSet( # type: ignore
|
396
|
-
self, request: DelVectorSetRequest, context=None
|
397
|
-
) -> OpStatusWriter:
|
398
|
-
async with self.driver.transaction() as txn:
|
399
|
-
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
400
|
-
response = OpStatusWriter()
|
401
|
-
if kbobj is not None:
|
402
|
-
await kbobj.del_vectorset(request.vectorset)
|
403
|
-
response.status = OpStatusWriter.Status.OK
|
404
|
-
await txn.commit()
|
405
|
-
else:
|
406
|
-
response.status = OpStatusWriter.Status.NOTFOUND
|
407
|
-
return response
|
408
|
-
|
409
|
-
async def SetVectorSet( # type: ignore
|
410
|
-
self, request: SetVectorSetRequest, context=None
|
411
|
-
) -> OpStatusWriter:
|
412
|
-
async with self.driver.transaction() as txn:
|
413
|
-
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
414
|
-
response = OpStatusWriter()
|
415
|
-
if kbobj is not None:
|
416
|
-
await kbobj.set_vectorset(request.id, request.vectorset)
|
417
|
-
response.status = OpStatusWriter.Status.OK
|
418
|
-
await txn.commit()
|
419
|
-
else:
|
420
|
-
response.status = OpStatusWriter.Status.NOTFOUND
|
421
|
-
return response
|
422
|
-
|
423
225
|
async def NewEntitiesGroup( # type: ignore
|
424
226
|
self, request: NewEntitiesGroupRequest, context=None
|
425
227
|
) -> NewEntitiesGroupResponse:
|
426
228
|
response = NewEntitiesGroupResponse()
|
427
|
-
async with self.driver.transaction() as
|
428
|
-
kbobj = await self.proc.get_kb_obj(
|
229
|
+
async with self.driver.transaction(read_only=True) as ro_txn:
|
230
|
+
kbobj = await self.proc.get_kb_obj(ro_txn, request.kb)
|
429
231
|
if kbobj is None:
|
430
232
|
response.status = NewEntitiesGroupResponse.Status.KB_NOT_FOUND
|
431
233
|
return response
|
432
234
|
|
235
|
+
async with self.driver.transaction() as txn:
|
236
|
+
kbobj.txn = txn
|
433
237
|
entities_manager = EntitiesManager(kbobj, txn)
|
434
238
|
try:
|
435
|
-
await entities_manager.create_entities_group(
|
436
|
-
request.group, request.entities
|
437
|
-
)
|
239
|
+
await entities_manager.create_entities_group(request.group, request.entities)
|
438
240
|
except AlreadyExists:
|
439
241
|
response.status = NewEntitiesGroupResponse.Status.ALREADY_EXISTS
|
440
242
|
return response
|
@@ -447,9 +249,8 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
447
249
|
self, request: GetEntitiesRequest, context=None
|
448
250
|
) -> GetEntitiesResponse:
|
449
251
|
response = GetEntitiesResponse()
|
450
|
-
async with self.driver.transaction() as txn:
|
252
|
+
async with self.driver.transaction(read_only=True) as txn:
|
451
253
|
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
452
|
-
|
453
254
|
if kbobj is None:
|
454
255
|
response.status = GetEntitiesResponse.Status.NOTFOUND
|
455
256
|
return response
|
@@ -470,9 +271,8 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
470
271
|
self, request: ListEntitiesGroupsRequest, context=None
|
471
272
|
) -> ListEntitiesGroupsResponse:
|
472
273
|
response = ListEntitiesGroupsResponse()
|
473
|
-
async with self.driver.transaction() as txn:
|
274
|
+
async with self.driver.transaction(read_only=True) as txn:
|
474
275
|
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
475
|
-
|
476
276
|
if kbobj is None:
|
477
277
|
response.status = ListEntitiesGroupsResponse.Status.NOTFOUND
|
478
278
|
return response
|
@@ -495,7 +295,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
495
295
|
self, request: GetEntitiesGroupRequest, context=None
|
496
296
|
) -> GetEntitiesGroupResponse:
|
497
297
|
response = GetEntitiesGroupResponse()
|
498
|
-
async with self.driver.transaction() as txn:
|
298
|
+
async with self.driver.transaction(read_only=True) as txn:
|
499
299
|
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
500
300
|
if kbobj is None:
|
501
301
|
response.status = GetEntitiesGroupResponse.Status.KB_NOT_FOUND
|
@@ -503,9 +303,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
503
303
|
|
504
304
|
entities_manager = EntitiesManager(kbobj, txn)
|
505
305
|
try:
|
506
|
-
entities_group = await entities_manager.get_entities_group(
|
507
|
-
request.group
|
508
|
-
)
|
306
|
+
entities_group = await entities_manager.get_entities_group(request.group)
|
509
307
|
except Exception as e:
|
510
308
|
errors.capture_exception(e)
|
511
309
|
logger.error("Error in ingest gRPC servicer", exc_info=True)
|
@@ -513,9 +311,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
513
311
|
else:
|
514
312
|
response.kb.uuid = kbobj.kbid
|
515
313
|
if entities_group is None:
|
516
|
-
response.status =
|
517
|
-
GetEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND
|
518
|
-
)
|
314
|
+
response.status = GetEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND
|
519
315
|
else:
|
520
316
|
response.status = GetEntitiesGroupResponse.Status.OK
|
521
317
|
response.group.CopyFrom(entities_group)
|
@@ -524,17 +320,17 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
524
320
|
|
525
321
|
async def SetEntities(self, request: SetEntitiesRequest, context=None) -> OpStatusWriter: # type: ignore
|
526
322
|
response = OpStatusWriter()
|
527
|
-
async with self.driver.transaction() as
|
528
|
-
kbobj = await self.proc.get_kb_obj(
|
323
|
+
async with self.driver.transaction(read_only=True) as ro_txn:
|
324
|
+
kbobj = await self.proc.get_kb_obj(ro_txn, request.kb)
|
529
325
|
if kbobj is None:
|
530
326
|
response.status = OpStatusWriter.Status.NOTFOUND
|
531
327
|
return response
|
532
328
|
|
329
|
+
async with self.driver.transaction() as txn:
|
330
|
+
kbobj.txn = txn
|
533
331
|
entities_manager = EntitiesManager(kbobj, txn)
|
534
332
|
try:
|
535
|
-
await entities_manager.set_entities_group(
|
536
|
-
request.group, request.entities
|
537
|
-
)
|
333
|
+
await entities_manager.set_entities_group(request.group, request.entities)
|
538
334
|
except Exception as e:
|
539
335
|
errors.capture_exception(e)
|
540
336
|
logger.error("Error in ingest gRPC servicer", exc_info=True)
|
@@ -548,14 +344,15 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
548
344
|
self, request: UpdateEntitiesGroupRequest, context=None
|
549
345
|
) -> UpdateEntitiesGroupResponse:
|
550
346
|
response = UpdateEntitiesGroupResponse()
|
551
|
-
async with self.driver.transaction() as
|
552
|
-
kbobj = await self.proc.get_kb_obj(
|
347
|
+
async with self.driver.transaction(read_only=True) as ro_txn:
|
348
|
+
kbobj = await self.proc.get_kb_obj(ro_txn, request.kb)
|
553
349
|
if kbobj is None:
|
554
350
|
response.status = UpdateEntitiesGroupResponse.Status.KB_NOT_FOUND
|
555
351
|
return response
|
556
352
|
|
353
|
+
async with self.driver.transaction() as txn:
|
354
|
+
kbobj.txn = txn
|
557
355
|
entities_manager = EntitiesManager(kbobj, txn)
|
558
|
-
|
559
356
|
try:
|
560
357
|
await entities_manager.set_entities_group_metadata(
|
561
358
|
request.group,
|
@@ -566,9 +363,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
566
363
|
await entities_manager.update_entities(request.group, updates)
|
567
364
|
await entities_manager.delete_entities(request.group, request.delete) # type: ignore
|
568
365
|
except EntitiesGroupNotFound:
|
569
|
-
response.status =
|
570
|
-
UpdateEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND
|
571
|
-
)
|
366
|
+
response.status = UpdateEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND
|
572
367
|
return response
|
573
368
|
|
574
369
|
await txn.commit()
|
@@ -577,12 +372,15 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
577
372
|
|
578
373
|
async def DelEntities(self, request: DelEntitiesRequest, context=None) -> OpStatusWriter: # type: ignore
|
579
374
|
response = OpStatusWriter()
|
580
|
-
|
581
|
-
|
375
|
+
|
376
|
+
async with self.driver.transaction(read_only=True) as ro_txn:
|
377
|
+
kbobj = await self.proc.get_kb_obj(ro_txn, request.kb)
|
582
378
|
if kbobj is None:
|
583
379
|
response.status = OpStatusWriter.Status.NOTFOUND
|
584
380
|
return response
|
585
381
|
|
382
|
+
async with self.driver.transaction() as txn:
|
383
|
+
kbobj.txn = txn
|
586
384
|
entities_manager = EntitiesManager(kbobj, txn)
|
587
385
|
try:
|
588
386
|
await entities_manager.delete_entities_group(request.group)
|
@@ -595,77 +393,12 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
595
393
|
response.status = OpStatusWriter.Status.OK
|
596
394
|
return response
|
597
395
|
|
598
|
-
async def GetSynonyms( # type: ignore
|
599
|
-
self, request: KnowledgeBoxID, context=None
|
600
|
-
) -> GetSynonymsResponse:
|
601
|
-
kbid = request
|
602
|
-
response = GetSynonymsResponse()
|
603
|
-
txn: Transaction
|
604
|
-
async with self.driver.transaction() as txn:
|
605
|
-
kbobj = await self.proc.get_kb_obj(txn, kbid)
|
606
|
-
if kbobj is None:
|
607
|
-
response.status.status = OpStatusWriter.Status.NOTFOUND
|
608
|
-
return response
|
609
|
-
try:
|
610
|
-
await kbobj.get_synonyms(response.synonyms)
|
611
|
-
response.status.status = OpStatusWriter.Status.OK
|
612
|
-
return response
|
613
|
-
except Exception as e:
|
614
|
-
errors.capture_exception(e)
|
615
|
-
logger.exception("Errors getting synonyms")
|
616
|
-
response.status.status = OpStatusWriter.Status.ERROR
|
617
|
-
return response
|
618
|
-
|
619
|
-
async def SetSynonyms( # type: ignore
|
620
|
-
self, request: SetSynonymsRequest, context=None
|
621
|
-
) -> OpStatusWriter:
|
622
|
-
kbid = request.kbid
|
623
|
-
response = OpStatusWriter()
|
624
|
-
txn: Transaction
|
625
|
-
async with self.driver.transaction() as txn:
|
626
|
-
kbobj = await self.proc.get_kb_obj(txn, kbid)
|
627
|
-
if kbobj is None:
|
628
|
-
response.status = OpStatusWriter.Status.NOTFOUND
|
629
|
-
return response
|
630
|
-
try:
|
631
|
-
await kbobj.set_synonyms(request.synonyms)
|
632
|
-
await txn.commit()
|
633
|
-
response.status = OpStatusWriter.Status.OK
|
634
|
-
return response
|
635
|
-
except Exception as e:
|
636
|
-
errors.capture_exception(e)
|
637
|
-
logger.exception("Errors setting synonyms")
|
638
|
-
response.status = OpStatusWriter.Status.ERROR
|
639
|
-
return response
|
640
|
-
|
641
|
-
async def DelSynonyms( # type: ignore
|
642
|
-
self, request: KnowledgeBoxID, context=None
|
643
|
-
) -> OpStatusWriter:
|
644
|
-
kbid = request
|
645
|
-
response = OpStatusWriter()
|
646
|
-
txn: Transaction
|
647
|
-
async with self.driver.transaction() as txn:
|
648
|
-
kbobj = await self.proc.get_kb_obj(txn, kbid)
|
649
|
-
if kbobj is None:
|
650
|
-
response.status = OpStatusWriter.Status.NOTFOUND
|
651
|
-
return response
|
652
|
-
try:
|
653
|
-
await kbobj.delete_synonyms()
|
654
|
-
await txn.commit()
|
655
|
-
response.status = OpStatusWriter.Status.OK
|
656
|
-
return response
|
657
|
-
except Exception as e:
|
658
|
-
errors.capture_exception(e)
|
659
|
-
logger.exception("Errors deleting synonyms")
|
660
|
-
response.status = OpStatusWriter.Status.ERROR
|
661
|
-
return response
|
662
|
-
|
663
396
|
async def Status( # type: ignore
|
664
397
|
self, request: WriterStatusRequest, context=None
|
665
398
|
) -> WriterStatusResponse:
|
666
399
|
logger.info("Status Call")
|
667
400
|
response = WriterStatusResponse()
|
668
|
-
async with self.driver.transaction() as txn:
|
401
|
+
async with self.driver.transaction(read_only=True) as txn:
|
669
402
|
async for _, slug in datamanagers.kb.get_kbs(txn):
|
670
403
|
response.knowledgeboxes.append(slug)
|
671
404
|
|
@@ -695,60 +428,11 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
695
428
|
)
|
696
429
|
return response
|
697
430
|
|
698
|
-
async def GetResourceId( # type: ignore
|
699
|
-
self, request: ResourceIdRequest, context=None
|
700
|
-
) -> ResourceIdResponse:
|
701
|
-
response = ResourceIdResponse()
|
702
|
-
response.uuid = ""
|
703
|
-
async with self.driver.transaction() as txn:
|
704
|
-
kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
|
705
|
-
rid = await kbobj.get_resource_uuid_by_slug(request.slug)
|
706
|
-
if rid:
|
707
|
-
response.uuid = rid
|
708
|
-
return response
|
709
|
-
|
710
|
-
async def ResourceFieldExists( # type: ignore
|
711
|
-
self, request: ResourceFieldId, context=None
|
712
|
-
) -> ResourceFieldExistsResponse:
|
713
|
-
response = ResourceFieldExistsResponse()
|
714
|
-
response.found = False
|
715
|
-
resobj = None
|
716
|
-
async with self.driver.transaction() as txn:
|
717
|
-
kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
|
718
|
-
resobj = ResourceORM(txn, self.storage, kbobj, request.rid)
|
719
|
-
|
720
|
-
if request.field != "":
|
721
|
-
field = await resobj.get_field(
|
722
|
-
request.field, request.field_type, load=True
|
723
|
-
)
|
724
|
-
if field.value is not None:
|
725
|
-
response.found = True
|
726
|
-
else:
|
727
|
-
response.found = False
|
728
|
-
return response
|
729
|
-
|
730
|
-
if request.rid != "":
|
731
|
-
if await resobj.exists():
|
732
|
-
response.found = True
|
733
|
-
else:
|
734
|
-
response.found = False
|
735
|
-
return response
|
736
|
-
|
737
|
-
if request.kbid != "":
|
738
|
-
config = await datamanagers.kb.get_config(txn, kbid=request.kbid)
|
739
|
-
if config is not None:
|
740
|
-
response.found = True
|
741
|
-
else:
|
742
|
-
response.found = False
|
743
|
-
return response
|
744
|
-
|
745
|
-
return response
|
746
|
-
|
747
431
|
async def Index(self, request: IndexResource, context=None) -> IndexStatus: # type: ignore
|
748
432
|
async with self.driver.transaction() as txn:
|
749
433
|
kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
|
750
434
|
resobj = ResourceORM(txn, self.storage, kbobj, request.rid)
|
751
|
-
bm = await
|
435
|
+
bm = await generate_broker_message(resobj)
|
752
436
|
transaction = get_transaction_utility()
|
753
437
|
partitioning = get_partitioning()
|
754
438
|
partition = partitioning.generate_partition(request.kbid, request.rid)
|
@@ -763,42 +447,25 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
763
447
|
kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
|
764
448
|
resobj = ResourceORM(txn, self.storage, kbobj, request.rid)
|
765
449
|
resobj.disable_vectors = not request.reindex_vectors
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
if shard is None:
|
776
|
-
shard = await self.shards_manager.get_current_active_shard(
|
777
|
-
txn, request.kbid
|
778
|
-
)
|
779
|
-
if shard is None:
|
780
|
-
# no shard currently exists, create one
|
781
|
-
model = await datamanagers.kb.get_model_metadata(
|
782
|
-
txn, kbid=request.kbid
|
783
|
-
)
|
784
|
-
shard = await self.shards_manager.create_shard_by_kbid(
|
785
|
-
txn, request.kbid, semantic_model=model
|
786
|
-
)
|
787
|
-
|
788
|
-
await datamanagers.resources.set_resource_shard_id(
|
789
|
-
txn, kbid=request.kbid, rid=request.rid, shard=shard.shard
|
450
|
+
brain = await resobj.generate_index_message(reindex=True)
|
451
|
+
shard = await self.proc.get_or_assign_resource_shard(txn, kbobj, request.rid)
|
452
|
+
index_message = brain.brain
|
453
|
+
external_index_manager = await get_external_index_manager(kbid=request.kbid)
|
454
|
+
if external_index_manager is not None:
|
455
|
+
await self.proc.external_index_add_resource(
|
456
|
+
request.kbid,
|
457
|
+
request.rid,
|
458
|
+
index_message,
|
790
459
|
)
|
791
|
-
|
792
|
-
if shard is not None:
|
460
|
+
else:
|
793
461
|
await self.shards_manager.add_resource(
|
794
462
|
shard,
|
795
|
-
|
463
|
+
index_message,
|
796
464
|
0,
|
797
465
|
partition=self.partitions[0],
|
798
466
|
kb=request.kbid,
|
799
467
|
reindex_id=uuid.uuid4().hex,
|
800
468
|
)
|
801
|
-
|
802
469
|
response = IndexStatus()
|
803
470
|
return response
|
804
471
|
except Exception as e:
|
@@ -806,109 +473,51 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
806
473
|
logger.error("Error in ingest gRPC servicer", exc_info=True)
|
807
474
|
raise
|
808
475
|
|
809
|
-
async def
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
476
|
+
async def NewVectorSet( # type: ignore
|
477
|
+
self, request: NewVectorSetRequest, context=None
|
478
|
+
) -> NewVectorSetResponse:
|
479
|
+
config = VectorSetConfig(
|
480
|
+
vectorset_id=request.vectorset_id,
|
481
|
+
vectorset_index_config=nodewriter_pb2.VectorIndexConfig(
|
482
|
+
similarity=request.similarity,
|
483
|
+
normalize_vectors=request.normalize_vectors,
|
484
|
+
vector_type=request.vector_type,
|
485
|
+
vector_dimension=request.vector_dimension,
|
486
|
+
),
|
487
|
+
matryoshka_dimensions=request.matryoshka_dimensions,
|
488
|
+
)
|
489
|
+
response = NewVectorSetResponse()
|
490
|
+
try:
|
491
|
+
async with self.driver.transaction() as txn:
|
492
|
+
kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
|
493
|
+
await kbobj.create_vectorset(config)
|
494
|
+
await txn.commit()
|
495
|
+
except VectorSetConflict as exc:
|
496
|
+
response.status = NewVectorSetResponse.Status.ERROR
|
497
|
+
response.details = str(exc)
|
498
|
+
except Exception as exc:
|
499
|
+
errors.capture_exception(exc)
|
500
|
+
logger.error("Error in ingest gRPC while creating a vectorset", exc_info=True)
|
501
|
+
response.status = NewVectorSetResponse.Status.ERROR
|
502
|
+
response.details = str(exc)
|
503
|
+
else:
|
504
|
+
response.status = NewVectorSetResponse.Status.OK
|
505
|
+
return response
|
815
506
|
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
507
|
+
async def DelVectorSet( # type: ignore
|
508
|
+
self, request: DelVectorSetRequest, context=None
|
509
|
+
) -> DelVectorSetResponse:
|
510
|
+
response = DelVectorSetResponse()
|
511
|
+
try:
|
512
|
+
async with self.driver.transaction() as txn:
|
513
|
+
kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
|
514
|
+
await kbobj.delete_vectorset(request.vectorset_id)
|
515
|
+
await txn.commit()
|
516
|
+
except Exception as exc:
|
517
|
+
errors.capture_exception(exc)
|
518
|
+
logger.error("Error in ingest gRPC while deleting a vectorset", exc_info=True)
|
519
|
+
response.status = DelVectorSetResponse.Status.ERROR
|
520
|
+
response.details = str(exc)
|
827
521
|
else:
|
828
|
-
|
829
|
-
|
830
|
-
async def generate_buffer(
|
831
|
-
storage: Storage, request: AsyncIterator[UploadBinaryData] # type: ignore
|
832
|
-
):
|
833
|
-
# Storage requires uploading chunks of a specified size, this is
|
834
|
-
# why we need to have an intermediate buffer
|
835
|
-
buf = BytesIO()
|
836
|
-
async for chunk in request:
|
837
|
-
if not chunk.HasField("payload"):
|
838
|
-
raise AttributeError("Payload not found")
|
839
|
-
buf.write(chunk.payload)
|
840
|
-
while buf.tell() > storage.chunk_size:
|
841
|
-
buf.seek(0)
|
842
|
-
data = buf.read(storage.chunk_size)
|
843
|
-
if len(data):
|
844
|
-
yield data
|
845
|
-
old_data = buf.read()
|
846
|
-
buf = BytesIO()
|
847
|
-
buf.write(old_data)
|
848
|
-
buf.seek(0)
|
849
|
-
data = buf.read()
|
850
|
-
if len(data):
|
851
|
-
yield data
|
852
|
-
|
853
|
-
if destination is None:
|
854
|
-
raise AttributeError("No destination file")
|
855
|
-
await self.storage.uploaditerator(
|
856
|
-
generate_buffer(self.storage, request), destination, cf
|
857
|
-
)
|
858
|
-
result = FileUploaded()
|
859
|
-
return result
|
860
|
-
|
861
|
-
|
862
|
-
LEARNING_SIMILARITY_FUNCTION_TO_PROTO = {
|
863
|
-
"cosine": utils_pb2.VectorSimilarity.COSINE,
|
864
|
-
"dot": utils_pb2.VectorSimilarity.DOT,
|
865
|
-
}
|
866
|
-
|
867
|
-
|
868
|
-
def parse_model_metadata_from_learning_config(
|
869
|
-
lconfig: learning_proxy.LearningConfiguration,
|
870
|
-
) -> SemanticModelMetadata:
|
871
|
-
model = SemanticModelMetadata()
|
872
|
-
model.similarity_function = LEARNING_SIMILARITY_FUNCTION_TO_PROTO[
|
873
|
-
lconfig.semantic_vector_similarity
|
874
|
-
]
|
875
|
-
if lconfig.semantic_vector_size is not None:
|
876
|
-
model.vector_dimension = lconfig.semantic_vector_size
|
877
|
-
else:
|
878
|
-
logger.warning("Vector dimension not set!")
|
879
|
-
if lconfig.semantic_threshold is not None:
|
880
|
-
model.default_min_score = lconfig.semantic_threshold
|
881
|
-
else:
|
882
|
-
logger.warning("Default min score not set!")
|
883
|
-
return model
|
884
|
-
|
885
|
-
|
886
|
-
def parse_model_metadata_from_request(
|
887
|
-
request: KnowledgeBoxNew,
|
888
|
-
) -> SemanticModelMetadata:
|
889
|
-
model = SemanticModelMetadata()
|
890
|
-
model.similarity_function = request.similarity
|
891
|
-
if request.HasField("vector_dimension"):
|
892
|
-
model.vector_dimension = request.vector_dimension
|
893
|
-
else:
|
894
|
-
logger.warning(
|
895
|
-
"Vector dimension not set. Will be detected automatically on the first vector set."
|
896
|
-
)
|
897
|
-
if request.HasField("default_min_score"):
|
898
|
-
model.default_min_score = request.default_min_score
|
899
|
-
else:
|
900
|
-
logger.warning("Default min score not set!")
|
901
|
-
return model
|
902
|
-
|
903
|
-
|
904
|
-
def get_release_channel(request: KnowledgeBoxNew) -> utils_pb2.ReleaseChannel.ValueType:
|
905
|
-
"""
|
906
|
-
Set channel to Experimental if specified in the grpc request or if the requested
|
907
|
-
slug has the experimental_kb feature enabled in stage environment.
|
908
|
-
"""
|
909
|
-
release_channel = request.release_channel
|
910
|
-
if running_settings.running_environment == "stage" and has_feature(
|
911
|
-
const.Features.EXPERIMENTAL_KB, context={"slug": request.slug}
|
912
|
-
):
|
913
|
-
release_channel = utils_pb2.ReleaseChannel.EXPERIMENTAL
|
914
|
-
return release_channel
|
522
|
+
response.status = DelVectorSetResponse.Status.OK
|
523
|
+
return response
|