nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -17,40 +17,44 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
import json
|
21
20
|
import uuid
|
22
|
-
from
|
23
|
-
from typing import AsyncIterator, Optional
|
21
|
+
from typing import AsyncIterator
|
24
22
|
|
23
|
+
from nucliadb.common import datamanagers
|
24
|
+
from nucliadb.common.cluster.exceptions import AlreadyExists, EntitiesGroupNotFound
|
25
|
+
from nucliadb.common.cluster.manager import get_index_nodes
|
26
|
+
from nucliadb.common.cluster.utils import get_shard_manager
|
27
|
+
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
28
|
+
from nucliadb.common.external_index_providers.exceptions import ExternalIndexCreationError
|
29
|
+
from nucliadb.common.external_index_providers.manager import get_external_index_manager
|
30
|
+
from nucliadb.common.maindb.utils import setup_driver
|
31
|
+
from nucliadb.ingest import SERVICE_NAME, logger
|
32
|
+
from nucliadb.ingest.orm.broker_message import generate_broker_message
|
33
|
+
from nucliadb.ingest.orm.entities import EntitiesManager
|
34
|
+
from nucliadb.ingest.orm.exceptions import KnowledgeBoxConflict, VectorSetConflict
|
35
|
+
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
|
36
|
+
from nucliadb.ingest.orm.processor import Processor, sequence_manager
|
37
|
+
from nucliadb.ingest.orm.resource import Resource as ResourceORM
|
38
|
+
from nucliadb.ingest.settings import settings
|
39
|
+
from nucliadb_protos import nodewriter_pb2, writer_pb2, writer_pb2_grpc
|
25
40
|
from nucliadb_protos.knowledgebox_pb2 import (
|
26
41
|
DeleteKnowledgeBoxResponse,
|
27
|
-
GCKnowledgeBoxResponse,
|
28
42
|
KnowledgeBoxID,
|
29
|
-
KnowledgeBoxNew,
|
30
43
|
KnowledgeBoxResponseStatus,
|
31
44
|
KnowledgeBoxUpdate,
|
32
|
-
Labels,
|
33
|
-
NewKnowledgeBoxResponse,
|
34
45
|
SemanticModelMetadata,
|
35
46
|
UpdateKnowledgeBoxResponse,
|
47
|
+
VectorSetConfig,
|
36
48
|
)
|
37
|
-
from nucliadb_protos.resources_pb2 import CloudFile
|
38
49
|
from nucliadb_protos.writer_pb2 import (
|
39
|
-
BinaryData,
|
40
50
|
BrokerMessage,
|
41
51
|
DelEntitiesRequest,
|
42
|
-
|
43
|
-
|
44
|
-
FileRequest,
|
45
|
-
FileUploaded,
|
52
|
+
DelVectorSetRequest,
|
53
|
+
DelVectorSetResponse,
|
46
54
|
GetEntitiesGroupRequest,
|
47
55
|
GetEntitiesGroupResponse,
|
48
56
|
GetEntitiesRequest,
|
49
57
|
GetEntitiesResponse,
|
50
|
-
GetLabelSetRequest,
|
51
|
-
GetLabelSetResponse,
|
52
|
-
GetLabelsRequest,
|
53
|
-
GetLabelsResponse,
|
54
58
|
IndexResource,
|
55
59
|
IndexStatus,
|
56
60
|
ListEntitiesGroupsRequest,
|
@@ -59,43 +63,22 @@ from nucliadb_protos.writer_pb2 import (
|
|
59
63
|
ListMembersResponse,
|
60
64
|
NewEntitiesGroupRequest,
|
61
65
|
NewEntitiesGroupResponse,
|
66
|
+
NewVectorSetRequest,
|
67
|
+
NewVectorSetResponse,
|
62
68
|
OpStatusWriter,
|
63
69
|
SetEntitiesRequest,
|
64
|
-
SetLabelsRequest,
|
65
|
-
SetVectorsRequest,
|
66
|
-
SetVectorsResponse,
|
67
70
|
UpdateEntitiesGroupRequest,
|
68
71
|
UpdateEntitiesGroupResponse,
|
69
|
-
UploadBinaryData,
|
70
72
|
WriterStatusRequest,
|
71
73
|
WriterStatusResponse,
|
72
74
|
)
|
73
|
-
|
74
|
-
from nucliadb import learning_proxy
|
75
|
-
from nucliadb.common import datamanagers
|
76
|
-
from nucliadb.common.cluster.exceptions import AlreadyExists, EntitiesGroupNotFound
|
77
|
-
from nucliadb.common.cluster.manager import get_index_nodes
|
78
|
-
from nucliadb.common.cluster.utils import get_shard_manager
|
79
|
-
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
80
|
-
from nucliadb.common.maindb.utils import setup_driver
|
81
|
-
from nucliadb.ingest import SERVICE_NAME, logger
|
82
|
-
from nucliadb.ingest.orm.entities import EntitiesManager
|
83
|
-
from nucliadb.ingest.orm.exceptions import KnowledgeBoxConflict
|
84
|
-
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
|
85
|
-
from nucliadb.ingest.orm.processor import Processor, sequence_manager
|
86
|
-
from nucliadb.ingest.orm.resource import Resource as ResourceORM
|
87
|
-
from nucliadb.ingest.settings import settings
|
88
|
-
from nucliadb_protos import utils_pb2, writer_pb2, writer_pb2_grpc
|
89
75
|
from nucliadb_telemetry import errors
|
90
|
-
from nucliadb_utils import
|
91
|
-
from nucliadb_utils.settings import is_onprem_nucliadb, running_settings
|
92
|
-
from nucliadb_utils.storages.storage import Storage, StorageField
|
76
|
+
from nucliadb_utils.settings import is_onprem_nucliadb
|
93
77
|
from nucliadb_utils.utilities import (
|
94
78
|
get_partitioning,
|
95
79
|
get_pubsub,
|
96
80
|
get_storage,
|
97
81
|
get_transaction_utility,
|
98
|
-
has_feature,
|
99
82
|
)
|
100
83
|
|
101
84
|
|
@@ -106,53 +89,64 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
106
89
|
async def initialize(self):
|
107
90
|
self.storage = await get_storage(service_name=SERVICE_NAME)
|
108
91
|
self.driver = await setup_driver()
|
109
|
-
self.proc = Processor(
|
110
|
-
driver=self.driver, storage=self.storage, pubsub=await get_pubsub()
|
111
|
-
)
|
92
|
+
self.proc = Processor(driver=self.driver, storage=self.storage, pubsub=await get_pubsub())
|
112
93
|
self.shards_manager = get_shard_manager()
|
113
94
|
|
114
95
|
async def finalize(self): ...
|
115
96
|
|
116
|
-
async def
|
117
|
-
self, request:
|
118
|
-
) ->
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
field = await resobj.get_field(
|
127
|
-
request.field.field, request.field.field_type, load=True
|
97
|
+
async def NewKnowledgeBoxV2(
|
98
|
+
self, request: writer_pb2.NewKnowledgeBoxV2Request, context=None
|
99
|
+
) -> writer_pb2.NewKnowledgeBoxV2Response:
|
100
|
+
"""v2 of KB creation endpoint. Payload has been refactored and cleaned
|
101
|
+
up to include only necessary fields. It has also been extended to
|
102
|
+
support KB creation with multiple vectorsets
|
103
|
+
"""
|
104
|
+
if is_onprem_nucliadb():
|
105
|
+
logger.error(
|
106
|
+
"Sorry, this endpoint is only available for hosted. Onprem must use the REST API"
|
128
107
|
)
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
logger.debug(f"Setting {len(request.vectors.vectors.vectors)} vectors")
|
137
|
-
|
138
|
-
try:
|
139
|
-
await field.set_vectors(evw)
|
140
|
-
await txn.commit()
|
141
|
-
except Exception as e:
|
142
|
-
errors.capture_exception(e)
|
143
|
-
logger.error("Error in ingest gRPC servicer", exc_info=True)
|
144
|
-
|
145
|
-
return response
|
108
|
+
return writer_pb2.NewKnowledgeBoxV2Response(
|
109
|
+
status=KnowledgeBoxResponseStatus.ERROR,
|
110
|
+
error_message="This endpoint is only available for hosted. Onprem must use the REST API",
|
111
|
+
)
|
112
|
+
# Hosted KBs are created through backend endpoints. We assume learning
|
113
|
+
# configuration has been already created for it and we are given the
|
114
|
+
# model metadata in the request
|
146
115
|
|
147
|
-
async def NewKnowledgeBox( # type: ignore
|
148
|
-
self, request: KnowledgeBoxNew, context=None
|
149
|
-
) -> NewKnowledgeBoxResponse:
|
150
116
|
try:
|
151
|
-
kbid = await
|
152
|
-
|
117
|
+
kbid, _ = await KnowledgeBoxORM.create(
|
118
|
+
self.driver,
|
119
|
+
kbid=request.kbid,
|
120
|
+
slug=request.slug,
|
121
|
+
title=request.title,
|
122
|
+
description=request.description,
|
123
|
+
semantic_models={
|
124
|
+
vs.vectorset_id: SemanticModelMetadata(
|
125
|
+
similarity_function=vs.similarity,
|
126
|
+
vector_dimension=vs.vector_dimension,
|
127
|
+
matryoshka_dimensions=vs.matryoshka_dimensions,
|
128
|
+
)
|
129
|
+
for vs in request.vectorsets
|
130
|
+
},
|
131
|
+
external_index_provider=request.external_index_provider,
|
132
|
+
hidden_resources_enabled=request.hidden_resources_enabled,
|
133
|
+
hidden_resources_hide_on_creation=request.hidden_resources_hide_on_creation,
|
134
|
+
)
|
135
|
+
|
153
136
|
except KnowledgeBoxConflict:
|
154
137
|
logger.info("KB already exists", extra={"slug": request.slug})
|
155
|
-
return
|
138
|
+
return writer_pb2.NewKnowledgeBoxV2Response(status=KnowledgeBoxResponseStatus.CONFLICT)
|
139
|
+
|
140
|
+
except ExternalIndexCreationError as exc:
|
141
|
+
logger.exception(
|
142
|
+
"Error creating external index",
|
143
|
+
extra={"slug": request.slug, "error": str(exc)},
|
144
|
+
)
|
145
|
+
return writer_pb2.NewKnowledgeBoxV2Response(
|
146
|
+
status=KnowledgeBoxResponseStatus.EXTERNAL_INDEX_PROVIDER_ERROR,
|
147
|
+
error_message=exc.message,
|
148
|
+
)
|
149
|
+
|
156
150
|
except Exception as exc:
|
157
151
|
errors.capture_exception(exc)
|
158
152
|
logger.exception(
|
@@ -160,101 +154,50 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
160
154
|
exc_info=True,
|
161
155
|
extra={"slug": request.slug},
|
162
156
|
)
|
163
|
-
return
|
164
|
-
return NewKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.OK, uuid=kbid)
|
157
|
+
return writer_pb2.NewKnowledgeBoxV2Response(status=KnowledgeBoxResponseStatus.ERROR)
|
165
158
|
|
166
|
-
async def create_kb(self, request: KnowledgeBoxNew) -> str:
|
167
|
-
if is_onprem_nucliadb():
|
168
|
-
return await self._create_kb_onprem(request)
|
169
159
|
else:
|
170
|
-
|
171
|
-
|
172
|
-
async def _create_kb_onprem(self, request: KnowledgeBoxNew) -> str:
|
173
|
-
"""
|
174
|
-
First, try to get the learning configuration for the new knowledge box.
|
175
|
-
From there we need to extract the semantic model metadata and pass it to the create_kb method.
|
176
|
-
If the kb creation fails, rollback the learning configuration for the kbid that was just created.
|
177
|
-
"""
|
178
|
-
kbid = request.forceuuid or str(uuid.uuid4())
|
179
|
-
release_channel = get_release_channel(request)
|
180
|
-
lconfig = await learning_proxy.get_configuration(kbid)
|
181
|
-
lconfig_created = False
|
182
|
-
if lconfig is None:
|
183
|
-
if request.learning_config:
|
184
|
-
# We parse the desired configuration from the request and set it
|
185
|
-
config = json.loads(request.learning_config)
|
186
|
-
else:
|
187
|
-
# We set an empty configuration so that learning chooses the default values.
|
188
|
-
config = {}
|
189
|
-
logger.warning(
|
190
|
-
"No learning configuration provided. Default will be used.",
|
191
|
-
extra={"kbid": kbid},
|
192
|
-
)
|
193
|
-
# NOTE: we rely on learning to return an updated configuration with
|
194
|
-
# matryoshka settings if they're available
|
195
|
-
lconfig = await learning_proxy.set_configuration(kbid, config=config)
|
196
|
-
lconfig_created = True
|
197
|
-
else:
|
198
|
-
logger.info("Learning configuration already exists", extra={"kbid": kbid})
|
199
|
-
try:
|
200
|
-
await self.proc.create_kb(
|
201
|
-
request.slug,
|
202
|
-
request.config,
|
203
|
-
parse_model_metadata_from_learning_config(lconfig),
|
204
|
-
forceuuid=kbid,
|
205
|
-
release_channel=release_channel,
|
206
|
-
)
|
207
|
-
return kbid
|
208
|
-
except Exception:
|
209
|
-
# Rollback learning config for the kbid that was just created
|
210
|
-
try:
|
211
|
-
if lconfig_created:
|
212
|
-
await learning_proxy.delete_configuration(kbid)
|
213
|
-
except Exception:
|
214
|
-
logger.warning(
|
215
|
-
"Could not rollback learning configuration",
|
216
|
-
exc_info=True,
|
217
|
-
extra={"kbid": kbid},
|
218
|
-
)
|
219
|
-
raise
|
220
|
-
|
221
|
-
async def _create_kb_hosted(self, request: KnowledgeBoxNew) -> str:
|
222
|
-
"""
|
223
|
-
For the hosted case, we assume that the learning configuration
|
224
|
-
is already set and we are given the model metadata in the request.
|
225
|
-
"""
|
226
|
-
kbid = request.forceuuid or str(uuid.uuid4())
|
227
|
-
release_channel = get_release_channel(request)
|
228
|
-
await self.proc.create_kb(
|
229
|
-
request.slug,
|
230
|
-
request.config,
|
231
|
-
parse_model_metadata_from_request(request),
|
232
|
-
forceuuid=kbid,
|
233
|
-
release_channel=release_channel,
|
234
|
-
)
|
235
|
-
return kbid
|
160
|
+
logger.info("KB created successfully", extra={"kbid": kbid})
|
161
|
+
return writer_pb2.NewKnowledgeBoxV2Response(status=KnowledgeBoxResponseStatus.OK)
|
236
162
|
|
237
163
|
async def UpdateKnowledgeBox( # type: ignore
|
238
164
|
self, request: KnowledgeBoxUpdate, context=None
|
239
165
|
) -> UpdateKnowledgeBoxResponse:
|
240
|
-
|
241
|
-
|
242
|
-
|
166
|
+
if is_onprem_nucliadb():
|
167
|
+
logger.error(
|
168
|
+
"Sorry, this endpoint is only available for hosted. Onprem must use the REST API"
|
169
|
+
)
|
243
170
|
return UpdateKnowledgeBoxResponse(
|
244
|
-
status=KnowledgeBoxResponseStatus.
|
171
|
+
status=KnowledgeBoxResponseStatus.ERROR,
|
245
172
|
)
|
173
|
+
|
174
|
+
try:
|
175
|
+
async with self.driver.transaction() as txn:
|
176
|
+
kbid = await KnowledgeBoxORM.update(
|
177
|
+
txn, uuid=request.uuid, slug=request.slug, config=request.config
|
178
|
+
)
|
179
|
+
await txn.commit()
|
180
|
+
except KnowledgeBoxNotFound:
|
181
|
+
return UpdateKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.NOTFOUND)
|
246
182
|
except Exception:
|
247
|
-
logger.exception("Could not
|
183
|
+
logger.exception("Could not update KB", exc_info=True)
|
248
184
|
return UpdateKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.ERROR)
|
249
|
-
return UpdateKnowledgeBoxResponse(
|
250
|
-
status=KnowledgeBoxResponseStatus.OK, uuid=kbid
|
251
|
-
)
|
185
|
+
return UpdateKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.OK, uuid=kbid)
|
252
186
|
|
253
187
|
async def DeleteKnowledgeBox( # type: ignore
|
254
188
|
self, request: KnowledgeBoxID, context=None
|
255
189
|
) -> DeleteKnowledgeBoxResponse:
|
190
|
+
if is_onprem_nucliadb():
|
191
|
+
logger.error(
|
192
|
+
"Sorry, this endpoint is only available for hosted. Onprem must use the REST API"
|
193
|
+
)
|
194
|
+
return DeleteKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.ERROR)
|
195
|
+
|
256
196
|
try:
|
257
|
-
|
197
|
+
kbid = request.uuid
|
198
|
+
# learning configuration is automatically removed in nuclia backend for
|
199
|
+
# hosted users, we don't need to do it
|
200
|
+
await KnowledgeBoxORM.delete(self.driver, kbid=kbid)
|
258
201
|
except KnowledgeBoxNotFound:
|
259
202
|
logger.warning(f"KB not found: kbid={request.uuid}, slug={request.slug}")
|
260
203
|
except Exception:
|
@@ -262,28 +205,6 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
262
205
|
return DeleteKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.ERROR)
|
263
206
|
return DeleteKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.OK)
|
264
207
|
|
265
|
-
async def delete_kb(self, request: KnowledgeBoxID) -> None:
|
266
|
-
kbid = request.uuid
|
267
|
-
await self.proc.delete_kb(kbid)
|
268
|
-
# learning configuration is automatically removed in nuclia backend for
|
269
|
-
# hosted users, we only need to remove it for onprem
|
270
|
-
if is_onprem_nucliadb():
|
271
|
-
try:
|
272
|
-
await learning_proxy.delete_configuration(kbid)
|
273
|
-
logger.info("Learning configuration deleted", extra={"kbid": kbid})
|
274
|
-
except Exception:
|
275
|
-
logger.exception(
|
276
|
-
"Unexpected error deleting learning configuration",
|
277
|
-
exc_info=True,
|
278
|
-
extra={"kbid": kbid},
|
279
|
-
)
|
280
|
-
|
281
|
-
async def GCKnowledgeBox( # type: ignore
|
282
|
-
self, request: KnowledgeBoxID, context=None
|
283
|
-
) -> GCKnowledgeBoxResponse:
|
284
|
-
response = GCKnowledgeBoxResponse()
|
285
|
-
return response
|
286
|
-
|
287
208
|
async def ProcessMessage( # type: ignore
|
288
209
|
self, request_stream: AsyncIterator[BrokerMessage], context=None
|
289
210
|
):
|
@@ -301,86 +222,21 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
301
222
|
logger.info(f"Processed {message.uuid}")
|
302
223
|
return response
|
303
224
|
|
304
|
-
async def SetLabels(self, request: SetLabelsRequest, context=None) -> OpStatusWriter: # type: ignore
|
305
|
-
async with self.driver.transaction() as txn:
|
306
|
-
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
307
|
-
response = OpStatusWriter()
|
308
|
-
if kbobj is not None:
|
309
|
-
try:
|
310
|
-
await kbobj.set_labelset(request.id, request.labelset)
|
311
|
-
await txn.commit()
|
312
|
-
response.status = OpStatusWriter.Status.OK
|
313
|
-
except Exception as e:
|
314
|
-
errors.capture_exception(e)
|
315
|
-
logger.error("Error in ingest gRPC servicer", exc_info=True)
|
316
|
-
response.status = OpStatusWriter.Status.ERROR
|
317
|
-
else:
|
318
|
-
response.status = OpStatusWriter.Status.NOTFOUND
|
319
|
-
return response
|
320
|
-
|
321
|
-
async def DelLabels(self, request: DelLabelsRequest, context=None) -> OpStatusWriter: # type: ignore
|
322
|
-
async with self.driver.transaction() as txn:
|
323
|
-
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
324
|
-
response = OpStatusWriter()
|
325
|
-
if kbobj is not None:
|
326
|
-
try:
|
327
|
-
await kbobj.del_labelset(request.id)
|
328
|
-
await txn.commit()
|
329
|
-
response.status = OpStatusWriter.Status.OK
|
330
|
-
except Exception as e:
|
331
|
-
errors.capture_exception(e)
|
332
|
-
logger.error("Error in ingest gRPC servicer", exc_info=True)
|
333
|
-
response.status = OpStatusWriter.Status.ERROR
|
334
|
-
else:
|
335
|
-
response.status = OpStatusWriter.Status.NOTFOUND
|
336
|
-
|
337
|
-
return response
|
338
|
-
|
339
|
-
async def GetLabels(self, request: GetLabelsRequest, context=None) -> GetLabelsResponse: # type: ignore
|
340
|
-
async with self.driver.transaction() as txn:
|
341
|
-
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
342
|
-
labels: Optional[Labels] = None
|
343
|
-
if kbobj is not None:
|
344
|
-
labels = await kbobj.get_labels()
|
345
|
-
response = GetLabelsResponse()
|
346
|
-
if kbobj is None:
|
347
|
-
response.status = GetLabelsResponse.Status.NOTFOUND
|
348
|
-
else:
|
349
|
-
response.kb.uuid = kbobj.kbid
|
350
|
-
if labels is not None:
|
351
|
-
response.labels.CopyFrom(labels)
|
352
|
-
|
353
|
-
return response
|
354
|
-
|
355
|
-
async def GetLabelSet( # type: ignore
|
356
|
-
self, request: GetLabelSetRequest, context=None
|
357
|
-
) -> GetLabelSetResponse:
|
358
|
-
async with self.driver.transaction() as txn:
|
359
|
-
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
360
|
-
response = GetLabelSetResponse()
|
361
|
-
if kbobj is not None:
|
362
|
-
await kbobj.get_labelset(request.labelset, response)
|
363
|
-
response.kb.uuid = kbobj.kbid
|
364
|
-
response.status = GetLabelSetResponse.Status.OK
|
365
|
-
else:
|
366
|
-
response.status = GetLabelSetResponse.Status.NOTFOUND
|
367
|
-
return response
|
368
|
-
|
369
225
|
async def NewEntitiesGroup( # type: ignore
|
370
226
|
self, request: NewEntitiesGroupRequest, context=None
|
371
227
|
) -> NewEntitiesGroupResponse:
|
372
228
|
response = NewEntitiesGroupResponse()
|
373
|
-
async with self.driver.transaction() as
|
374
|
-
kbobj = await self.proc.get_kb_obj(
|
229
|
+
async with self.driver.transaction(read_only=True) as ro_txn:
|
230
|
+
kbobj = await self.proc.get_kb_obj(ro_txn, request.kb)
|
375
231
|
if kbobj is None:
|
376
232
|
response.status = NewEntitiesGroupResponse.Status.KB_NOT_FOUND
|
377
233
|
return response
|
378
234
|
|
235
|
+
async with self.driver.transaction() as txn:
|
236
|
+
kbobj.txn = txn
|
379
237
|
entities_manager = EntitiesManager(kbobj, txn)
|
380
238
|
try:
|
381
|
-
await entities_manager.create_entities_group(
|
382
|
-
request.group, request.entities
|
383
|
-
)
|
239
|
+
await entities_manager.create_entities_group(request.group, request.entities)
|
384
240
|
except AlreadyExists:
|
385
241
|
response.status = NewEntitiesGroupResponse.Status.ALREADY_EXISTS
|
386
242
|
return response
|
@@ -393,9 +249,8 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
393
249
|
self, request: GetEntitiesRequest, context=None
|
394
250
|
) -> GetEntitiesResponse:
|
395
251
|
response = GetEntitiesResponse()
|
396
|
-
async with self.driver.transaction() as txn:
|
252
|
+
async with self.driver.transaction(read_only=True) as txn:
|
397
253
|
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
398
|
-
|
399
254
|
if kbobj is None:
|
400
255
|
response.status = GetEntitiesResponse.Status.NOTFOUND
|
401
256
|
return response
|
@@ -416,9 +271,8 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
416
271
|
self, request: ListEntitiesGroupsRequest, context=None
|
417
272
|
) -> ListEntitiesGroupsResponse:
|
418
273
|
response = ListEntitiesGroupsResponse()
|
419
|
-
async with self.driver.transaction() as txn:
|
274
|
+
async with self.driver.transaction(read_only=True) as txn:
|
420
275
|
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
421
|
-
|
422
276
|
if kbobj is None:
|
423
277
|
response.status = ListEntitiesGroupsResponse.Status.NOTFOUND
|
424
278
|
return response
|
@@ -441,7 +295,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
441
295
|
self, request: GetEntitiesGroupRequest, context=None
|
442
296
|
) -> GetEntitiesGroupResponse:
|
443
297
|
response = GetEntitiesGroupResponse()
|
444
|
-
async with self.driver.transaction() as txn:
|
298
|
+
async with self.driver.transaction(read_only=True) as txn:
|
445
299
|
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
446
300
|
if kbobj is None:
|
447
301
|
response.status = GetEntitiesGroupResponse.Status.KB_NOT_FOUND
|
@@ -449,9 +303,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
449
303
|
|
450
304
|
entities_manager = EntitiesManager(kbobj, txn)
|
451
305
|
try:
|
452
|
-
entities_group = await entities_manager.get_entities_group(
|
453
|
-
request.group
|
454
|
-
)
|
306
|
+
entities_group = await entities_manager.get_entities_group(request.group)
|
455
307
|
except Exception as e:
|
456
308
|
errors.capture_exception(e)
|
457
309
|
logger.error("Error in ingest gRPC servicer", exc_info=True)
|
@@ -459,9 +311,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
459
311
|
else:
|
460
312
|
response.kb.uuid = kbobj.kbid
|
461
313
|
if entities_group is None:
|
462
|
-
response.status =
|
463
|
-
GetEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND
|
464
|
-
)
|
314
|
+
response.status = GetEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND
|
465
315
|
else:
|
466
316
|
response.status = GetEntitiesGroupResponse.Status.OK
|
467
317
|
response.group.CopyFrom(entities_group)
|
@@ -470,17 +320,17 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
470
320
|
|
471
321
|
async def SetEntities(self, request: SetEntitiesRequest, context=None) -> OpStatusWriter: # type: ignore
|
472
322
|
response = OpStatusWriter()
|
473
|
-
async with self.driver.transaction() as
|
474
|
-
kbobj = await self.proc.get_kb_obj(
|
323
|
+
async with self.driver.transaction(read_only=True) as ro_txn:
|
324
|
+
kbobj = await self.proc.get_kb_obj(ro_txn, request.kb)
|
475
325
|
if kbobj is None:
|
476
326
|
response.status = OpStatusWriter.Status.NOTFOUND
|
477
327
|
return response
|
478
328
|
|
329
|
+
async with self.driver.transaction() as txn:
|
330
|
+
kbobj.txn = txn
|
479
331
|
entities_manager = EntitiesManager(kbobj, txn)
|
480
332
|
try:
|
481
|
-
await entities_manager.set_entities_group(
|
482
|
-
request.group, request.entities
|
483
|
-
)
|
333
|
+
await entities_manager.set_entities_group(request.group, request.entities)
|
484
334
|
except Exception as e:
|
485
335
|
errors.capture_exception(e)
|
486
336
|
logger.error("Error in ingest gRPC servicer", exc_info=True)
|
@@ -494,14 +344,15 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
494
344
|
self, request: UpdateEntitiesGroupRequest, context=None
|
495
345
|
) -> UpdateEntitiesGroupResponse:
|
496
346
|
response = UpdateEntitiesGroupResponse()
|
497
|
-
async with self.driver.transaction() as
|
498
|
-
kbobj = await self.proc.get_kb_obj(
|
347
|
+
async with self.driver.transaction(read_only=True) as ro_txn:
|
348
|
+
kbobj = await self.proc.get_kb_obj(ro_txn, request.kb)
|
499
349
|
if kbobj is None:
|
500
350
|
response.status = UpdateEntitiesGroupResponse.Status.KB_NOT_FOUND
|
501
351
|
return response
|
502
352
|
|
353
|
+
async with self.driver.transaction() as txn:
|
354
|
+
kbobj.txn = txn
|
503
355
|
entities_manager = EntitiesManager(kbobj, txn)
|
504
|
-
|
505
356
|
try:
|
506
357
|
await entities_manager.set_entities_group_metadata(
|
507
358
|
request.group,
|
@@ -512,9 +363,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
512
363
|
await entities_manager.update_entities(request.group, updates)
|
513
364
|
await entities_manager.delete_entities(request.group, request.delete) # type: ignore
|
514
365
|
except EntitiesGroupNotFound:
|
515
|
-
response.status =
|
516
|
-
UpdateEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND
|
517
|
-
)
|
366
|
+
response.status = UpdateEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND
|
518
367
|
return response
|
519
368
|
|
520
369
|
await txn.commit()
|
@@ -523,12 +372,15 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
523
372
|
|
524
373
|
async def DelEntities(self, request: DelEntitiesRequest, context=None) -> OpStatusWriter: # type: ignore
|
525
374
|
response = OpStatusWriter()
|
526
|
-
|
527
|
-
|
375
|
+
|
376
|
+
async with self.driver.transaction(read_only=True) as ro_txn:
|
377
|
+
kbobj = await self.proc.get_kb_obj(ro_txn, request.kb)
|
528
378
|
if kbobj is None:
|
529
379
|
response.status = OpStatusWriter.Status.NOTFOUND
|
530
380
|
return response
|
531
381
|
|
382
|
+
async with self.driver.transaction() as txn:
|
383
|
+
kbobj.txn = txn
|
532
384
|
entities_manager = EntitiesManager(kbobj, txn)
|
533
385
|
try:
|
534
386
|
await entities_manager.delete_entities_group(request.group)
|
@@ -546,7 +398,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
546
398
|
) -> WriterStatusResponse:
|
547
399
|
logger.info("Status Call")
|
548
400
|
response = WriterStatusResponse()
|
549
|
-
async with self.driver.transaction() as txn:
|
401
|
+
async with self.driver.transaction(read_only=True) as txn:
|
550
402
|
async for _, slug in datamanagers.kb.get_kbs(txn):
|
551
403
|
response.knowledgeboxes.append(slug)
|
552
404
|
|
@@ -580,7 +432,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
580
432
|
async with self.driver.transaction() as txn:
|
581
433
|
kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
|
582
434
|
resobj = ResourceORM(txn, self.storage, kbobj, request.rid)
|
583
|
-
bm = await
|
435
|
+
bm = await generate_broker_message(resobj)
|
584
436
|
transaction = get_transaction_utility()
|
585
437
|
partitioning = get_partitioning()
|
586
438
|
partition = partitioning.generate_partition(request.kbid, request.rid)
|
@@ -595,39 +447,25 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
595
447
|
kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
|
596
448
|
resobj = ResourceORM(txn, self.storage, kbobj, request.rid)
|
597
449
|
resobj.disable_vectors = not request.reindex_vectors
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
if shard is None:
|
608
|
-
shard = await self.shards_manager.get_current_active_shard(
|
609
|
-
txn, request.kbid
|
450
|
+
brain = await resobj.generate_index_message(reindex=True)
|
451
|
+
shard = await self.proc.get_or_assign_resource_shard(txn, kbobj, request.rid)
|
452
|
+
index_message = brain.brain
|
453
|
+
external_index_manager = await get_external_index_manager(kbid=request.kbid)
|
454
|
+
if external_index_manager is not None:
|
455
|
+
await self.proc.external_index_add_resource(
|
456
|
+
request.kbid,
|
457
|
+
request.rid,
|
458
|
+
index_message,
|
610
459
|
)
|
611
|
-
|
612
|
-
# no shard currently exists, create one
|
613
|
-
shard = await self.shards_manager.create_shard_by_kbid(
|
614
|
-
txn, request.kbid
|
615
|
-
)
|
616
|
-
|
617
|
-
await datamanagers.resources.set_resource_shard_id(
|
618
|
-
txn, kbid=request.kbid, rid=request.rid, shard=shard.shard
|
619
|
-
)
|
620
|
-
|
621
|
-
if shard is not None:
|
460
|
+
else:
|
622
461
|
await self.shards_manager.add_resource(
|
623
462
|
shard,
|
624
|
-
|
463
|
+
index_message,
|
625
464
|
0,
|
626
465
|
partition=self.partitions[0],
|
627
466
|
kb=request.kbid,
|
628
467
|
reindex_id=uuid.uuid4().hex,
|
629
468
|
)
|
630
|
-
|
631
469
|
response = IndexStatus()
|
632
470
|
return response
|
633
471
|
except Exception as e:
|
@@ -635,116 +473,51 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
|
|
635
473
|
logger.error("Error in ingest gRPC servicer", exc_info=True)
|
636
474
|
raise
|
637
475
|
|
638
|
-
async def
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
destination = self.storage.field_klass(
|
651
|
-
storage=self.storage, bucket=bucket, fullkey=data.metadata.key
|
652
|
-
)
|
653
|
-
cf.content_type = data.metadata.content_type
|
654
|
-
cf.filename = data.metadata.filename
|
655
|
-
cf.size = data.metadata.size
|
656
|
-
else:
|
657
|
-
raise AttributeError("Metadata not found")
|
658
|
-
|
659
|
-
async def generate_buffer(
|
660
|
-
storage: Storage, request: AsyncIterator[UploadBinaryData] # type: ignore
|
661
|
-
):
|
662
|
-
# Storage requires uploading chunks of a specified size, this is
|
663
|
-
# why we need to have an intermediate buffer
|
664
|
-
buf = BytesIO()
|
665
|
-
async for chunk in request:
|
666
|
-
if not chunk.HasField("payload"):
|
667
|
-
raise AttributeError("Payload not found")
|
668
|
-
buf.write(chunk.payload)
|
669
|
-
while buf.tell() > storage.chunk_size:
|
670
|
-
buf.seek(0)
|
671
|
-
data = buf.read(storage.chunk_size)
|
672
|
-
if len(data):
|
673
|
-
yield data
|
674
|
-
old_data = buf.read()
|
675
|
-
buf = BytesIO()
|
676
|
-
buf.write(old_data)
|
677
|
-
buf.seek(0)
|
678
|
-
data = buf.read()
|
679
|
-
if len(data):
|
680
|
-
yield data
|
681
|
-
|
682
|
-
if destination is None:
|
683
|
-
raise AttributeError("No destination file")
|
684
|
-
await self.storage.uploaditerator(
|
685
|
-
generate_buffer(self.storage, request), destination, cf
|
476
|
+
async def NewVectorSet( # type: ignore
|
477
|
+
self, request: NewVectorSetRequest, context=None
|
478
|
+
) -> NewVectorSetResponse:
|
479
|
+
config = VectorSetConfig(
|
480
|
+
vectorset_id=request.vectorset_id,
|
481
|
+
vectorset_index_config=nodewriter_pb2.VectorIndexConfig(
|
482
|
+
similarity=request.similarity,
|
483
|
+
normalize_vectors=request.normalize_vectors,
|
484
|
+
vector_type=request.vector_type,
|
485
|
+
vector_dimension=request.vector_dimension,
|
486
|
+
),
|
487
|
+
matryoshka_dimensions=request.matryoshka_dimensions,
|
686
488
|
)
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
model.similarity_function = LEARNING_SIMILARITY_FUNCTION_TO_PROTO[
|
702
|
-
lconfig.semantic_vector_similarity
|
703
|
-
]
|
704
|
-
if lconfig.semantic_vector_size is not None:
|
705
|
-
model.vector_dimension = lconfig.semantic_vector_size
|
706
|
-
else:
|
707
|
-
logger.warning("Vector dimension not set!")
|
708
|
-
if lconfig.semantic_matryoshka_dimensions is not None:
|
709
|
-
model.matryoshka_dimensions.extend(lconfig.semantic_matryoshka_dimensions)
|
710
|
-
return model
|
711
|
-
|
712
|
-
|
713
|
-
def parse_model_metadata_from_request(
|
714
|
-
request: KnowledgeBoxNew,
|
715
|
-
) -> SemanticModelMetadata:
|
716
|
-
model = SemanticModelMetadata()
|
717
|
-
model.similarity_function = request.similarity
|
718
|
-
if request.HasField("vector_dimension"):
|
719
|
-
model.vector_dimension = request.vector_dimension
|
720
|
-
else:
|
721
|
-
logger.warning(
|
722
|
-
"Vector dimension not set. Will be detected automatically on the first vector set."
|
723
|
-
)
|
724
|
-
|
725
|
-
if len(request.matryoshka_dimensions) > 0:
|
726
|
-
if model.vector_dimension not in request.matryoshka_dimensions:
|
727
|
-
logger.warning(
|
728
|
-
"Vector dimensions is inconsistent with matryoshka dimensions! Ignoring them",
|
729
|
-
extra={
|
730
|
-
"kbid": request.forceuuid,
|
731
|
-
"kbslug": request.slug,
|
732
|
-
},
|
733
|
-
)
|
489
|
+
response = NewVectorSetResponse()
|
490
|
+
try:
|
491
|
+
async with self.driver.transaction() as txn:
|
492
|
+
kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
|
493
|
+
await kbobj.create_vectorset(config)
|
494
|
+
await txn.commit()
|
495
|
+
except VectorSetConflict as exc:
|
496
|
+
response.status = NewVectorSetResponse.Status.ERROR
|
497
|
+
response.details = str(exc)
|
498
|
+
except Exception as exc:
|
499
|
+
errors.capture_exception(exc)
|
500
|
+
logger.error("Error in ingest gRPC while creating a vectorset", exc_info=True)
|
501
|
+
response.status = NewVectorSetResponse.Status.ERROR
|
502
|
+
response.details = str(exc)
|
734
503
|
else:
|
735
|
-
|
736
|
-
|
737
|
-
return model
|
738
|
-
|
504
|
+
response.status = NewVectorSetResponse.Status.OK
|
505
|
+
return response
|
739
506
|
|
740
|
-
def
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
507
|
+
async def DelVectorSet( # type: ignore
|
508
|
+
self, request: DelVectorSetRequest, context=None
|
509
|
+
) -> DelVectorSetResponse:
|
510
|
+
response = DelVectorSetResponse()
|
511
|
+
try:
|
512
|
+
async with self.driver.transaction() as txn:
|
513
|
+
kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
|
514
|
+
await kbobj.delete_vectorset(request.vectorset_id)
|
515
|
+
await txn.commit()
|
516
|
+
except Exception as exc:
|
517
|
+
errors.capture_exception(exc)
|
518
|
+
logger.error("Error in ingest gRPC while deleting a vectorset", exc_info=True)
|
519
|
+
response.status = DelVectorSetResponse.Status.ERROR
|
520
|
+
response.details = str(exc)
|
521
|
+
else:
|
522
|
+
response.status = DelVectorSetResponse.Status.OK
|
523
|
+
return response
|