nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -22,22 +22,21 @@ from typing import Optional, Union
|
|
22
22
|
from fastapi import Header, HTTPException, Query, Request, Response
|
23
23
|
from fastapi_versioning import version
|
24
24
|
|
25
|
-
|
25
|
+
from nucliadb.common.datamanagers.resources import KB_RESOURCE_SLUG_BASE
|
26
26
|
from nucliadb.common.maindb.utils import get_driver
|
27
|
+
from nucliadb.common.models_utils import from_proto, to_proto
|
27
28
|
from nucliadb.ingest.fields.conversation import Conversation
|
28
29
|
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as ORMKnowledgeBox
|
29
|
-
from nucliadb.ingest.orm.resource import KB_RESOURCE_SLUG_BASE
|
30
30
|
from nucliadb.ingest.orm.resource import Resource as ORMResource
|
31
31
|
from nucliadb.ingest.serialize import (
|
32
32
|
managed_serialize,
|
33
33
|
serialize,
|
34
34
|
set_resource_field_extracted_data,
|
35
35
|
)
|
36
|
-
from nucliadb.reader import SERVICE_NAME
|
36
|
+
from nucliadb.reader import SERVICE_NAME
|
37
37
|
from nucliadb.reader.api import DEFAULT_RESOURCE_LIST_PAGE_SIZE
|
38
38
|
from nucliadb.reader.api.models import (
|
39
39
|
FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP,
|
40
|
-
FIELD_NAMES_TO_PB_TYPE_MAP,
|
41
40
|
ResourceField,
|
42
41
|
)
|
43
42
|
from nucliadb.reader.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
|
@@ -77,60 +76,55 @@ async def list_resources(
|
|
77
76
|
|
78
77
|
# Get counters from maindb
|
79
78
|
driver = get_driver()
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
raise HTTPException(
|
130
|
-
status_code=500, detail="Couldn't retrieve list of resources right now"
|
131
|
-
)
|
132
|
-
finally:
|
133
|
-
await txn.abort()
|
79
|
+
async with driver.transaction(read_only=True) as txn:
|
80
|
+
# Filter parameters for serializer
|
81
|
+
show: list[ResourceProperties] = [ResourceProperties.BASIC]
|
82
|
+
field_types: list[FieldTypeName] = []
|
83
|
+
extracted: list[ExtractedDataTypeName] = []
|
84
|
+
|
85
|
+
try:
|
86
|
+
resources: list[Resource] = []
|
87
|
+
max_items_to_iterate = (page + 1) * size
|
88
|
+
first_wanted_item_index = (page * size) + 1 # 1-based index
|
89
|
+
current_key_index = 0
|
90
|
+
|
91
|
+
# ask for one item more than we need, in order to know if it's the last page
|
92
|
+
keys_generator = txn.keys(
|
93
|
+
match=KB_RESOURCE_SLUG_BASE.format(kbid=kbid),
|
94
|
+
count=max_items_to_iterate + 1,
|
95
|
+
)
|
96
|
+
async for key in keys_generator:
|
97
|
+
current_key_index += 1
|
98
|
+
|
99
|
+
# First of all, we need to skip keys, in case we are on a +1 page
|
100
|
+
if page > 0 and current_key_index < first_wanted_item_index:
|
101
|
+
continue
|
102
|
+
|
103
|
+
# Don't fetch keys once we got all items for this
|
104
|
+
if len(resources) == size:
|
105
|
+
await keys_generator.aclose()
|
106
|
+
break
|
107
|
+
|
108
|
+
# Fetch and Add wanted item
|
109
|
+
rid = await txn.get(key, for_update=False)
|
110
|
+
if rid:
|
111
|
+
result = await managed_serialize(
|
112
|
+
txn,
|
113
|
+
kbid,
|
114
|
+
rid.decode(),
|
115
|
+
show,
|
116
|
+
field_types,
|
117
|
+
extracted,
|
118
|
+
service_name=SERVICE_NAME,
|
119
|
+
)
|
120
|
+
if result is not None:
|
121
|
+
resources.append(result)
|
122
|
+
|
123
|
+
is_last_page = current_key_index <= max_items_to_iterate
|
124
|
+
|
125
|
+
except Exception as exc:
|
126
|
+
errors.capture_exception(exc)
|
127
|
+
raise HTTPException(status_code=500, detail="Couldn't retrieve list of resources right now")
|
134
128
|
|
135
129
|
return ResourceList(
|
136
130
|
resources=resources,
|
@@ -141,7 +135,7 @@ async def list_resources(
|
|
141
135
|
@api.get(
|
142
136
|
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}",
|
143
137
|
status_code=200,
|
144
|
-
|
138
|
+
summary="Get Resource (by id)",
|
145
139
|
response_model=Resource,
|
146
140
|
response_model_exclude_unset=True,
|
147
141
|
tags=["Resources"],
|
@@ -153,9 +147,7 @@ async def get_resource_by_uuid(
|
|
153
147
|
kbid: str,
|
154
148
|
rid: str,
|
155
149
|
show: list[ResourceProperties] = Query([ResourceProperties.BASIC]),
|
156
|
-
field_type_filter: list[FieldTypeName] = Query(
|
157
|
-
list(FieldTypeName), alias="field_type"
|
158
|
-
),
|
150
|
+
field_type_filter: list[FieldTypeName] = Query(list(FieldTypeName), alias="field_type"),
|
159
151
|
extracted: list[ExtractedDataTypeName] = Query(
|
160
152
|
[
|
161
153
|
ExtractedDataTypeName.TEXT,
|
@@ -181,7 +173,7 @@ async def get_resource_by_uuid(
|
|
181
173
|
@api.get(
|
182
174
|
f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}",
|
183
175
|
status_code=200,
|
184
|
-
|
176
|
+
summary="Get Resource (by slug)",
|
185
177
|
response_model=Resource,
|
186
178
|
response_model_exclude_unset=True,
|
187
179
|
tags=["Resources"],
|
@@ -193,9 +185,7 @@ async def get_resource_by_slug(
|
|
193
185
|
kbid: str,
|
194
186
|
rslug: str,
|
195
187
|
show: list[ResourceProperties] = Query([ResourceProperties.BASIC]),
|
196
|
-
field_type_filter: list[FieldTypeName] = Query(
|
197
|
-
list(FieldTypeName), alias="field_type"
|
198
|
-
),
|
188
|
+
field_type_filter: list[FieldTypeName] = Query(list(FieldTypeName), alias="field_type"),
|
199
189
|
extracted: list[ExtractedDataTypeName] = Query(
|
200
190
|
[
|
201
191
|
ExtractedDataTypeName.TEXT,
|
@@ -235,7 +225,7 @@ async def _get_resource(
|
|
235
225
|
audit = get_audit()
|
236
226
|
if audit is not None:
|
237
227
|
audit_id = rid if rid else rslug
|
238
|
-
|
228
|
+
audit.visited(kbid, audit_id, x_nucliadb_user, x_forwarded_for) # type: ignore
|
239
229
|
|
240
230
|
result = await serialize(
|
241
231
|
kbid,
|
@@ -254,7 +244,7 @@ async def _get_resource(
|
|
254
244
|
@api.get(
|
255
245
|
f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/{{field_type}}/{{field_id}}",
|
256
246
|
status_code=200,
|
257
|
-
|
247
|
+
summary="Get Resource field (by slug)",
|
258
248
|
response_model=ResourceField,
|
259
249
|
response_model_exclude_unset=True,
|
260
250
|
tags=["Resource fields"],
|
@@ -294,7 +284,7 @@ async def get_resource_field_rslug_prefix(
|
|
294
284
|
@api.get(
|
295
285
|
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/{{field_type}}/{{field_id}}",
|
296
286
|
status_code=200,
|
297
|
-
|
287
|
+
summary="Get Resource field (by id)",
|
298
288
|
response_model=ResourceField,
|
299
289
|
response_model_exclude_unset=True,
|
300
290
|
tags=["Resource fields"],
|
@@ -343,9 +333,7 @@ async def _get_resource_field(
|
|
343
333
|
) -> Response:
|
344
334
|
storage = await get_storage(service_name=SERVICE_NAME)
|
345
335
|
driver = get_driver()
|
346
|
-
|
347
|
-
pb_field_id = FIELD_NAMES_TO_PB_TYPE_MAP[field_type]
|
348
|
-
|
336
|
+
pb_field_id = to_proto.field_type_name(field_type)
|
349
337
|
async with driver.transaction() as txn:
|
350
338
|
kb = ORMKnowledgeBox(txn, storage, kbid)
|
351
339
|
|
@@ -360,34 +348,22 @@ async def _get_resource_field(
|
|
360
348
|
if field is None:
|
361
349
|
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
362
350
|
|
363
|
-
resource_field = ResourceField(field_id=field_id, field_type=field_type)
|
351
|
+
resource_field = ResourceField(field_id=field_id, field_type=field_type)
|
364
352
|
|
365
353
|
if ResourceFieldProperties.VALUE in show:
|
366
354
|
value = await field.get_value()
|
367
355
|
|
368
356
|
if isinstance(value, resources_pb2.FieldText):
|
369
357
|
value = await field.get_value()
|
370
|
-
resource_field.value =
|
358
|
+
resource_field.value = from_proto.field_text(value)
|
371
359
|
|
372
360
|
if isinstance(value, resources_pb2.FieldFile):
|
373
361
|
value = await field.get_value()
|
374
|
-
resource_field.value =
|
362
|
+
resource_field.value = from_proto.field_file(value)
|
375
363
|
|
376
364
|
if isinstance(value, resources_pb2.FieldLink):
|
377
365
|
value = await field.get_value()
|
378
|
-
resource_field.value =
|
379
|
-
|
380
|
-
if isinstance(value, resources_pb2.FieldLayout):
|
381
|
-
value = await field.get_value()
|
382
|
-
resource_field.value = models.FieldLayout.from_message(value)
|
383
|
-
|
384
|
-
if isinstance(value, resources_pb2.FieldDatetime):
|
385
|
-
value = await field.get_value()
|
386
|
-
resource_field.value = models.FieldDatetime.from_message(value)
|
387
|
-
|
388
|
-
if isinstance(value, resources_pb2.FieldKeywordset):
|
389
|
-
value = await field.get_value()
|
390
|
-
resource_field.value = models.FieldKeywordset.from_message(value)
|
366
|
+
resource_field.value = from_proto.field_link(value)
|
391
367
|
|
392
368
|
if isinstance(field, Conversation):
|
393
369
|
if page == "first":
|
@@ -400,12 +376,10 @@ async def _get_resource_field(
|
|
400
376
|
|
401
377
|
value = await field.get_value(page=page_to_fetch)
|
402
378
|
if value is not None:
|
403
|
-
resource_field.value =
|
379
|
+
resource_field.value = from_proto.conversation(value)
|
404
380
|
|
405
381
|
if ResourceFieldProperties.EXTRACTED in show and extracted:
|
406
|
-
resource_field.extracted = FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP[
|
407
|
-
field_type
|
408
|
-
]()
|
382
|
+
resource_field.extracted = FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP[field_type]()
|
409
383
|
await set_resource_field_extracted_data(
|
410
384
|
field,
|
411
385
|
resource_field.extracted,
|
@@ -419,6 +393,6 @@ async def _get_resource_field(
|
|
419
393
|
resource_field.error = Error(body=error.error, code=error.code)
|
420
394
|
|
421
395
|
return Response(
|
422
|
-
content=resource_field.
|
396
|
+
content=resource_field.model_dump_json(exclude_unset=True, by_alias=True),
|
423
397
|
media_type="application/json",
|
424
398
|
)
|
@@ -22,30 +22,17 @@ from typing import Optional, Union
|
|
22
22
|
|
23
23
|
from fastapi import HTTPException
|
24
24
|
from fastapi.responses import StreamingResponse
|
25
|
-
from fastapi_versioning import version
|
25
|
+
from fastapi_versioning import version
|
26
26
|
from google.protobuf.json_format import MessageToDict
|
27
|
-
from nucliadb_protos.knowledgebox_pb2 import KnowledgeBoxID
|
28
|
-
from nucliadb_protos.writer_pb2 import (
|
29
|
-
GetEntitiesGroupRequest,
|
30
|
-
GetEntitiesGroupResponse,
|
31
|
-
GetLabelSetRequest,
|
32
|
-
GetLabelSetResponse,
|
33
|
-
GetLabelsRequest,
|
34
|
-
GetLabelsResponse,
|
35
|
-
GetSynonymsResponse,
|
36
|
-
GetVectorSetsRequest,
|
37
|
-
GetVectorSetsResponse,
|
38
|
-
ListEntitiesGroupsRequest,
|
39
|
-
ListEntitiesGroupsResponse,
|
40
|
-
OpStatusWriter,
|
41
|
-
)
|
42
27
|
from starlette.requests import Request
|
43
28
|
|
44
29
|
from nucliadb.common import datamanagers
|
45
30
|
from nucliadb.common.cluster.settings import in_standalone_mode
|
46
31
|
from nucliadb.common.context.fastapi import get_app_context
|
32
|
+
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
47
33
|
from nucliadb.common.http_clients import processing
|
48
34
|
from nucliadb.common.maindb.utils import get_driver
|
35
|
+
from nucliadb.common.models_utils import from_proto
|
49
36
|
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
|
50
37
|
from nucliadb.models.responses import HTTPClientError
|
51
38
|
from nucliadb.reader import SERVICE_NAME
|
@@ -53,13 +40,19 @@ from nucliadb.reader.api.v1.router import KB_PREFIX, api
|
|
53
40
|
from nucliadb.reader.reader.notifications import kb_notifications_stream
|
54
41
|
from nucliadb_models.entities import (
|
55
42
|
EntitiesGroup,
|
56
|
-
EntitiesGroupSummary,
|
57
43
|
KnowledgeBoxEntities,
|
58
44
|
)
|
59
45
|
from nucliadb_models.labels import KnowledgeBoxLabels, LabelSet
|
60
46
|
from nucliadb_models.resource import NucliaDBRoles
|
61
47
|
from nucliadb_models.synonyms import KnowledgeBoxSynonyms
|
62
|
-
from
|
48
|
+
from nucliadb_protos import writer_pb2
|
49
|
+
from nucliadb_protos.knowledgebox_pb2 import Synonyms
|
50
|
+
from nucliadb_protos.writer_pb2 import (
|
51
|
+
GetEntitiesGroupRequest,
|
52
|
+
GetEntitiesGroupResponse,
|
53
|
+
ListEntitiesGroupsRequest,
|
54
|
+
ListEntitiesGroupsResponse,
|
55
|
+
)
|
63
56
|
from nucliadb_utils.authentication import requires
|
64
57
|
from nucliadb_utils.utilities import get_ingest, get_storage
|
65
58
|
|
@@ -67,7 +60,7 @@ from nucliadb_utils.utilities import get_ingest, get_storage
|
|
67
60
|
@api.get(
|
68
61
|
f"/{KB_PREFIX}/{{kbid}}/entitiesgroups",
|
69
62
|
status_code=200,
|
70
|
-
|
63
|
+
summary="Get Knowledge Box Entities",
|
71
64
|
response_model=KnowledgeBoxEntities,
|
72
65
|
tags=["Knowledge Box Services"],
|
73
66
|
)
|
@@ -93,25 +86,21 @@ async def list_entities_groups(kbid: str):
|
|
93
86
|
if entities_groups.status == ListEntitiesGroupsResponse.Status.OK:
|
94
87
|
response = KnowledgeBoxEntities(uuid=kbid)
|
95
88
|
for key, eg_summary in entities_groups.groups.items():
|
96
|
-
entities_group =
|
89
|
+
entities_group = from_proto.entities_group_summary(eg_summary)
|
97
90
|
response.groups[key] = entities_group
|
98
91
|
return response
|
99
92
|
elif entities_groups.status == ListEntitiesGroupsResponse.Status.NOTFOUND:
|
100
93
|
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
101
94
|
elif entities_groups.status == ListEntitiesGroupsResponse.Status.ERROR:
|
102
|
-
raise HTTPException(
|
103
|
-
status_code=500, detail="Error while listing entities groups"
|
104
|
-
)
|
95
|
+
raise HTTPException(status_code=500, detail="Error while listing entities groups")
|
105
96
|
else:
|
106
|
-
raise HTTPException(
|
107
|
-
status_code=500, detail="Error on listing Knowledge box entities"
|
108
|
-
)
|
97
|
+
raise HTTPException(status_code=500, detail="Error on listing Knowledge box entities")
|
109
98
|
|
110
99
|
|
111
100
|
@api.get(
|
112
101
|
f"/{KB_PREFIX}/{{kbid}}/entitiesgroup/{{group}}",
|
113
102
|
status_code=200,
|
114
|
-
|
103
|
+
summary="Get a Knowledge Box Entities Group",
|
115
104
|
response_model=EntitiesGroup,
|
116
105
|
tags=["Knowledge Box Services"],
|
117
106
|
)
|
@@ -125,123 +114,90 @@ async def get_entity(request: Request, kbid: str, group: str) -> EntitiesGroup:
|
|
125
114
|
|
126
115
|
kbobj: GetEntitiesGroupResponse = await ingest.GetEntitiesGroup(l_request) # type: ignore
|
127
116
|
if kbobj.status == GetEntitiesGroupResponse.Status.OK:
|
128
|
-
response =
|
117
|
+
response = from_proto.entities_group(kbobj.group)
|
129
118
|
return response
|
130
119
|
elif kbobj.status == GetEntitiesGroupResponse.Status.KB_NOT_FOUND:
|
131
|
-
raise HTTPException(
|
132
|
-
status_code=404, detail=f"Knowledge Box '{kbid}' does not exist"
|
133
|
-
)
|
120
|
+
raise HTTPException(status_code=404, detail=f"Knowledge Box '{kbid}' does not exist")
|
134
121
|
elif kbobj.status == GetEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND:
|
135
|
-
raise HTTPException(
|
136
|
-
status_code=404, detail=f"Entities group '{group}' does not exist"
|
137
|
-
)
|
122
|
+
raise HTTPException(status_code=404, detail=f"Entities group '{group}' does not exist")
|
138
123
|
else:
|
139
|
-
raise HTTPException(
|
140
|
-
status_code=500, detail="Error on getting entities group on a Knowledge box"
|
141
|
-
)
|
124
|
+
raise HTTPException(status_code=500, detail="Error on getting entities group on a Knowledge box")
|
142
125
|
|
143
126
|
|
144
127
|
@api.get(
|
145
128
|
f"/{KB_PREFIX}/{{kbid}}/labelsets",
|
146
129
|
status_code=200,
|
147
|
-
|
130
|
+
summary="Get Knowledge Box Label Sets",
|
148
131
|
response_model=KnowledgeBoxLabels,
|
149
132
|
tags=["Knowledge Box Services"],
|
150
133
|
)
|
151
134
|
@requires(NucliaDBRoles.READER)
|
152
135
|
@version(1)
|
153
|
-
async def
|
154
|
-
|
155
|
-
|
156
|
-
|
136
|
+
async def get_labelsets_endoint(request: Request, kbid: str) -> KnowledgeBoxLabels:
|
137
|
+
try:
|
138
|
+
return await get_labelsets(kbid)
|
139
|
+
except KnowledgeBoxNotFound:
|
140
|
+
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
141
|
+
|
157
142
|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
143
|
+
async def get_labelsets(kbid: str) -> KnowledgeBoxLabels:
|
144
|
+
kb_exists = await datamanagers.atomic.kb.exists_kb(kbid=kbid)
|
145
|
+
if not kb_exists:
|
146
|
+
raise KnowledgeBoxNotFound()
|
147
|
+
labelsets: writer_pb2.Labels = await datamanagers.atomic.labelset.get_all(kbid=kbid)
|
148
|
+
response = KnowledgeBoxLabels(uuid=kbid)
|
149
|
+
for labelset, labelset_data in labelsets.labelset.items():
|
150
|
+
labelset_response = LabelSet(
|
151
|
+
**MessageToDict(
|
152
|
+
labelset_data,
|
153
|
+
preserving_proto_field_name=True,
|
154
|
+
including_default_value_fields=True,
|
168
155
|
)
|
169
|
-
response.labelsets[labelset] = labelset_response
|
170
|
-
return response
|
171
|
-
elif kbobj.status == GetLabelsResponse.Status.NOTFOUND:
|
172
|
-
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
173
|
-
else:
|
174
|
-
raise HTTPException(
|
175
|
-
status_code=500, detail="Error on getting Knowledge box labels"
|
176
156
|
)
|
157
|
+
response.labelsets[labelset] = labelset_response
|
158
|
+
return response
|
177
159
|
|
178
160
|
|
179
161
|
@api.get(
|
180
162
|
f"/{KB_PREFIX}/{{kbid}}/labelset/{{labelset}}",
|
181
163
|
status_code=200,
|
182
|
-
|
164
|
+
summary="Get a Knowledge Box Label Set",
|
183
165
|
response_model=LabelSet,
|
184
166
|
tags=["Knowledge Box Services"],
|
185
167
|
)
|
186
168
|
@requires(NucliaDBRoles.READER)
|
187
169
|
@version(1)
|
188
|
-
async def
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
170
|
+
async def get_labelset_endpoint(request: Request, kbid: str, labelset: str) -> LabelSet:
|
171
|
+
try:
|
172
|
+
return await get_labelset(kbid, labelset)
|
173
|
+
except KnowledgeBoxNotFound:
|
174
|
+
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
193
175
|
|
194
|
-
|
195
|
-
|
176
|
+
|
177
|
+
async def get_labelset(kbid: str, labelset_id: str) -> LabelSet:
|
178
|
+
kb_exists = await datamanagers.atomic.kb.exists_kb(kbid=kbid)
|
179
|
+
if not kb_exists:
|
180
|
+
raise KnowledgeBoxNotFound()
|
181
|
+
labelset: Optional[writer_pb2.LabelSet] = await datamanagers.atomic.labelset.get(
|
182
|
+
kbid=kbid, labelset_id=labelset_id
|
183
|
+
)
|
184
|
+
if labelset is None:
|
185
|
+
response = LabelSet()
|
186
|
+
else:
|
196
187
|
response = LabelSet(
|
197
188
|
**MessageToDict(
|
198
|
-
|
189
|
+
labelset,
|
199
190
|
preserving_proto_field_name=True,
|
200
191
|
including_default_value_fields=True,
|
201
192
|
)
|
202
193
|
)
|
203
|
-
|
204
|
-
elif kbobj.status == GetLabelSetResponse.Status.NOTFOUND:
|
205
|
-
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
206
|
-
else:
|
207
|
-
raise HTTPException(
|
208
|
-
status_code=500, detail="Error on getting labelset on a Knowledge box"
|
209
|
-
)
|
210
|
-
|
211
|
-
|
212
|
-
@api.get(
|
213
|
-
f"/{KB_PREFIX}/{{kbid}}/vectorsets",
|
214
|
-
status_code=200,
|
215
|
-
name="Get Knowledge Box Vector Sets",
|
216
|
-
tags=["Knowledge Box Services"],
|
217
|
-
response_model=VectorSets,
|
218
|
-
openapi_extra={"x-operation_order": 1},
|
219
|
-
)
|
220
|
-
@requires(NucliaDBRoles.READER)
|
221
|
-
@version(1)
|
222
|
-
async def get_vectorsets(request: Request, kbid: str):
|
223
|
-
ingest = get_ingest()
|
224
|
-
pbrequest: GetVectorSetsRequest = GetVectorSetsRequest()
|
225
|
-
pbrequest.kb.uuid = kbid
|
226
|
-
|
227
|
-
vectorsets: GetVectorSetsResponse = await ingest.GetVectorSets(pbrequest) # type: ignore
|
228
|
-
if vectorsets.status == GetVectorSetsResponse.Status.OK:
|
229
|
-
result = VectorSets(vectorsets={})
|
230
|
-
for key, vector in vectorsets.vectorsets.vectorsets.items():
|
231
|
-
result.vectorsets[key] = VectorSet.from_message(vector)
|
232
|
-
return result
|
233
|
-
elif vectorsets.status == GetVectorSetsResponse.Status.NOTFOUND:
|
234
|
-
raise HTTPException(status_code=404, detail="VectorSet does not exist")
|
235
|
-
elif vectorsets.status == GetVectorSetsResponse.Status.ERROR:
|
236
|
-
raise HTTPException(
|
237
|
-
status_code=500, detail="Error on getting vectorset on a Knowledge box"
|
238
|
-
)
|
194
|
+
return response
|
239
195
|
|
240
196
|
|
241
197
|
@api.get(
|
242
198
|
f"/{KB_PREFIX}/{{kbid}}/custom-synonyms",
|
243
199
|
status_code=200,
|
244
|
-
|
200
|
+
summary="Get Knowledge Box Custom Synonyms",
|
245
201
|
tags=["Knowledge Box Services"],
|
246
202
|
response_model=KnowledgeBoxSynonyms,
|
247
203
|
openapi_extra={"x-operation_order": 2},
|
@@ -249,23 +205,16 @@ async def get_vectorsets(request: Request, kbid: str):
|
|
249
205
|
@requires(NucliaDBRoles.READER)
|
250
206
|
@version(1)
|
251
207
|
async def get_custom_synonyms(request: Request, kbid: str):
|
252
|
-
|
253
|
-
pbrequest = KnowledgeBoxID(uuid=kbid)
|
254
|
-
pbresponse: GetSynonymsResponse = await ingest.GetSynonyms(pbrequest) # type: ignore
|
255
|
-
if pbresponse.status.status == OpStatusWriter.Status.OK:
|
256
|
-
return KnowledgeBoxSynonyms.from_message(pbresponse.synonyms)
|
257
|
-
elif pbresponse.status.status == OpStatusWriter.Status.NOTFOUND:
|
208
|
+
if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
|
258
209
|
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
259
|
-
|
260
|
-
|
261
|
-
status_code=500, detail="Error getting synonyms of a Knowledge box"
|
262
|
-
)
|
210
|
+
synonyms = await datamanagers.atomic.synonyms.get(kbid=kbid) or Synonyms()
|
211
|
+
return from_proto.kb_synonyms(synonyms)
|
263
212
|
|
264
213
|
|
265
214
|
@api.get(
|
266
215
|
f"/{KB_PREFIX}/{{kbid}}/notifications",
|
267
216
|
status_code=200,
|
268
|
-
|
217
|
+
summary="Knowledge Box Notifications Stream",
|
269
218
|
description="Provides a stream of activity notifications for the given Knowledge Box. The stream will be automatically closed after 2 minutes.", # noqa: E501
|
270
219
|
tags=["Knowledge Box Services"],
|
271
220
|
response_description="Each line of the response is a Base64-encoded JSON object representing a notification. Refer to [the internal documentation](https://github.com/nuclia/nucliadb/blob/main/docs/tutorials/KB_NOTIFICATIONS.md) for a more detailed explanation of each notification type.", # noqa: E501
|
@@ -298,14 +247,14 @@ async def notifications_endpoint(
|
|
298
247
|
|
299
248
|
|
300
249
|
async def exists_kb(kbid: str) -> bool:
|
301
|
-
async with datamanagers.
|
250
|
+
async with datamanagers.with_ro_transaction() as txn:
|
302
251
|
return await datamanagers.kb.exists_kb(txn, kbid=kbid)
|
303
252
|
|
304
253
|
|
305
254
|
@api.get(
|
306
255
|
f"/{KB_PREFIX}/{{kbid}}/processing-status",
|
307
256
|
status_code=200,
|
308
|
-
|
257
|
+
summary="Knowledge Box Processing Status",
|
309
258
|
description="Provides the status of the processing of the given Knowledge Box.",
|
310
259
|
tags=["Knowledge Box Services"],
|
311
260
|
response_model=processing.RequestsResults,
|
@@ -326,14 +275,12 @@ async def processing_status(
|
|
326
275
|
return HTTPClientError(status_code=404, detail="Knowledge Box not found")
|
327
276
|
|
328
277
|
async with processing.ProcessingHTTPClient() as client:
|
329
|
-
results = await client.requests(
|
330
|
-
cursor=cursor, scheduled=scheduled, kbid=kbid, limit=limit
|
331
|
-
)
|
278
|
+
results = await client.requests(cursor=cursor, scheduled=scheduled, kbid=kbid, limit=limit)
|
332
279
|
|
333
280
|
storage = await get_storage(service_name=SERVICE_NAME)
|
334
281
|
driver = get_driver()
|
335
282
|
|
336
|
-
async with driver.transaction(
|
283
|
+
async with driver.transaction(read_only=True) as txn:
|
337
284
|
kb = KnowledgeBox(txn, storage, kbid)
|
338
285
|
|
339
286
|
max_simultaneous = asyncio.Semaphore(10)
|
@@ -355,9 +302,7 @@ async def processing_status(
|
|
355
302
|
|
356
303
|
result_items = [
|
357
304
|
item
|
358
|
-
for item in await asyncio.gather(
|
359
|
-
*[_composition(result) for result in results.results]
|
360
|
-
)
|
305
|
+
for item in await asyncio.gather(*[_composition(result) for result in results.results])
|
361
306
|
if item is not None
|
362
307
|
]
|
363
308
|
|