nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -18,127 +18,277 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import asyncio
|
21
|
-
import
|
21
|
+
from functools import partial
|
22
22
|
|
23
23
|
from fastapi import HTTPException, Response
|
24
24
|
from fastapi_versioning import version
|
25
|
-
from nucliadb_protos.knowledgebox_pb2 import (
|
26
|
-
DeleteKnowledgeBoxResponse,
|
27
|
-
KnowledgeBoxID,
|
28
|
-
KnowledgeBoxNew,
|
29
|
-
KnowledgeBoxResponseStatus,
|
30
|
-
KnowledgeBoxUpdate,
|
31
|
-
NewKnowledgeBoxResponse,
|
32
|
-
UpdateKnowledgeBoxResponse,
|
33
|
-
)
|
34
25
|
from starlette.requests import Request
|
35
26
|
|
27
|
+
from nucliadb import learning_proxy
|
28
|
+
from nucliadb.common import datamanagers
|
29
|
+
from nucliadb.common.external_index_providers.exceptions import (
|
30
|
+
ExternalIndexCreationError,
|
31
|
+
)
|
32
|
+
from nucliadb.common.maindb.utils import get_driver
|
33
|
+
from nucliadb.ingest.orm.exceptions import KnowledgeBoxConflict
|
34
|
+
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
|
35
|
+
from nucliadb.writer import logger, vectorsets
|
36
|
+
from nucliadb.writer.api.utils import only_for_onprem
|
36
37
|
from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX, api
|
37
38
|
from nucliadb.writer.utilities import get_processing
|
39
|
+
from nucliadb_models.external_index_providers import (
|
40
|
+
ExternalIndexProviderType,
|
41
|
+
PineconeServerlessCloud,
|
42
|
+
)
|
38
43
|
from nucliadb_models.resource import (
|
39
44
|
KnowledgeBoxConfig,
|
40
45
|
KnowledgeBoxObj,
|
41
46
|
KnowledgeBoxObjID,
|
42
47
|
NucliaDBRoles,
|
43
48
|
)
|
49
|
+
from nucliadb_protos import knowledgebox_pb2
|
44
50
|
from nucliadb_utils.authentication import requires
|
45
|
-
from nucliadb_utils.utilities import get_ingest
|
46
51
|
|
47
52
|
|
53
|
+
@only_for_onprem
|
48
54
|
@api.post(
|
49
55
|
f"/{KBS_PREFIX}",
|
50
56
|
status_code=201,
|
51
|
-
|
52
|
-
response_model=KnowledgeBoxObj,
|
57
|
+
summary="Create Knowledge Box",
|
53
58
|
tags=["Knowledge Boxes"],
|
54
59
|
openapi_extra={"x-hidden-operation": True},
|
55
60
|
)
|
56
61
|
@requires(NucliaDBRoles.MANAGER)
|
57
62
|
@version(1)
|
58
|
-
async def
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
kbobj: NewKnowledgeBoxResponse = await ingest.NewKnowledgeBox(requestpb) # type: ignore
|
63
|
-
if item.slug != "":
|
64
|
-
slug = item.slug
|
65
|
-
else:
|
66
|
-
slug = kbobj.uuid # type: ignore
|
67
|
-
if kbobj.status == KnowledgeBoxResponseStatus.OK:
|
68
|
-
return KnowledgeBoxObj(uuid=kbobj.uuid, slug=slug)
|
69
|
-
elif kbobj.status == KnowledgeBoxResponseStatus.CONFLICT:
|
63
|
+
async def create_kb_endpoint(request: Request, item: KnowledgeBoxConfig) -> KnowledgeBoxObj:
|
64
|
+
try:
|
65
|
+
kbid, slug = await create_kb(item)
|
66
|
+
except KnowledgeBoxConflict:
|
70
67
|
raise HTTPException(status_code=419, detail="Knowledge box already exists")
|
71
|
-
|
72
|
-
raise HTTPException(status_code=
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
68
|
+
except ExternalIndexCreationError as exc:
|
69
|
+
raise HTTPException(status_code=502, detail=str(exc))
|
70
|
+
except Exception:
|
71
|
+
logger.exception("Could not create KB")
|
72
|
+
raise HTTPException(status_code=500, detail="Error creating knowledge box")
|
73
|
+
else:
|
74
|
+
return KnowledgeBoxObj(uuid=kbid, slug=slug)
|
75
|
+
|
76
|
+
|
77
|
+
async def create_kb(item: KnowledgeBoxConfig) -> tuple[str, str]:
|
78
|
+
driver = get_driver()
|
79
|
+
rollback_learning_config = None
|
80
|
+
|
81
|
+
kbid = KnowledgeBox.new_unique_kbid()
|
82
|
+
|
83
|
+
# Onprem KB creation doesn't have an existing learning configuration yet, so
|
84
|
+
# we need to call learning proxy to create it
|
85
85
|
if item.learning_configuration:
|
86
|
-
|
87
|
-
|
86
|
+
user_learning_config = item.learning_configuration
|
87
|
+
else:
|
88
|
+
logger.warning(
|
89
|
+
"No learning configuration provided. Default will be used.",
|
90
|
+
extra={"kbid": kbid},
|
91
|
+
)
|
92
|
+
# learning will choose the default values
|
93
|
+
user_learning_config = {}
|
94
|
+
|
95
|
+
# We need to be backward compatible with the old "semantic_model" field where
|
96
|
+
# only one semantic model was allowed.
|
97
|
+
if "semantic_model" in user_learning_config:
|
98
|
+
user_learning_config["semantic_models"] = [user_learning_config.pop("semantic_model")]
|
88
99
|
|
100
|
+
# we rely on learning to return the updated configuration with defaults and
|
101
|
+
# any other needed values (e.g. matryoshka settings if available)
|
102
|
+
learning_config = await learning_proxy.set_configuration(kbid, config=user_learning_config)
|
89
103
|
|
104
|
+
# if KB creation fails, we'll have to delete its learning config
|
105
|
+
async def _rollback_learning_config(kbid: str):
|
106
|
+
try:
|
107
|
+
await learning_proxy.delete_configuration(kbid)
|
108
|
+
except Exception:
|
109
|
+
logger.warning(
|
110
|
+
"Could not rollback learning configuration",
|
111
|
+
exc_info=True,
|
112
|
+
extra={"kbid": kbid},
|
113
|
+
)
|
114
|
+
|
115
|
+
rollback_learning_config = partial(_rollback_learning_config, kbid)
|
116
|
+
semantic_models = learning_config.into_semantic_models_metadata()
|
117
|
+
|
118
|
+
external_index_provider = knowledgebox_pb2.CreateExternalIndexProviderMetadata(
|
119
|
+
type=knowledgebox_pb2.ExternalIndexProviderType.UNSET,
|
120
|
+
)
|
121
|
+
if (
|
122
|
+
item.external_index_provider
|
123
|
+
and item.external_index_provider.type == ExternalIndexProviderType.PINECONE
|
124
|
+
):
|
125
|
+
pinecone_api_key = item.external_index_provider.api_key
|
126
|
+
serverless_pb = to_pinecone_serverless_cloud_pb(item.external_index_provider.serverless_cloud)
|
127
|
+
external_index_provider = knowledgebox_pb2.CreateExternalIndexProviderMetadata(
|
128
|
+
type=knowledgebox_pb2.ExternalIndexProviderType.PINECONE,
|
129
|
+
pinecone_config=knowledgebox_pb2.CreatePineconeConfig(
|
130
|
+
api_key=pinecone_api_key,
|
131
|
+
serverless_cloud=serverless_pb,
|
132
|
+
),
|
133
|
+
)
|
134
|
+
|
135
|
+
try:
|
136
|
+
(kbid, slug) = await KnowledgeBox.create(
|
137
|
+
driver,
|
138
|
+
kbid=kbid,
|
139
|
+
slug=item.slug or kbid,
|
140
|
+
title=item.title or "",
|
141
|
+
description=item.description or "",
|
142
|
+
semantic_models=semantic_models,
|
143
|
+
external_index_provider=external_index_provider,
|
144
|
+
hidden_resources_enabled=item.hidden_resources_enabled,
|
145
|
+
hidden_resources_hide_on_creation=item.hidden_resources_hide_on_creation,
|
146
|
+
)
|
147
|
+
|
148
|
+
except Exception as exc:
|
149
|
+
logger.error("Unexpected error creating KB", exc_info=exc, extra={"slug": item.slug})
|
150
|
+
await rollback_learning_config()
|
151
|
+
raise
|
152
|
+
|
153
|
+
return (kbid, slug)
|
154
|
+
|
155
|
+
|
156
|
+
@only_for_onprem
|
90
157
|
@api.patch(
|
91
158
|
f"/{KB_PREFIX}/{{kbid}}",
|
92
159
|
status_code=200,
|
93
|
-
|
160
|
+
summary="Update Knowledge Box",
|
94
161
|
response_model=KnowledgeBoxObjID,
|
95
162
|
tags=["Knowledge Boxes"],
|
96
163
|
openapi_extra={"x-hidden-operation": True},
|
97
164
|
)
|
98
165
|
@requires(NucliaDBRoles.MANAGER)
|
99
166
|
@version(1)
|
100
|
-
async def update_kb(request: Request, kbid: str, item: KnowledgeBoxConfig):
|
101
|
-
|
102
|
-
|
103
|
-
if
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
167
|
+
async def update_kb(request: Request, kbid: str, item: KnowledgeBoxConfig) -> KnowledgeBoxObjID:
|
168
|
+
driver = get_driver()
|
169
|
+
config = None
|
170
|
+
if (
|
171
|
+
item.slug
|
172
|
+
or item.title
|
173
|
+
or item.description
|
174
|
+
or item.hidden_resources_enabled
|
175
|
+
or item.hidden_resources_hide_on_creation
|
176
|
+
):
|
177
|
+
config = knowledgebox_pb2.KnowledgeBoxConfig(
|
178
|
+
slug=item.slug or "",
|
179
|
+
title=item.title or "",
|
180
|
+
description=item.description or "",
|
181
|
+
hidden_resources_enabled=item.hidden_resources_enabled,
|
182
|
+
hidden_resources_hide_on_creation=item.hidden_resources_hide_on_creation,
|
183
|
+
)
|
184
|
+
try:
|
185
|
+
async with driver.transaction() as txn:
|
186
|
+
await KnowledgeBox.update(
|
187
|
+
txn,
|
188
|
+
uuid=kbid,
|
189
|
+
slug=item.slug,
|
190
|
+
config=config,
|
191
|
+
)
|
192
|
+
await txn.commit()
|
193
|
+
except datamanagers.exceptions.KnowledgeBoxNotFound:
|
113
194
|
raise HTTPException(status_code=404, detail="Knowledge box does not exist")
|
114
|
-
|
115
|
-
|
195
|
+
except Exception as exc:
|
196
|
+
logger.exception("Could not update KB", exc_info=exc, extra={"kbid": kbid})
|
197
|
+
raise HTTPException(status_code=500, detail="Error updating knowledge box")
|
198
|
+
else:
|
199
|
+
return KnowledgeBoxObjID(uuid=kbid)
|
116
200
|
|
117
201
|
|
202
|
+
@only_for_onprem
|
118
203
|
@api.delete(
|
119
204
|
f"/{KB_PREFIX}/{{kbid}}",
|
120
205
|
status_code=200,
|
121
|
-
|
122
|
-
response_model=KnowledgeBoxObj,
|
206
|
+
summary="Delete Knowledge Box",
|
123
207
|
tags=["Knowledge Boxes"],
|
124
208
|
openapi_extra={"x-hidden-operation": True},
|
125
209
|
)
|
126
210
|
@requires(NucliaDBRoles.MANAGER)
|
127
211
|
@version(1)
|
128
|
-
async def delete_kb(request: Request, kbid: str):
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
)
|
134
|
-
if kbobj.status == KnowledgeBoxResponseStatus.OK:
|
135
|
-
return KnowledgeBoxObj(uuid=kbid)
|
136
|
-
elif kbobj.status == KnowledgeBoxResponseStatus.NOTFOUND:
|
212
|
+
async def delete_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
|
213
|
+
driver = get_driver()
|
214
|
+
try:
|
215
|
+
await KnowledgeBox.delete(driver, kbid=kbid)
|
216
|
+
except datamanagers.exceptions.KnowledgeBoxNotFound:
|
137
217
|
raise HTTPException(status_code=404, detail="Knowledge Box does not exists")
|
138
|
-
|
139
|
-
|
218
|
+
except Exception as exc:
|
219
|
+
logger.exception("Could not delete KB", exc_info=exc, extra={"kbid": kbid})
|
220
|
+
raise HTTPException(status_code=500, detail="Error deleting knowledge box")
|
221
|
+
|
222
|
+
# onprem nucliadb must delete its learning configuration
|
223
|
+
try:
|
224
|
+
await learning_proxy.delete_configuration(kbid)
|
225
|
+
logger.info("Learning configuration deleted", extra={"kbid": kbid})
|
226
|
+
except Exception as exc:
|
227
|
+
logger.exception(
|
228
|
+
"Unexpected error deleting learning configuration",
|
229
|
+
exc_info=exc,
|
230
|
+
extra={"kbid": kbid},
|
231
|
+
)
|
140
232
|
|
233
|
+
# be nice and notify processing this KB is being deleted so we waste
|
234
|
+
# resources
|
141
235
|
processing = get_processing()
|
142
236
|
asyncio.create_task(processing.delete_from_processing(kbid=kbid))
|
143
237
|
|
144
|
-
return
|
238
|
+
return KnowledgeBoxObj(uuid=kbid)
|
239
|
+
|
240
|
+
|
241
|
+
def to_pinecone_serverless_cloud_pb(
|
242
|
+
serverless: PineconeServerlessCloud,
|
243
|
+
) -> knowledgebox_pb2.PineconeServerlessCloud.ValueType:
|
244
|
+
return {
|
245
|
+
PineconeServerlessCloud.AWS_EU_WEST_1: knowledgebox_pb2.PineconeServerlessCloud.AWS_EU_WEST_1,
|
246
|
+
PineconeServerlessCloud.AWS_US_EAST_1: knowledgebox_pb2.PineconeServerlessCloud.AWS_US_EAST_1,
|
247
|
+
PineconeServerlessCloud.AWS_US_WEST_2: knowledgebox_pb2.PineconeServerlessCloud.AWS_US_WEST_2,
|
248
|
+
PineconeServerlessCloud.AZURE_EASTUS2: knowledgebox_pb2.PineconeServerlessCloud.AZURE_EASTUS2,
|
249
|
+
PineconeServerlessCloud.GCP_US_CENTRAL1: knowledgebox_pb2.PineconeServerlessCloud.GCP_US_CENTRAL1,
|
250
|
+
}[serverless]
|
251
|
+
|
252
|
+
|
253
|
+
@api.post(
|
254
|
+
f"/{KB_PREFIX}/{{kbid}}/vectorsets/{{vectorset_id}}",
|
255
|
+
status_code=200,
|
256
|
+
summary="Add a vectorset to Knowledge Box",
|
257
|
+
tags=["Knowledge Boxes"],
|
258
|
+
# TODO: remove when the feature is mature
|
259
|
+
include_in_schema=False,
|
260
|
+
)
|
261
|
+
@requires(NucliaDBRoles.MANAGER)
|
262
|
+
@version(1)
|
263
|
+
async def add_vectorset(request: Request, kbid: str, vectorset_id: str) -> Response:
|
264
|
+
try:
|
265
|
+
await vectorsets.add(kbid, vectorset_id)
|
266
|
+
except learning_proxy.ProxiedLearningConfigError as err:
|
267
|
+
return Response(
|
268
|
+
status_code=err.status_code,
|
269
|
+
content=err.content,
|
270
|
+
media_type=err.content_type,
|
271
|
+
)
|
272
|
+
return Response(status_code=200)
|
273
|
+
|
274
|
+
|
275
|
+
@api.delete(
|
276
|
+
f"/{KB_PREFIX}/{{kbid}}/vectorsets/{{vectorset_id}}",
|
277
|
+
status_code=200,
|
278
|
+
summary="Delete vectorset from Knowledge Box",
|
279
|
+
tags=["Knowledge Boxes"],
|
280
|
+
# TODO: remove when the feature is mature
|
281
|
+
include_in_schema=False,
|
282
|
+
)
|
283
|
+
@requires(NucliaDBRoles.MANAGER)
|
284
|
+
@version(1)
|
285
|
+
async def delete_vectorset(request: Request, kbid: str, vectorset_id: str) -> Response:
|
286
|
+
try:
|
287
|
+
await vectorsets.delete(kbid, vectorset_id)
|
288
|
+
except learning_proxy.ProxiedLearningConfigError as err:
|
289
|
+
return Response(
|
290
|
+
status_code=err.status_code,
|
291
|
+
content=err.content,
|
292
|
+
media_type=err.content_type,
|
293
|
+
)
|
294
|
+
return Response(status_code=200)
|
@@ -29,7 +29,7 @@ from nucliadb_utils.authentication import requires
|
|
29
29
|
@api.post(
|
30
30
|
path=f"/{KB_PREFIX}/{{kbid}}/configuration",
|
31
31
|
status_code=204,
|
32
|
-
|
32
|
+
summary="Create Knowledge Box models configuration",
|
33
33
|
description="Create configuration of models assigned to a Knowledge Box",
|
34
34
|
response_model=None,
|
35
35
|
tags=["Knowledge Boxes"],
|
@@ -46,7 +46,7 @@ async def set_configuration(
|
|
46
46
|
@api.patch(
|
47
47
|
path=f"/{KB_PREFIX}/{{kbid}}/configuration",
|
48
48
|
status_code=204,
|
49
|
-
|
49
|
+
summary="Update Knowledge Box models configuration",
|
50
50
|
description="Update current configuration of models assigned to a Knowledge Box",
|
51
51
|
response_model=None,
|
52
52
|
tags=["Knowledge Boxes"],
|