nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -0,0 +1,84 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
from typing import Optional
|
22
|
+
|
23
|
+
from google.protobuf.message import Message
|
24
|
+
|
25
|
+
from nucliadb.common.datamanagers.utils import get_kv_pb
|
26
|
+
from nucliadb.common.maindb.driver import Transaction
|
27
|
+
from nucliadb_protos import writer_pb2
|
28
|
+
|
29
|
+
KB_RESOURCE_FIELD = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}"
|
30
|
+
KB_RESOURCE_FIELD_ERROR = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}/error"
|
31
|
+
|
32
|
+
|
33
|
+
async def get_raw(
|
34
|
+
txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str
|
35
|
+
) -> Optional[bytes]:
|
36
|
+
key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
37
|
+
return await txn.get(key)
|
38
|
+
|
39
|
+
|
40
|
+
async def set(
|
41
|
+
txn: Transaction,
|
42
|
+
*,
|
43
|
+
kbid: str,
|
44
|
+
rid: str,
|
45
|
+
field_type: str,
|
46
|
+
field_id: str,
|
47
|
+
value: Message,
|
48
|
+
):
|
49
|
+
key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
50
|
+
await txn.set(key, value.SerializeToString())
|
51
|
+
|
52
|
+
|
53
|
+
async def delete(txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str):
|
54
|
+
base_key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
55
|
+
# Make sure we explicitly delete the field and any nested key
|
56
|
+
keys_to_delete = []
|
57
|
+
async for key in txn.keys(base_key):
|
58
|
+
keys_to_delete.append(key)
|
59
|
+
|
60
|
+
for key in keys_to_delete:
|
61
|
+
await txn.delete(key)
|
62
|
+
|
63
|
+
|
64
|
+
# Error
|
65
|
+
|
66
|
+
|
67
|
+
async def get_error(
|
68
|
+
txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str
|
69
|
+
) -> Optional[writer_pb2.Error]:
|
70
|
+
key = KB_RESOURCE_FIELD_ERROR.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
71
|
+
return await get_kv_pb(txn, key, writer_pb2.Error)
|
72
|
+
|
73
|
+
|
74
|
+
async def set_error(
|
75
|
+
txn: Transaction,
|
76
|
+
*,
|
77
|
+
kbid: str,
|
78
|
+
rid: str,
|
79
|
+
field_type: str,
|
80
|
+
field_id: str,
|
81
|
+
error: writer_pb2.Error,
|
82
|
+
):
|
83
|
+
key = KB_RESOURCE_FIELD_ERROR.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
84
|
+
await txn.set(key, error.SerializeToString())
|
@@ -33,10 +33,8 @@ KB_SLUGS = KB_SLUGS_BASE + "{slug}"
|
|
33
33
|
logger = logging.getLogger(__name__)
|
34
34
|
|
35
35
|
|
36
|
-
async def get_kbs(
|
37
|
-
|
38
|
-
) -> AsyncIterator[tuple[str, str]]:
|
39
|
-
async for key in txn.keys(KB_SLUGS.format(slug=prefix), count=-1):
|
36
|
+
async def get_kbs(txn: Transaction, *, prefix: str = "") -> AsyncIterator[tuple[str, str]]:
|
37
|
+
async for key in txn.keys(KB_SLUGS.format(slug=prefix)):
|
40
38
|
slug = key.replace(KB_SLUGS_BASE, "")
|
41
39
|
uuid = await get_kb_uuid(txn, slug=slug)
|
42
40
|
if uuid is None:
|
@@ -46,22 +44,32 @@ async def get_kbs(
|
|
46
44
|
|
47
45
|
|
48
46
|
async def exists_kb(txn: Transaction, *, kbid: str) -> bool:
|
49
|
-
return await get_config(txn, kbid=kbid) is not None
|
47
|
+
return await get_config(txn, kbid=kbid, for_update=False) is not None
|
50
48
|
|
51
49
|
|
52
50
|
async def get_kb_uuid(txn: Transaction, *, slug: str) -> Optional[str]:
|
53
|
-
uuid = await txn.get(KB_SLUGS.format(slug=slug))
|
51
|
+
uuid = await txn.get(KB_SLUGS.format(slug=slug), for_update=False)
|
54
52
|
if uuid is not None:
|
55
53
|
return uuid.decode()
|
56
54
|
else:
|
57
55
|
return None
|
58
56
|
|
59
57
|
|
58
|
+
async def set_kbid_for_slug(txn: Transaction, *, slug: str, kbid: str):
|
59
|
+
key = KB_SLUGS.format(slug=slug)
|
60
|
+
await txn.set(key, kbid.encode())
|
61
|
+
|
62
|
+
|
63
|
+
async def delete_kb_slug(txn: Transaction, *, slug: str):
|
64
|
+
key = KB_SLUGS.format(slug=slug)
|
65
|
+
await txn.delete(key)
|
66
|
+
|
67
|
+
|
60
68
|
async def get_config(
|
61
|
-
txn: Transaction, *, kbid: str
|
69
|
+
txn: Transaction, *, kbid: str, for_update: bool = False
|
62
70
|
) -> Optional[knowledgebox_pb2.KnowledgeBoxConfig]:
|
63
71
|
key = KB_UUID.format(kbid=kbid)
|
64
|
-
payload = await txn.get(key)
|
72
|
+
payload = await txn.get(key, for_update=for_update)
|
65
73
|
if payload is None:
|
66
74
|
return None
|
67
75
|
response = knowledgebox_pb2.KnowledgeBoxConfig()
|
@@ -69,17 +77,18 @@ async def get_config(
|
|
69
77
|
return response
|
70
78
|
|
71
79
|
|
72
|
-
async def set_config(
|
73
|
-
txn: Transaction, *, kbid: str, config: knowledgebox_pb2.KnowledgeBoxConfig
|
74
|
-
):
|
80
|
+
async def set_config(txn: Transaction, *, kbid: str, config: knowledgebox_pb2.KnowledgeBoxConfig):
|
75
81
|
key = KB_UUID.format(kbid=kbid)
|
76
82
|
await txn.set(key, config.SerializeToString())
|
77
83
|
|
78
84
|
|
79
|
-
async def
|
80
|
-
|
81
|
-
|
82
|
-
|
85
|
+
async def delete_config(txn: Transaction, *, kbid: str) -> None:
|
86
|
+
key = KB_UUID.format(kbid=kbid)
|
87
|
+
await txn.delete(key)
|
88
|
+
|
89
|
+
|
90
|
+
async def get_model_metadata(txn: Transaction, *, kbid: str) -> knowledgebox_pb2.SemanticModelMetadata:
|
91
|
+
shards_obj = await cluster.get_kb_shards(txn, kbid=kbid, for_update=False)
|
83
92
|
if shards_obj is None:
|
84
93
|
raise KnowledgeBoxNotFound(kbid)
|
85
94
|
if shards_obj.HasField("model"):
|
@@ -87,30 +96,67 @@ async def get_model_metadata(
|
|
87
96
|
else:
|
88
97
|
# B/c code for old KBs that do not have the `model` attribute set in the Shards object.
|
89
98
|
# Cleanup this code after a migration is done unifying all fields under `model` (on-prem and cloud).
|
90
|
-
return knowledgebox_pb2.SemanticModelMetadata(
|
91
|
-
similarity_function=shards_obj.similarity
|
92
|
-
)
|
99
|
+
return knowledgebox_pb2.SemanticModelMetadata(similarity_function=shards_obj.similarity)
|
93
100
|
|
94
101
|
|
102
|
+
# DEPRECATED: this function should be removed once the "default" vectorset
|
103
|
+
# concept is removed and processing sends us all messages with a vectorset_id
|
95
104
|
async def get_matryoshka_vector_dimension(
|
96
|
-
txn: Transaction,
|
105
|
+
txn: Transaction,
|
106
|
+
*,
|
107
|
+
kbid: str,
|
108
|
+
vectorset_id: Optional[str] = None,
|
97
109
|
) -> Optional[int]:
|
98
110
|
"""Return vector dimension for matryoshka models"""
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
len(
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
111
|
+
from . import vectorsets
|
112
|
+
|
113
|
+
async for _, vs in vectorsets.iter(txn, kbid=kbid):
|
114
|
+
if len(vs.matryoshka_dimensions) > 0 and vs.vectorset_index_config.vector_dimension:
|
115
|
+
if vs.vectorset_index_config.vector_dimension in vs.matryoshka_dimensions:
|
116
|
+
return vs.vectorset_index_config.vector_dimension
|
117
|
+
else:
|
118
|
+
logger.error(
|
119
|
+
"KB has an invalid matryoshka dimension!",
|
120
|
+
extra={
|
121
|
+
"kbid": kbid,
|
122
|
+
"vector_dimension": vs.vectorset_index_config.vector_dimension,
|
123
|
+
"matryoshka_dimensions": vs.matryoshka_dimensions,
|
124
|
+
},
|
125
|
+
)
|
126
|
+
return None
|
127
|
+
else:
|
128
|
+
# fallback for KBs that don't have vectorset
|
129
|
+
model_metadata = await get_model_metadata(txn, kbid=kbid)
|
130
|
+
dimension = None
|
131
|
+
if len(model_metadata.matryoshka_dimensions) > 0 and model_metadata.vector_dimension:
|
132
|
+
if model_metadata.vector_dimension in model_metadata.matryoshka_dimensions:
|
133
|
+
dimension = model_metadata.vector_dimension
|
134
|
+
else:
|
135
|
+
logger.error(
|
136
|
+
"KB has an invalid matryoshka dimension!",
|
137
|
+
extra={
|
138
|
+
"kbid": kbid,
|
139
|
+
"vector_dimension": model_metadata.vector_dimension,
|
140
|
+
"matryoshka_dimensions": model_metadata.matryoshka_dimensions,
|
141
|
+
},
|
142
|
+
)
|
143
|
+
return dimension
|
144
|
+
|
145
|
+
|
146
|
+
async def get_external_index_provider_metadata(
|
147
|
+
txn: Transaction, *, kbid: str
|
148
|
+
) -> Optional[knowledgebox_pb2.StoredExternalIndexProviderMetadata]:
|
149
|
+
kb_config = await get_config(txn, kbid=kbid)
|
150
|
+
if kb_config is None:
|
151
|
+
return None
|
152
|
+
return kb_config.external_index_provider
|
153
|
+
|
154
|
+
|
155
|
+
async def set_external_index_provider_metadata(
|
156
|
+
txn: Transaction, *, kbid: str, metadata: knowledgebox_pb2.StoredExternalIndexProviderMetadata
|
157
|
+
):
|
158
|
+
kb_config = await get_config(txn, kbid=kbid)
|
159
|
+
if kb_config is None:
|
160
|
+
raise KnowledgeBoxNotFound(kbid)
|
161
|
+
kb_config.external_index_provider.CopyFrom(metadata)
|
162
|
+
await set_config(txn, kbid=kbid, config=kb_config)
|
@@ -17,6 +17,7 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
+
import logging
|
20
21
|
from typing import Optional
|
21
22
|
|
22
23
|
import orjson
|
@@ -24,6 +25,8 @@ import orjson
|
|
24
25
|
from nucliadb.common.maindb.driver import Transaction
|
25
26
|
from nucliadb_protos import knowledgebox_pb2 as kb_pb2
|
26
27
|
|
28
|
+
logger = logging.getLogger(__name__)
|
29
|
+
|
27
30
|
KB_LABELS = "/kbs/{kbid}/labels"
|
28
31
|
KB_LABELSET = "/kbs/{kbid}/labels/{id}"
|
29
32
|
KB_LABELSET_IDS = "/kbs/{kbid}/ids-labels"
|
@@ -34,7 +37,9 @@ async def get_labels(txn: Transaction, *, kbid: str) -> kb_pb2.Labels:
|
|
34
37
|
Get all labels for a knowledge box (from multiple labelsets)
|
35
38
|
"""
|
36
39
|
labels = kb_pb2.Labels()
|
37
|
-
labelset_ids = await
|
40
|
+
labelset_ids = await _get_labelset_ids(txn, kbid=kbid)
|
41
|
+
if labelset_ids is None:
|
42
|
+
return labels
|
38
43
|
for labelset_id in labelset_ids:
|
39
44
|
labelset = await txn.get(KB_LABELSET.format(kbid=kbid, id=labelset_id))
|
40
45
|
if not labelset:
|
@@ -45,76 +50,41 @@ async def get_labels(txn: Transaction, *, kbid: str) -> kb_pb2.Labels:
|
|
45
50
|
return labels
|
46
51
|
|
47
52
|
|
48
|
-
async def _get_labelset_ids_bw_compat(txn: Transaction, *, kbid: str) -> list[str]:
|
49
|
-
labelsets = await _get_labelset_ids(txn, kbid=kbid)
|
50
|
-
if labelsets is not None:
|
51
|
-
return labelsets
|
52
|
-
# TODO: Remove this after migration #11
|
53
|
-
return await _deprecated_scan_labelset_ids(txn, kbid=kbid)
|
54
|
-
|
55
|
-
|
56
|
-
async def _deprecated_scan_labelset_ids(txn: Transaction, *, kbid: str) -> list[str]:
|
57
|
-
labelsets = []
|
58
|
-
labels_key = KB_LABELS.format(kbid=kbid)
|
59
|
-
async for key in txn.keys(labels_key, count=-1, include_start=False):
|
60
|
-
lsid = key.split("/")[-1]
|
61
|
-
labelsets.append(lsid)
|
62
|
-
return labelsets
|
63
|
-
|
64
|
-
|
65
53
|
async def _get_labelset_ids(txn: Transaction, *, kbid: str) -> Optional[list[str]]:
|
66
54
|
key = KB_LABELSET_IDS.format(kbid=kbid)
|
67
|
-
data = await txn.get(key)
|
55
|
+
data = await txn.get(key, for_update=True)
|
68
56
|
if not data:
|
69
57
|
return None
|
70
58
|
return orjson.loads(data)
|
71
59
|
|
72
60
|
|
73
|
-
async def _add_to_labelset_ids(
|
74
|
-
|
75
|
-
) -> None:
|
61
|
+
async def _add_to_labelset_ids(txn: Transaction, *, kbid: str, labelsets: list[str]) -> None:
|
62
|
+
updated = set(labelsets)
|
76
63
|
previous = await _get_labelset_ids(txn, kbid=kbid)
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
needs_set = True
|
85
|
-
previous.append(labelset)
|
86
|
-
if needs_set:
|
87
|
-
await _set_labelset_ids(txn, kbid=kbid, labelsets=previous)
|
88
|
-
|
89
|
-
|
90
|
-
async def _delete_from_labelset_ids(
|
91
|
-
txn: Transaction, *, kbid: str, labelsets: list[str]
|
92
|
-
) -> None:
|
93
|
-
needs_set = False
|
64
|
+
if previous is not None:
|
65
|
+
updated.update(previous)
|
66
|
+
if previous is None or previous != updated:
|
67
|
+
await _set_labelset_ids(txn, kbid=kbid, labelsets=list(updated))
|
68
|
+
|
69
|
+
|
70
|
+
async def _delete_from_labelset_ids(txn: Transaction, *, kbid: str, labelsets: list[str]) -> None:
|
94
71
|
previous = await _get_labelset_ids(txn, kbid=kbid)
|
95
72
|
if previous is None:
|
96
|
-
#
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
async def _set_labelset_ids(
|
108
|
-
txn: Transaction, *, kbid: str, labelsets: list[str]
|
109
|
-
) -> None:
|
73
|
+
# Nothing to delete
|
74
|
+
return
|
75
|
+
previous_set = set(previous)
|
76
|
+
updated = previous_set - set(labelsets)
|
77
|
+
if previous_set != updated:
|
78
|
+
await _set_labelset_ids(txn, kbid=kbid, labelsets=list(updated))
|
79
|
+
|
80
|
+
|
81
|
+
async def _set_labelset_ids(txn: Transaction, *, kbid: str, labelsets: list[str]) -> None:
|
110
82
|
key = KB_LABELSET_IDS.format(kbid=kbid)
|
111
83
|
data = orjson.dumps(labelsets)
|
112
84
|
await txn.set(key, data)
|
113
85
|
|
114
86
|
|
115
|
-
async def get_labelset(
|
116
|
-
txn: Transaction, *, kbid: str, labelset_id: str
|
117
|
-
) -> Optional[kb_pb2.LabelSet]:
|
87
|
+
async def get_labelset(txn: Transaction, *, kbid: str, labelset_id: str) -> Optional[kb_pb2.LabelSet]:
|
118
88
|
labelset_key = KB_LABELSET.format(kbid=kbid, id=labelset_id)
|
119
89
|
payload = await txn.get(labelset_key)
|
120
90
|
if payload:
|
@@ -28,9 +28,7 @@ logger = logging.getLogger(__name__)
|
|
28
28
|
PULL_PARTITION_OFFSET = "/processing/pull-offset/{pull_type_id}/{partition}"
|
29
29
|
|
30
30
|
|
31
|
-
async def get_pull_offset(
|
32
|
-
txn: Transaction, *, pull_type_id: str, partition: str
|
33
|
-
) -> Optional[int]:
|
31
|
+
async def get_pull_offset(txn: Transaction, *, pull_type_id: str, partition: str) -> Optional[int]:
|
34
32
|
key = PULL_PARTITION_OFFSET.format(pull_type_id=pull_type_id, partition=partition)
|
35
33
|
val: Optional[bytes] = await txn.get(key)
|
36
34
|
if val is not None:
|
@@ -38,8 +36,6 @@ async def get_pull_offset(
|
|
38
36
|
return None
|
39
37
|
|
40
38
|
|
41
|
-
async def set_pull_offset(
|
42
|
-
txn: Transaction, *, pull_type_id: str, partition: str, offset: int
|
43
|
-
) -> None:
|
39
|
+
async def set_pull_offset(txn: Transaction, *, pull_type_id: str, partition: str, offset: int) -> None:
|
44
40
|
key = PULL_PARTITION_OFFSET.format(pull_type_id=pull_type_id, partition=partition)
|
45
41
|
await txn.set(key, str(offset).encode())
|