nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -0,0 +1,52 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
from pydantic import Field
|
21
|
+
from pydantic_settings import BaseSettings
|
22
|
+
|
23
|
+
|
24
|
+
class ExternalIndexProvidersSettings(BaseSettings):
|
25
|
+
pinecone_upsert_parallelism: int = Field(
|
26
|
+
default=3,
|
27
|
+
title="Pinecone upsert parallelism",
|
28
|
+
description="Number of parallel upserts to Pinecone on each set resource operation",
|
29
|
+
)
|
30
|
+
pinecone_delete_parallelism: int = Field(
|
31
|
+
default=2,
|
32
|
+
title="Pinecone delete parallelism",
|
33
|
+
description="Number of parallel deletes to Pinecone on each delete resource operation",
|
34
|
+
)
|
35
|
+
pinecone_upsert_timeout: float = Field(
|
36
|
+
default=10.0,
|
37
|
+
title="Pinecone upsert timeout",
|
38
|
+
description="Timeout in seconds for each upsert operation to Pinecone",
|
39
|
+
)
|
40
|
+
pinecone_delete_timeout: float = Field(
|
41
|
+
default=10.0,
|
42
|
+
title="Pinecone delete timeout",
|
43
|
+
description="Timeout in seconds for each delete operation to Pinecone",
|
44
|
+
)
|
45
|
+
pinecone_query_timeout: float = Field(
|
46
|
+
default=10.0,
|
47
|
+
title="Pinecone query timeout",
|
48
|
+
description="Timeout in seconds for each query operation to Pinecone",
|
49
|
+
)
|
50
|
+
|
51
|
+
|
52
|
+
settings = ExternalIndexProvidersSettings()
|
@@ -44,14 +44,11 @@ class NucliaAuthHTTPClient:
|
|
44
44
|
def __init__(self):
|
45
45
|
self.session = aiohttp.ClientSession()
|
46
46
|
self.base_url = (
|
47
|
-
nuclia_settings.nuclia_public_url.format(zone=nuclia_settings.nuclia_zone)
|
48
|
-
+ "/api"
|
47
|
+
nuclia_settings.nuclia_public_url.format(zone=nuclia_settings.nuclia_zone) + "/api"
|
49
48
|
)
|
50
49
|
self.headers = {}
|
51
50
|
if nuclia_settings.nuclia_service_account is not None:
|
52
|
-
self.headers["X-NUCLIA-NUAKEY"] =
|
53
|
-
f"Bearer {nuclia_settings.nuclia_service_account}"
|
54
|
-
)
|
51
|
+
self.headers["X-NUCLIA-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"
|
55
52
|
|
56
53
|
async def __aenter__(self):
|
57
54
|
return self
|
@@ -67,4 +64,4 @@ class NucliaAuthHTTPClient:
|
|
67
64
|
async with self.session.get(url, headers=self.headers) as resp:
|
68
65
|
resp_text = await resp.text()
|
69
66
|
check_status(resp, resp_text)
|
70
|
-
return AuthInfoResponse.
|
67
|
+
return AuthInfoResponse.model_validate_json(resp_text)
|
@@ -48,10 +48,7 @@ def get_processing_api_url() -> str:
|
|
48
48
|
+ "/api/v1/processing"
|
49
49
|
)
|
50
50
|
else:
|
51
|
-
return
|
52
|
-
nuclia_settings.nuclia_processing_cluster_url
|
53
|
-
+ "/api/v1/internal/processing"
|
54
|
-
)
|
51
|
+
return nuclia_settings.nuclia_processing_cluster_url + "/api/v1/internal/processing"
|
55
52
|
|
56
53
|
|
57
54
|
class PullResponse(pydantic.BaseModel):
|
@@ -159,9 +156,7 @@ class ProcessingHTTPClient:
|
|
159
156
|
self.base_url = get_processing_api_url()
|
160
157
|
self.headers = {}
|
161
158
|
if nuclia_settings.nuclia_service_account is not None:
|
162
|
-
self.headers["X-STF-NUAKEY"] =
|
163
|
-
f"Bearer {nuclia_settings.nuclia_service_account}"
|
164
|
-
)
|
159
|
+
self.headers["X-STF-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"
|
165
160
|
|
166
161
|
async def __aenter__(self):
|
167
162
|
return self
|
@@ -187,7 +182,7 @@ class ProcessingHTTPClient:
|
|
187
182
|
async with self.session.get(url, headers=self.headers, params=params) as resp:
|
188
183
|
resp_text = await resp.text()
|
189
184
|
check_status(resp, resp_text)
|
190
|
-
return PullResponse.
|
185
|
+
return PullResponse.model_validate_json(resp_text)
|
191
186
|
|
192
187
|
async def pull_position(self, partition: str) -> int:
|
193
188
|
url = self.base_url + "/pull/position"
|
@@ -195,7 +190,7 @@ class ProcessingHTTPClient:
|
|
195
190
|
async with self.session.get(url, headers=self.headers, params=params) as resp:
|
196
191
|
resp_text = await resp.text()
|
197
192
|
check_status(resp, resp_text)
|
198
|
-
data = PullPosition.
|
193
|
+
data = PullPosition.model_validate_json(resp_text)
|
199
194
|
return data.cursor
|
200
195
|
|
201
196
|
async def requests(
|
@@ -217,7 +212,7 @@ class ProcessingHTTPClient:
|
|
217
212
|
async with self.session.get(url, headers=self.headers, params=params) as resp:
|
218
213
|
resp_text = await resp.text()
|
219
214
|
check_status(resp, resp_text)
|
220
|
-
return RequestsResults.
|
215
|
+
return RequestsResults.model_validate_json(resp_text)
|
221
216
|
|
222
217
|
async def stats(self, kbid: str, timeout: Optional[float] = 1.0) -> StatsResponse:
|
223
218
|
url = self.base_url + "/stats"
|
@@ -229,4 +224,4 @@ class ProcessingHTTPClient:
|
|
229
224
|
) as resp:
|
230
225
|
resp_text = await resp.text()
|
231
226
|
check_status(resp, resp_text)
|
232
|
-
return StatsResponse.
|
227
|
+
return StatsResponse.model_validate_json(resp_text)
|
@@ -30,9 +30,7 @@ def check_status(resp: aiohttp.ClientResponse, resp_text: str) -> None:
|
|
30
30
|
elif resp.status == 404:
|
31
31
|
raise exceptions.NotFoundException(f"Resource not found: {resp_text}")
|
32
32
|
elif resp.status in (401, 403):
|
33
|
-
raise exceptions.AuthorizationException(
|
34
|
-
f"Unauthorized to access: {resp.status}"
|
35
|
-
)
|
33
|
+
raise exceptions.AuthorizationException(f"Unauthorized to access: {resp.status}")
|
36
34
|
elif resp.status == 429:
|
37
35
|
raise exceptions.RateLimitException("Rate limited")
|
38
36
|
else:
|
nucliadb/common/ids.py
ADDED
@@ -0,0 +1,240 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
"""
|
22
|
+
This module aims to centralize how we build ids for resources, fields,
|
23
|
+
paragraphs... Avoiding spread of id construction and parsing everywhere
|
24
|
+
"""
|
25
|
+
|
26
|
+
from dataclasses import dataclass
|
27
|
+
from typing import Optional
|
28
|
+
|
29
|
+
from nucliadb_protos.resources_pb2 import FieldType
|
30
|
+
|
31
|
+
FIELD_TYPE_STR_TO_PB: dict[str, FieldType.ValueType] = {
|
32
|
+
"t": FieldType.TEXT,
|
33
|
+
"f": FieldType.FILE,
|
34
|
+
"u": FieldType.LINK,
|
35
|
+
"a": FieldType.GENERIC,
|
36
|
+
"c": FieldType.CONVERSATION,
|
37
|
+
}
|
38
|
+
|
39
|
+
FIELD_TYPE_PB_TO_STR = {v: k for k, v in FIELD_TYPE_STR_TO_PB.items()}
|
40
|
+
|
41
|
+
|
42
|
+
@dataclass
|
43
|
+
class FieldId:
|
44
|
+
"""
|
45
|
+
Field ids are used to identify fields in resources. They usually have the following format:
|
46
|
+
|
47
|
+
`rid/field_type/field_key`
|
48
|
+
|
49
|
+
where field type is one of: `t`, `f`, `u`, `a`, `c` (text, file, link, generic, conversation)
|
50
|
+
and field_key is an identifier for that field type on the resource, usually chosen by the user.
|
51
|
+
|
52
|
+
In some cases, fields can have subfields, for example, in conversations, where each part of the
|
53
|
+
conversation is a subfield. In those cases, the id has the following format:
|
54
|
+
|
55
|
+
`rid/field_type/field_key/subfield_id`
|
56
|
+
|
57
|
+
Examples:
|
58
|
+
|
59
|
+
>>> FieldId(rid="rid", type="u", key="my-link")
|
60
|
+
FieldId("rid/u/my-link")
|
61
|
+
>>> FieldId.from_string("rid/u/my-link")
|
62
|
+
FieldId("rid/u/my-link")
|
63
|
+
"""
|
64
|
+
|
65
|
+
rid: str
|
66
|
+
type: str
|
67
|
+
key: str
|
68
|
+
# also known as `split`, this indicates a part of a field in, for example, conversations
|
69
|
+
subfield_id: Optional[str] = None
|
70
|
+
|
71
|
+
def __repr__(self) -> str:
|
72
|
+
return f"FieldId({self.full()})"
|
73
|
+
|
74
|
+
def short_without_subfield(self) -> str:
|
75
|
+
return f"/{self.type}/{self.key}"
|
76
|
+
|
77
|
+
def full(self) -> str:
|
78
|
+
if self.subfield_id is None:
|
79
|
+
return f"{self.rid}/{self.type}/{self.key}"
|
80
|
+
else:
|
81
|
+
return f"{self.rid}/{self.type}/{self.key}/{self.subfield_id}"
|
82
|
+
|
83
|
+
def __hash__(self) -> int:
|
84
|
+
return hash(self.full())
|
85
|
+
|
86
|
+
@property
|
87
|
+
def pb_type(self) -> FieldType.ValueType:
|
88
|
+
return FIELD_TYPE_STR_TO_PB[self.type]
|
89
|
+
|
90
|
+
@classmethod
|
91
|
+
def from_pb(
|
92
|
+
cls, rid: str, field_type: FieldType.ValueType, key: str, subfield_id: Optional[str] = None
|
93
|
+
) -> "FieldId":
|
94
|
+
return cls(rid=rid, type=FIELD_TYPE_PB_TO_STR[field_type], key=key, subfield_id=subfield_id)
|
95
|
+
|
96
|
+
@classmethod
|
97
|
+
def from_string(cls, value: str) -> "FieldId":
|
98
|
+
"""
|
99
|
+
Parse a FieldId from a string
|
100
|
+
Example:
|
101
|
+
>>> fid = FieldId.from_string("rid/u/foo")
|
102
|
+
>>> fid
|
103
|
+
FieldId("rid/u/foo")
|
104
|
+
>>> fid.type
|
105
|
+
'u'
|
106
|
+
>>> fid.key
|
107
|
+
'foo'
|
108
|
+
>>> FieldId.from_string("rid/u/foo/subfield_id").subfield_id
|
109
|
+
'subfield_id'
|
110
|
+
"""
|
111
|
+
parts = value.split("/")
|
112
|
+
if len(parts) == 3:
|
113
|
+
rid, _type, key = parts
|
114
|
+
if _type not in FIELD_TYPE_STR_TO_PB:
|
115
|
+
raise ValueError(f"Invalid FieldId: {value}")
|
116
|
+
return cls(rid=rid, type=_type, key=key)
|
117
|
+
elif len(parts) == 4:
|
118
|
+
rid, _type, key, subfield_id = parts
|
119
|
+
if _type not in FIELD_TYPE_STR_TO_PB:
|
120
|
+
raise ValueError(f"Invalid FieldId: {value}")
|
121
|
+
return cls(
|
122
|
+
rid=rid,
|
123
|
+
type=_type,
|
124
|
+
key=key,
|
125
|
+
subfield_id=subfield_id,
|
126
|
+
)
|
127
|
+
else:
|
128
|
+
raise ValueError(f"Invalid FieldId: {value}")
|
129
|
+
|
130
|
+
|
131
|
+
@dataclass
class ParagraphId:
    """
    Id of a paragraph: the field it belongs to plus a start-end range.

    String form: ``{field_id}/{paragraph_start}-{paragraph_end}``
    """

    field_id: FieldId
    paragraph_start: int
    paragraph_end: int

    def __repr__(self) -> str:
        full_id = self.full()
        return f"ParagraphId({full_id})"

    def full(self) -> str:
        """Return the complete string id, including the field id."""
        field_part = self.field_id.full()
        return f"{field_part}/{self.paragraph_start}-{self.paragraph_end}"

    def __hash__(self) -> int:
        full_id = self.full()
        return hash(full_id)

    @property
    def rid(self) -> str:
        """Resource id, taken from the field id."""
        owner = self.field_id
        return owner.rid

    @classmethod
    def from_string(cls, value: str) -> "ParagraphId":
        """
        Parse ``{field_id}/{start}-{end}``.

        Everything before the last ``/`` is handed to FieldId.from_string.
        """
        field_part, _, range_text = value.rpartition("/")
        # The range is parsed before the field id, so a malformed range is
        # the first error reported.
        start, end = (int(bound) for bound in range_text.split("-"))
        owner = FieldId.from_string(field_part)
        return cls(field_id=owner, paragraph_start=start, paragraph_end=end)

    @classmethod
    def from_vector_id(cls, vid: "VectorId") -> "ParagraphId":
        """
        Build a ParagraphId from a VectorId; the vector index is dropped.

        >>> vid = VectorId.from_string("rid/u/field_id/0/0-1")
        >>> ParagraphId.from_vector_id(vid)
        ParagraphId(rid/u/field_id/0-1)
        """
        start, end = vid.vector_start, vid.vector_end
        return cls(field_id=vid.field_id, paragraph_start=start, paragraph_end=end)
|
171
|
+
|
172
|
+
|
173
|
+
@dataclass
class VectorId:
    """
    Ids of vectors are very similar to ParagraphIds, but for legacy reasons they
    carry an index indicating the position of the corresponding text block in
    the list of text blocks for the field.

    String form: ``{field_id}/{index}/{vector_start}-{vector_end}``

    Examples:

    >>> VectorId.from_string("rid/u/field_id/0/0-10")
    VectorId(rid/u/field_id/0/0-10)
    >>> VectorId(
    ...    field_id=FieldId.from_string("rid/u/field_id"),
    ...    index=0,
    ...    vector_start=0,
    ...    vector_end=10,
    ... )
    VectorId(rid/u/field_id/0/0-10)
    """

    field_id: FieldId
    index: int
    vector_start: int
    vector_end: int

    def __repr__(self) -> str:
        full_id = self.full()
        return f"VectorId({full_id})"

    def full(self) -> str:
        """Return the complete string id, including the field id."""
        field_part = self.field_id.full()
        return f"{field_part}/{self.index}/{self.vector_start}-{self.vector_end}"

    def __hash__(self) -> int:
        full_id = self.full()
        return hash(full_id)

    @property
    def rid(self) -> str:
        """Resource id, taken from the field id."""
        owner = self.field_id
        return owner.rid

    @classmethod
    def from_string(cls, value: str) -> "VectorId":
        """
        Parse ``{field_id}/{index}/{start}-{end}``.

        The last part is the range, the one before it the index, and the
        remainder is handed to FieldId.from_string.
        """
        segments = value.split("/")
        range_text = segments[-1]
        start, end = (int(bound) for bound in range_text.split("-"))
        position = int(segments[-2])
        owner = FieldId.from_string("/".join(segments[:-2]))
        return cls(field_id=owner, index=position, vector_start=start, vector_end=end)
|
218
|
+
|
219
|
+
|
220
|
+
def extract_data_augmentation_id(generated_field_id: str) -> Optional[str]:
    """Return the task id embedded in a data augmentation generated field id.

    Data augmentation generated fields have a strict id with the following
    format:
    `da-{task_id}-{original:field_type}-{original:field_id}[-{original:split}]`

    Returns None when the id has fewer than four `-`-separated parts, does
    not start with `da`, or carries an empty task id.

    ATTENTION: ids are assumed to have been properly generated, with `-` not
    being a valid character inside any component. Otherwise the extraction is
    wrong and only part of the task id is returned.

    """
    pieces = generated_field_id.split("-")

    looks_generated = len(pieces) >= 4 and pieces[0] == "da"
    if not looks_generated:
        return None

    task_id = pieces[1]
    return task_id if task_id else None
|
nucliadb/common/locking.py
CHANGED
@@ -35,6 +35,7 @@ logger = logging.getLogger(__name__)
|
|
35
35
|
|
36
36
|
NEW_SHARD_LOCK = "new-shard-{kbid}"
|
37
37
|
RESOURCE_INDEX_LOCK = "resource-index-{kbid}-{resource_id}"
|
38
|
+
RESOURCE_CREATION_SLUG_LOCK = "resource-creation-{kbid}-{resource_slug}"
|
38
39
|
KB_SHARDS_LOCK = "shards-kb-{kbid}"
|
39
40
|
MIGRATIONS_LOCK = "migration"
|
40
41
|
|
@@ -83,7 +84,7 @@ class _Lock:
|
|
83
84
|
else:
|
84
85
|
if time.time() > lock_data.expires_at:
|
85
86
|
# if current time is greater than when it expires, take it over
|
86
|
-
await self.
|
87
|
+
await self._update_lock_value(txn)
|
87
88
|
await txn.commit()
|
88
89
|
break
|
89
90
|
|
@@ -99,24 +100,36 @@ class _Lock:
|
|
99
100
|
return self
|
100
101
|
|
101
102
|
async def get_lock_data(self, txn: Transaction) -> Optional[LockValue]:
|
102
|
-
existing_data = await txn.get(self.key)
|
103
|
+
existing_data = await txn.get(self.key, for_update=True)
|
103
104
|
if existing_data is None:
|
104
105
|
return None
|
105
106
|
else:
|
106
107
|
return LockValue(**orjson.loads(existing_data))
|
107
108
|
|
108
|
-
async def
|
109
|
+
async def _update_lock_value(self, txn: Transaction) -> None:
    """
    Update the value for the lock.

    Writes a LockValue built from this lock's value and an expiry of
    now + ``expire_timeout`` under ``self.key`` with ``txn.set``.
    The transaction is not committed here.
    """
    expires_at = time.time() + self.expire_timeout
    payload = orjson.dumps(LockValue(self.value, expires_at))
    await txn.set(self.key, payload)
|
113
117
|
|
118
|
+
async def _set_lock_value(self, txn: Transaction) -> None:
    """
    Set the value for the lock. If lock already exists, it doesn't update and raises a ConflictError.

    Writes a LockValue built from this lock's value and an expiry of
    now + ``expire_timeout`` under ``self.key`` with ``txn.insert``.
    The transaction is not committed here.
    """
    # NOTE(review): the ConflictError is presumably raised by the driver's
    # insert() implementation; it is not raised by this method directly.
    expires_at = time.time() + self.expire_timeout
    payload = orjson.dumps(LockValue(self.value, expires_at))
    await txn.insert(self.key, payload)
|
126
|
+
|
114
127
|
async def _refresh_task(self) -> None:
|
115
128
|
while True:
|
116
129
|
try:
|
117
130
|
await asyncio.sleep(self.refresh_timeout)
|
118
131
|
async with self.driver.transaction() as txn:
|
119
|
-
await self.
|
132
|
+
await self._update_lock_value(txn)
|
120
133
|
await txn.commit()
|
121
134
|
except (asyncio.CancelledError, RuntimeError):
|
122
135
|
return
|
@@ -137,10 +150,19 @@ class _Lock:
|
|
137
150
|
|
138
151
|
def distributed_lock(
|
139
152
|
key: str,
|
140
|
-
lock_timeout: float = 60.0,
|
141
|
-
expire_timeout: float = 30.0,
|
142
|
-
refresh_timeout: float = 10.0,
|
153
|
+
lock_timeout: float = 60.0,
|
154
|
+
expire_timeout: float = 30.0,
|
155
|
+
refresh_timeout: float = 10.0,
|
143
156
|
) -> _Lock:
|
157
|
+
"""
|
158
|
+
Context manager to get a distributed lock on a key.
|
159
|
+
|
160
|
+
Params:
|
161
|
+
- key: the key to lock with
|
162
|
+
- lock_timeout: maximum time to wait for the lock before ResourceLocked is raised.
|
163
|
+
- expire_timeout: how long by default the lock will be held without a refresh
|
164
|
+
- refresh_timeout: how often to refresh the lock
|
165
|
+
"""
|
144
166
|
return _Lock(
|
145
167
|
key,
|
146
168
|
lock_timeout=lock_timeout,
|
nucliadb/common/maindb/driver.py
CHANGED
@@ -23,7 +23,7 @@ import asyncio
|
|
23
23
|
from contextlib import asynccontextmanager
|
24
24
|
from typing import AsyncGenerator, Optional
|
25
25
|
|
26
|
-
DEFAULT_SCAN_LIMIT =
|
26
|
+
DEFAULT_SCAN_LIMIT = -1
|
27
27
|
DEFAULT_BATCH_SCAN_LIMIT = 500
|
28
28
|
|
29
29
|
|
@@ -37,18 +37,24 @@ class Transaction:
|
|
37
37
|
async def commit(self):
|
38
38
|
raise NotImplementedError()
|
39
39
|
|
40
|
-
async def batch_get(self, keys: list[str]) -> list[Optional[bytes]]:
|
40
|
+
async def batch_get(self, keys: list[str], for_update: bool = False) -> list[Optional[bytes]]:
|
41
41
|
raise NotImplementedError()
|
42
42
|
|
43
|
-
async def get(self, key: str) -> Optional[bytes]:
|
43
|
+
async def get(self, key: str, for_update: bool = False) -> Optional[bytes]:
|
44
44
|
raise NotImplementedError()
|
45
45
|
|
46
46
|
async def set(self, key: str, value: bytes):
|
47
47
|
raise NotImplementedError()
|
48
48
|
|
49
|
+
async def insert(self, key: str, value: bytes):
    """
    Default insert: delegate to ``set`` and return whatever it returns.

    This base version performs no existence check of its own.
    """
    outcome = await self.set(key, value)
    return outcome
|
51
|
+
|
49
52
|
async def delete(self, key: str):
|
50
53
|
raise NotImplementedError()
|
51
54
|
|
55
|
+
async def delete_by_prefix(self, prefix: str) -> None:
|
56
|
+
raise NotImplementedError()
|
57
|
+
|
52
58
|
def keys(
|
53
59
|
self, match: str, count: int = DEFAULT_SCAN_LIMIT, include_start: bool = True
|
54
60
|
) -> AsyncGenerator[str, None]:
|
@@ -74,36 +80,6 @@ class Driver:
|
|
74
80
|
except Exception:
|
75
81
|
pass
|
76
82
|
|
77
|
-
async def begin(self, read_only: bool = False) -> Transaction:
|
78
|
-
raise NotImplementedError()
|
79
|
-
|
80
83
|
@asynccontextmanager
|
81
|
-
async def transaction(
|
82
|
-
|
83
|
-
) -> AsyncGenerator[Transaction, None]:
|
84
|
-
"""
|
85
|
-
Use to make sure transaction is always aborted.
|
86
|
-
|
87
|
-
:param wait_for_abort: If True, wait for abort to finish before returning.
|
88
|
-
If False, abort is done in background (unless there
|
89
|
-
is an error)
|
90
|
-
"""
|
91
|
-
txn: Optional[Transaction] = None
|
92
|
-
error: bool = False
|
93
|
-
try:
|
94
|
-
txn = await self.begin(read_only=read_only)
|
95
|
-
yield txn
|
96
|
-
except Exception:
|
97
|
-
error = True
|
98
|
-
raise
|
99
|
-
finally:
|
100
|
-
if txn is not None and txn.open:
|
101
|
-
if error or wait_for_abort:
|
102
|
-
await txn.abort()
|
103
|
-
else:
|
104
|
-
self._async_abort(txn)
|
105
|
-
|
106
|
-
def _async_abort(self, txn: Transaction):
|
107
|
-
task = asyncio.create_task(txn.abort())
|
108
|
-
task.add_done_callback(lambda task: self._abort_tasks.remove(task))
|
109
|
-
self._abort_tasks.append(task)
|
84
|
+
async def transaction(self, read_only: bool = False) -> AsyncGenerator[Transaction, None]:
|
85
|
+
yield Transaction()
|
nucliadb/common/maindb/local.py
CHANGED
@@ -19,7 +19,8 @@
|
|
19
19
|
#
|
20
20
|
import glob
|
21
21
|
import os
|
22
|
-
from
|
22
|
+
from contextlib import asynccontextmanager
|
23
|
+
from typing import AsyncGenerator, Optional
|
23
24
|
|
24
25
|
from nucliadb.common.maindb.driver import (
|
25
26
|
DEFAULT_BATCH_SCAN_LIMIT,
|
@@ -105,7 +106,7 @@ class LocalTransaction(Transaction):
|
|
105
106
|
self.clean()
|
106
107
|
self.open = False
|
107
108
|
|
108
|
-
async def batch_get(self, keys: list[str]) -> list[Optional[bytes]]:
|
109
|
+
async def batch_get(self, keys: list[str], for_update: bool = False) -> list[Optional[bytes]]:
|
109
110
|
results: list[Optional[bytes]] = []
|
110
111
|
for key in keys:
|
111
112
|
obj = await self.get(key)
|
@@ -124,7 +125,7 @@ class LocalTransaction(Transaction):
|
|
124
125
|
|
125
126
|
return results
|
126
127
|
|
127
|
-
async def get(self, key: str) -> Optional[bytes]:
|
128
|
+
async def get(self, key: str, for_update: bool = False) -> Optional[bytes]:
|
128
129
|
if key in self.deleted_keys:
|
129
130
|
raise KeyError(f"Not found {key}")
|
130
131
|
|
@@ -159,9 +160,15 @@ class LocalTransaction(Transaction):
|
|
159
160
|
if key in self.modified_keys:
|
160
161
|
del self.modified_keys[key]
|
161
162
|
|
162
|
-
async def
|
163
|
-
|
164
|
-
|
163
|
+
async def delete_by_prefix(self, prefix: str) -> None:
    """
    Delete every key in ``modified_keys`` that starts with ``prefix``.

    Each match is removed through ``self.delete``.
    """
    # NOTE(review): only keys present in modified_keys are considered here;
    # confirm whether keys outside this transaction's modified set are
    # meant to be covered as well.
    # Matches are collected first so the mapping is never iterated while
    # deletions are in progress.
    doomed = [name for name in self.modified_keys if name.startswith(prefix)]
    for name in doomed:
        await self.delete(name)
|
170
|
+
|
171
|
+
async def keys(self, match: str, count: int = DEFAULT_SCAN_LIMIT, include_start: bool = True):
|
165
172
|
prev_key = None
|
166
173
|
|
167
174
|
get_all_keys = count == -1
|
@@ -195,7 +202,7 @@ class LocalTransaction(Transaction):
|
|
195
202
|
|
196
203
|
async def count(self, match: str) -> int:
|
197
204
|
value = 0
|
198
|
-
async for _ in self.keys(match
|
205
|
+
async for _ in self.keys(match):
|
199
206
|
value += 1
|
200
207
|
return value
|
201
208
|
|
@@ -214,7 +221,13 @@ class LocalDriver(Driver):
|
|
214
221
|
async def finalize(self):
|
215
222
|
pass
|
216
223
|
|
217
|
-
|
224
|
+
@asynccontextmanager
async def transaction(self, read_only: bool = False) -> AsyncGenerator[Transaction, None]:
    """
    Async context manager yielding a new LocalTransaction on this driver.

    ``read_only`` is accepted but not used by this implementation.

    Raises:
        AttributeError: if the driver has no url configured.
    """
    url = self.url
    if url is None:
        raise AttributeError("Invalid url")

    txn = LocalTransaction(url, self)
    try:
        yield txn
    finally:
        # Abort the transaction if it is still open when the block exits,
        # whether it exited normally or through an exception.
        still_open = txn.open
        if still_open:
            await txn.abort()
|